Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,24 @@ Bloom is slower here. The index fits in cache so a plain map lookup is cheaper t

Bloom wins on misses. Index maps are too big for cache, bloom's bitset isn't.

**Without compaction (maxSize=10k, ~50 small SSTables)**

| | Writes | Read hits | Read misses |
|-|--------|-----------|-------------|
| Bloom on | 4.25s | 125ms | 52ms |
| Bloom off | 4.44s | 114ms | 34ms |

Reads walk about 50 SSTables. Each index fits in the CPU cache, so map lookups are fast and the Bloom filter adds more overhead than it saves.

**With compaction (maxSize=100k, ~5 large SSTables)**

| | Writes | Read hits | Read misses |
|-|--------|-----------|-------------|
| Bloom on | 15.4s | 62ms | 8.3ms |
| Bloom off | 15.7s | 68ms | 9.1ms |

There are fewer SSTables, so reads are faster. Writes are slower because each flush is much larger. The index maps no longer fit in the CPU cache, so the Bloom filter's compact bitset beats a plain map lookup on misses.

## Running

```bash
Expand Down
59 changes: 59 additions & 0 deletions compactor/compactor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package compaction

import (
"fmt"
"lorem-lsm/memtable"
"lorem-lsm/sstable"
"time"
)

// Compactor merges a set of candidate SSTables into a single result SSTable.
type Compactor struct {
// candidates are the input tables that Compact reads from and CleanUp closes.
candidates []*sstable.SSTable
// result is the output table created by NewCompactor and filled by Compact.
result *sstable.SSTable
}

// NewCompactor builds a Compactor for the given candidate tables.
//
// It creates a fresh output SSTable (Bloom filter enabled) at
// "sstable/compacted-<unix-nanoseconds>". If that table cannot be created, the
// error is printed and nil is returned, so callers must check the result
// before calling Compact.
func NewCompactor(candidates []*sstable.SSTable) *Compactor {
	path := fmt.Sprintf("sstable/compacted-%d", time.Now().UnixNano())
	result, err := sstable.CreateSSTable(path, true)
	if err != nil {
		// The signature has no error return, so surface the cause here
		// instead of silently dropping it.
		fmt.Println("create compacted sstable error:", err)
		return nil
	}

	return &Compactor{
		candidates: candidates,
		result:     result,
	}
}

// Compact merges every candidate SSTable into the compactor's result table and
// returns that table.
//
// Candidates are visited in slice order, so when the same key appears in
// several tables the entry read from the later table wins. Keys whose winning
// entry is marked deleted are left out of the result.
//
// After a successful flush the candidates are released through CleanUp. If the
// flush reports an error, the error is printed and the candidates are left
// untouched so their data is not discarded; the result table is still returned.
func (compactor *Compactor) Compact() *sstable.SSTable {
	// entry is the latest state seen for one key across all candidates.
	type entry struct {
		value   string
		deleted bool
	}

	merged := map[string]entry{}
	for _, candidate := range compactor.candidates {
		for key := range candidate.IndexFileMemory {
			value, deleted := candidate.Get(key)
			merged[key] = entry{value: value, deleted: deleted}
		}
	}

	// Only live keys are carried over into the compacted table.
	live := memtable.NewMemTable()
	for key, e := range merged {
		if !e.deleted {
			live.Put(key, e.value)
		}
	}

	if err := compactor.result.FlushMemTable(live); err != nil {
		// Keep the inputs: removing them now would drop the only copy of the data.
		fmt.Println("compaction flush error:", err)
		return compactor.result
	}

	compactor.CleanUp()
	return compactor.result
}

// CleanUp calls Close on each candidate table, in slice order.
func (compactor *Compactor) CleanUp() {
	inputs := compactor.candidates
	for i := 0; i < len(inputs); i++ {
		inputs[i].Close()
	}
}
31 changes: 20 additions & 11 deletions db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package db

import (
"fmt"
compaction "lorem-lsm/compactor"
"lorem-lsm/memtable"
"lorem-lsm/sstable"
"lorem-lsm/wal"
Expand All @@ -10,11 +11,12 @@ import (
)

type LoremDB struct {
wal *wal.Wal
memTable *memtable.MemTable
ssTables []*sstable.SSTable
ssTablePath string
useBloom bool
wal *wal.Wal
memTable *memtable.MemTable
ssTables []*sstable.SSTable
ssTablePath string
useBloom bool
ssTableLimit int
}

func NewLoremDB(useBloom bool) *LoremDB {
Expand All @@ -26,11 +28,12 @@ func NewLoremDB(useBloom bool) *LoremDB {
ssTablePath := "sstable"

return &LoremDB{
wal: wal,
memTable: memTable,
ssTables: ssTables,
ssTablePath: ssTablePath,
useBloom: useBloom,
wal: wal,
memTable: memTable,
ssTables: ssTables,
ssTablePath: ssTablePath,
useBloom: useBloom,
ssTableLimit: 5,
}
}

Expand All @@ -54,6 +57,12 @@ func (db *LoremDB) Put(key string, value string) error {

table.FlushMemTable(db.memTable)
db.ssTables = append(db.ssTables, table)

if len(db.ssTables) > db.ssTableLimit {
compactor := compaction.NewCompactor(db.ssTables)
db.ssTables = []*sstable.SSTable{compactor.Compact()}

}
db.memTable = memtable.NewMemTable()
}
return nil
Expand Down Expand Up @@ -94,7 +103,7 @@ func (db *LoremDB) Get(key string) (string, bool) {
}
// fallback to sstable
for i := len(db.ssTables) - 1; i >= 0; i-- {
value := db.ssTables[i].Get(key)
value, _ := db.ssTables[i].Get(key)

if value != "" {
return value, true
Expand Down
32 changes: 21 additions & 11 deletions sstable/sstable.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,18 @@ import (
type SSTable struct {
indexFile *os.File
dataFile *os.File
indexFileMemory map[string]int64
IndexFileMemory map[string]int64
bloomFilter *bloom.BloomFilter
useBloom bool
BasePath string
}

// Close releases the table's index and data file handles and then deletes
// everything under BasePath from disk. Failures are printed rather than
// returned; "Cleaned" is only announced when the removal succeeded.
//
// NOTE(review): the ReadSSTable constructor visible in this file does not set
// BasePath, so for tables loaded that way nothing appears to be removed here;
// confirm whether that is intended.
func (table *SSTable) Close() {
	if err := table.indexFile.Close(); err != nil {
		fmt.Println("close index file error:", err)
	}
	if err := table.dataFile.Close(); err != nil {
		fmt.Println("close data file error:", err)
	}

	if err := os.RemoveAll(table.BasePath); err != nil {
		fmt.Println("remove sstable error:", err)
		return
	}
	println("Cleaned")
}

func CreateSSTable(basePath string, useBloom bool) (*SSTable, error) {
Expand All @@ -40,9 +49,10 @@ func CreateSSTable(basePath string, useBloom bool) (*SSTable, error) {
return &SSTable{
indexFile: idxFile,
dataFile: dFile,
indexFileMemory: indexFileMemory,
IndexFileMemory: indexFileMemory,
bloomFilter: bloom.NewBloomFilter(1000000, 3),
useBloom: useBloom,
BasePath: basePath,
}, nil
}

Expand Down Expand Up @@ -74,7 +84,7 @@ func ReadSSTable(basePath string) (*SSTable, error) {
return &SSTable{
indexFile: idxFile,
dataFile: dFile,
indexFileMemory: indexFileMemory,
IndexFileMemory: indexFileMemory,
bloomFilter: bloom.NewBloomFilter(1000000, 3),
}, nil
}
Expand All @@ -93,7 +103,7 @@ func (ssTable *SSTable) FlushMemTable(memTable *memtable.MemTable) error {
if err != nil {
fmt.Println("write error:", err)
}
ssTable.indexFileMemory[row.Key] = start
ssTable.IndexFileMemory[row.Key] = start
idxItem := fmt.Sprintf("%s,%d\n", row.Key, start)
start += int64(len(item))
ssTable.indexFile.WriteString(idxItem)
Expand All @@ -102,19 +112,19 @@ func (ssTable *SSTable) FlushMemTable(memTable *memtable.MemTable) error {
return nil
}

func (ssTable *SSTable) Get(key string) string {
func (ssTable *SSTable) Get(key string) (string, bool) {
if ssTable.useBloom && !ssTable.bloomFilter.Contains(key) {
return ""
return "", false
}
seekPoint, yes := ssTable.indexFileMemory[key]
seekPoint, yes := ssTable.IndexFileMemory[key]
if !yes {
return ""
return "", false
}

_, err := ssTable.dataFile.Seek(seekPoint, 0)

if err != nil {
return ""
return "", false
}

reader := bufio.NewReader(ssTable.dataFile)
Expand All @@ -124,7 +134,7 @@ func (ssTable *SSTable) Get(key string) string {
parts := strings.Split(row, ",")
isDeleted := parts[2] == "t"
if isDeleted {
return ""
return "", true
}
return parts[1]
return parts[1], false
}
Loading
Loading