sig5 · sig5 · Apr 25, 2026 · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/README.md b/README.md
@@ -90,7 +90,7 @@ Run on Apple M4, `-benchtime=5s`. Numbers are per-operation.
 | bloom=false, lock=true | 70,526 | 35,228 |
 | bloom=false, lock=false | 80,792 | 39,588 |
 
-### Sequential reads — hits (10k keys pre-loaded)
+### Sequential reads, hits (10k keys pre-loaded)
 
 | Config | ns/op |
 |--------|------:|
@@ -99,7 +99,7 @@ Run on Apple M4, `-benchtime=5s`. Numbers are per-operation.
 | bloom=false, lock=true | 5,351 |
 | bloom=false, lock=false | 5,333 |
 
-### Sequential reads — misses (10k keys pre-loaded, reading non-existent keys)
+### Sequential reads, misses (10k keys pre-loaded, reading non-existent keys)
 
 | Config | ns/op |
 |--------|------:|
@@ -108,7 +108,7 @@ Run on Apple M4, `-benchtime=5s`. Numbers are per-operation.
 | bloom=false, lock=true | 67.5 |
 | bloom=false, lock=false | 67.4 |
 
-### Reads — large dataset (100k keys, hits vs misses)
+### Reads, large dataset (100k keys, hits vs misses)
 
 | Config | ns/op |
 |--------|------:|
@@ -133,17 +133,17 @@ Run on Apple M4, `-benchtime=5s`. Numbers are per-operation.
 
 ### What the numbers tell us
 
-**Bloom filter only helps misses.** On a hit, the bloom filter says "maybe" and you still have to read the index — so you pay the hash cost for nothing. On a miss, bloom can reject a key outright without touching the index at all, which is where the speedup comes from.
+**Bloom filter only helps misses.** On a hit, bloom says "maybe" and you still have to read the index, so you pay the hash cost for nothing. On a miss it can reject the key without touching the index at all, which is where the speedup comes from.
 
-**The bloom advantage scales with index size.** With 10k keys (small SSTables, index fits in cache), bloom saves ~3% on misses (65ns vs 67ns). With 100k keys (larger indices after compaction), the saving grows to ~12% (97ns vs 110ns). The index maps are too large for CPU cache at that point, so each lookup becomes a RAM access; bloom's compact bitset stays cache-hot and wins.
+**The advantage scales with index size.** With 10k keys the SSTable index maps fit in CPU cache, so a plain map lookup beats running 3 hashes and bloom only saves about 3% on misses (65ns vs 67ns). With 100k keys the indices spill out of cache and each lookup costs a RAM round-trip. At that point bloom's compact bitset stays cache-hot and the saving grows to about 12% (97ns vs 110ns).
 
-**Bloom never helps read hits.** The hit numbers across small and large datasets are nearly identical with and without bloom (~5.3–5.4µs). The SSTable data file read dominates, and bloom adds a small hash overhead before it.
+**Bloom never helps hits.** Hit latency is nearly identical with and without bloom across both dataset sizes (~5.3-5.4µs). The SSTable data file read dominates and bloom just adds a small hash overhead before it.
 
-**An uncontended lock is basically free.** For sequential writes, `lock=true` is actually faster than `lock=false` (58µs vs 73µs). There's no contention so the mutex costs nothing, and the two code paths end up with different allocation patterns — `lock=false` allocates about 30% more memory per op, which is where the slowdown comes from.
+**An uncontended lock is basically free.** Sequential writes with `lock=true` are actually faster than `lock=false` (58µs vs 73µs). There's no contention so the mutex costs nothing, and the two code paths end up with different allocation patterns. `lock=false` allocates about 30% more per op, which is where the difference comes from.
 
-**Concurrent writes scale to about 3x, not 10x.** Sequential puts cost ~58µs; parallel drops to ~18µs across 10 goroutines. The ceiling is the memtable write lock — WAL appends and B-tree inserts both serialize, so adding more goroutines doesn't help past a point.
+**Concurrent writes scale to about 3x, not 10x.** Sequential puts cost ~58µs; parallel drops to ~18µs across 10 goroutines. WAL appends and B-tree inserts both serialize on the write lock, so more goroutines can't help past that point.
 
-**Concurrent reads barely move the needle** (~5.4µs → 4.7µs). Reads take a shared `RLock` so they can technically run in parallel, but the bottleneck is SSTable file I/O which doesn't fan out well on a single drive.
+**Concurrent reads barely move.** ~5.4µs drops to ~4.7µs. Reads take a shared `RLock` so they can run in parallel, but the bottleneck is SSTable file I/O which doesn't fan out well on a single drive.
 
 ## Running
 

diff --git a/db/db.go b/db/db.go
@@ -1,144 +1,67 @@
 package db
 
 import (
-	"fmt"
-	compaction "lorem-lsm/compactor"
-	"lorem-lsm/memtable"
-	"lorem-lsm/sstable"
-	"lorem-lsm/wal"
-	"os"
+	"lorem-lsm/shard"
 	"sync"
-	"time"
 )
 
 type LoremDB struct {
-	wal          *wal.Wal
-	memTable     *memtable.MemTable
-	ssTables     []*sstable.SSTable
-	ssTablePath  string
-	useBloom     bool
-	useLock      bool
-	ssTableLimit int
-	lock         sync.RWMutex
+	shardCount         int
+	dbList             []*shard.LoremDBShard
+	supportConcurrency bool
+	useBloom           bool
+	lock               sync.Mutex
 }
 
-func NewLoremDB(useBloom bool, supportConcurrency bool) *LoremDB {
-
-	os.MkdirAll("sstable", 0755)
-	writeAheadLog, _ := wal.NewWal("wal.log")
-	memTable := memtable.NewMemTable()
-
-	writeAheadLog.Recover(func(walRow *wal.WalRow) {
-		if !walRow.IsDeleted {
-			memTable.Put(walRow.Key, walRow.Value)
-		} else {
-			memTable.Delete(walRow.Key)
-		}
-	})
-
-	ssTables := []*sstable.SSTable{}
-	ssTablePath := "sstable"
+func NewLoremDB(shardCount int, useBloom bool, supportConcurrency bool) *LoremDB {
 
 	return &LoremDB{
-		wal:          writeAheadLog,
-		memTable:     memTable,
-		ssTables:     ssTables,
-		ssTablePath:  ssTablePath,
-		useBloom:     useBloom,
-		ssTableLimit: 5,
-		useLock:      supportConcurrency,
+		shardCount:         shardCount,
+		dbList:             make([]*shard.LoremDBShard, shardCount),
+		useBloom:           useBloom,
+		supportConcurrency: supportConcurrency,
 	}
 }
 
 func (db *LoremDB) Put(key string, value string) error {
-	// write wal first to maximize data recovery chances
-	if db.useLock {
-		db.lock.Lock()
-		defer db.lock.Unlock()
-	}
-
-	db.wal.Append(wal.WalRow{
-		Key:       key,
-		Value:     value,
-		IsDeleted: false,
-	})
-
-	isMemTableFull := db.memTable.Put(key, value)
-
-	if isMemTableFull {
-		path := fmt.Sprintf("%s/%d", db.ssTablePath, time.Now().UnixNano())
-		table, err := sstable.CreateSSTable(path, db.useBloom)
-
-		if err != nil {
-			return err
-		}
-
-		table.FlushMemTable(db.memTable)
-		db.ssTables = append(db.ssTables, table)
-
-		if len(db.ssTables) > db.ssTableLimit {
-			compactor := compaction.NewCompactor(db.ssTables)
-			db.ssTables = []*sstable.SSTable{compactor.Compact()}
+	shard := db.GetShard(&key)
+	return shard.Put(key, value)
+}
 
-		}
-		db.memTable = memtable.NewMemTable()
-		db.wal.Clear()
-	}
-	return nil
+func (db *LoremDB) Get(key string) (string, bool) {
+	shard := db.GetShard(&key)
+	return shard.Get(key)
 }
 
 func (db *LoremDB) Delete(key string) error {
+	shard := db.GetShard(&key)
+	return shard.Delete(key)
+}
 
-	if db.useLock {
+func (db *LoremDB) GetShard(key *string) *shard.LoremDBShard {
+	id := db.GetShardId(key)
+	selectedShard := db.dbList[id]
+	if selectedShard == nil {
+		// lazy loadingl
 		db.lock.Lock()
 		defer db.lock.Unlock()
-	}
-
-	// write wal first to maximize data recovery chances
-	db.wal.Append(wal.WalRow{
-		Key:       key,
-		Value:     "",
-		IsDeleted: true,
-	})
-
-	isMemTableFull := db.memTable.Delete(key)
-
-	if isMemTableFull {
-		path := fmt.Sprintf("%s/%d", db.ssTablePath, time.Now().UnixNano())
-		table, err := sstable.CreateSSTable(path, db.useBloom)
-
-		if err != nil {
-
-			return err
+		selectedShard = db.dbList[id]
+		if selectedShard == nil {
+			selectedShard = shard.NewLoremDBShard(id, db.useBloom, db.supportConcurrency)
+			db.dbList[id] = selectedShard
 		}
-
-		table.FlushMemTable(db.memTable)
-		db.ssTables = append(db.ssTables, table)
-		db.memTable = memtable.NewMemTable()
-		fmt.Println("reset, new size:", db.memTable.Size())
 	}
-	return nil
+	return selectedShard
 }
 
-func (db *LoremDB) Get(key string) (string, bool) {
+func (db *LoremDB) GetShardId(key *string) int {
 
-	if db.useLock {
-		db.lock.RLock()
-		defer db.lock.RUnlock()
-	}
+	// get value of first 5 characters.
+	sum := 0
 
-	// check in memtable
-	val, ok := db.memTable.Get(key)
-	if ok {
-		return val, true
-	}
-	// fallback to sstable
-	for i := len(db.ssTables) - 1; i >= 0; i-- {
-		value, _ := db.ssTables[i].Get(key)
-
-		if value != "" {
-			return value, true
-		}
+	for i := 0; i < min(len(*key), 5); i++ {
+		sum += int((*key)[i] - 'a')
 	}
-	return "", false
+	shardHash := sum % (db.shardCount)
+	return shardHash
 }
diff --git a/db/db_bench_test.go b/db/db_bench_test.go
@@ -6,27 +6,30 @@ import (
 	"testing"
 )
 
-func setupDB(b *testing.B, useBloom bool, useLock bool) *LoremDB {
+func setupDB(b *testing.B, shardCount int, useBloom bool, useLock bool) *LoremDB {
 	os.RemoveAll("sstable")
 	os.Remove("wal.log")
 	os.MkdirAll("sstable", 0755)
-	return NewLoremDB(useBloom, useLock)
+	return NewLoremDB(shardCount, useBloom, useLock)
 }
 
 func BenchmarkPut(b *testing.B) {
 	configs := []struct {
-		useBloom bool
-		useLock  bool
+		useBloom   bool
+		useLock    bool
+		shardCount int
 	}{
-		{true, true},
-		{true, false},
-		{false, true},
-		{false, false},
+		{true, true, 1},
+		{true, true, 2},
+		{true, false, 1},
+		{false, true, 1},
+		{false, true, 2},
+		{false, false, 1},
 	}
 	for _, cfg := range configs {
-		name := fmt.Sprintf("bloom=%v,lock=%v", cfg.useBloom, cfg.useLock)
+		name := fmt.Sprintf("bloom=%v,lock=%v,shardCount=%d", cfg.useBloom, cfg.useLock, cfg.shardCount)
 		b.Run(name, func(b *testing.B) {
-			store := setupDB(b, cfg.useBloom, cfg.useLock)
+			store := setupDB(b, cfg.shardCount, cfg.useBloom, cfg.useLock)
 			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
 				store.Put(fmt.Sprintf("key-%d", i), fmt.Sprintf("value-%d", i))
@@ -37,18 +40,21 @@ func BenchmarkPut(b *testing.B) {
 
 func BenchmarkGet(b *testing.B) {
 	configs := []struct {
-		useBloom bool
-		useLock  bool
+		useBloom   bool
+		useLock    bool
+		shardCount int
 	}{
-		{true, true},
-		{true, false},
-		{false, true},
-		{false, false},
+		{true, true, 1},
+		{true, true, 2},
+		{true, false, 1},
+		{false, true, 1},
+		{false, true, 2},
+		{false, false, 1},
 	}
 	for _, cfg := range configs {
-		name := fmt.Sprintf("bloom=%v,lock=%v", cfg.useBloom, cfg.useLock)
+		name := fmt.Sprintf("bloom=%v,lock=%v,shardCount=%d", cfg.useBloom, cfg.useLock, cfg.shardCount)
 		b.Run(name, func(b *testing.B) {
-			store := setupDB(b, cfg.useBloom, cfg.useLock)
+			store := setupDB(b, cfg.shardCount, cfg.useBloom, cfg.useLock)
 			for i := 0; i < 10000; i++ {
 				store.Put(fmt.Sprintf("key-%d", i), fmt.Sprintf("value-%d", i))
 			}
@@ -64,18 +70,21 @@ func BenchmarkGet(b *testing.B) {
 // every SSTable lookup (no index access needed), so the gap vs no-bloom is largest here.
 func BenchmarkGetMiss(b *testing.B) {
 	configs := []struct {
-		useBloom bool
-		useLock  bool
+		useBloom   bool
+		useLock    bool
+		shardCount int
 	}{
-		{true, true},
-		{true, false},
-		{false, true},
-		{false, false},
+		{true, true, 1},
+		{true, true, 2},
+		{true, false, 1},
+		{false, true, 1},
+		{false, true, 2},
+		{false, false, 1},
 	}
 	for _, cfg := range configs {
-		name := fmt.Sprintf("bloom=%v,lock=%v", cfg.useBloom, cfg.useLock)
+		name := fmt.Sprintf("bloom=%v,lock=%v,shardCount=%d", cfg.useBloom, cfg.useLock, cfg.shardCount)
 		b.Run(name, func(b *testing.B) {
-			store := setupDB(b, cfg.useBloom, cfg.useLock)
+			store := setupDB(b, cfg.shardCount, cfg.useBloom, cfg.useLock)
 			for i := 0; i < 10000; i++ {
 				store.Put(fmt.Sprintf("key-%d", i), fmt.Sprintf("value-%d", i))
 			}
@@ -107,7 +116,7 @@ func BenchmarkGetLargeDataset(b *testing.B) {
 			hit, useBloom := hit, useBloom
 			label := fmt.Sprintf("hit=%v,bloom=%v", hit, useBloom)
 			b.Run(label, func(b *testing.B) {
-				store := setupDB(b, useBloom, false)
+				store := setupDB(b, 1, useBloom, false)
 				for i := 0; i < preload; i++ {
 					store.Put(fmt.Sprintf("key-%d", i), fmt.Sprintf("value-%d", i))
 				}
@@ -126,16 +135,19 @@ func BenchmarkGetLargeDataset(b *testing.B) {
 
 func BenchmarkPutConcurrent(b *testing.B) {
 	configs := []struct {
-		useBloom bool
-		useLock  bool
+		useBloom   bool
+		useLock    bool
+		shardCount int
 	}{
-		{true, true},
-		{false, true},
+		{true, true, 1},
+		{false, true, 1},
+		{true, true, 2},
+		{false, true, 2},
 	}
 	for _, cfg := range configs {
-		name := fmt.Sprintf("bloom=%v,lock=%v", cfg.useBloom, cfg.useLock)
+		name := fmt.Sprintf("bloom=%v,lock=%v,shardCount=%d", cfg.useBloom, cfg.useLock, cfg.shardCount)
 		b.Run(name, func(b *testing.B) {
-			store := setupDB(b, cfg.useBloom, cfg.useLock)
+			store := setupDB(b, cfg.shardCount, cfg.useBloom, cfg.useLock)
 			b.ResetTimer()
 			b.RunParallel(func(pb *testing.PB) {
 				i := 0
@@ -150,16 +162,19 @@ func BenchmarkPutConcurrent(b *testing.B) {
 
 func BenchmarkGetConcurrent(b *testing.B) {
 	configs := []struct {
-		useBloom bool
-		useLock  bool
+		useBloom   bool
+		useLock    bool
+		shardCount int
 	}{
-		{true, true},
-		{false, true},
+		{true, true, 1},
+		{false, true, 1},
+		{true, true, 5},
+		{false, true, 5},
 	}
 	for _, cfg := range configs {
-		name := fmt.Sprintf("bloom=%v,lock=%v", cfg.useBloom, cfg.useLock)
+		name := fmt.Sprintf("bloom=%v,lock=%v,shardCount=%d", cfg.useBloom, cfg.useLock, cfg.shardCount)
 		b.Run(name, func(b *testing.B) {
-			store := setupDB(b, cfg.useBloom, cfg.useLock)
+			store := setupDB(b, cfg.shardCount, cfg.useBloom, cfg.useLock)
 			for i := 0; i < 10000; i++ {
 				store.Put(fmt.Sprintf("key-%d", i), fmt.Sprintf("value-%d", i))
 			}