Skip to content

Commit cd1b577

Browse files
authored
Merge pull request #2 from hungpdn/release/v0.1.0
release/v0.1.0
2 parents a3eec7d + 8f7909f commit cd1b577

4 files changed

Lines changed: 254 additions & 149 deletions

File tree

README.md

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,17 @@ A high-performance, concurrent-safe, and crash-resilient **Write-Ahead Log (WAL)
1414
- 🔄 **Log Rotation**: Automatic segment rotation based on configurable size.
1515
- 💾 **Flexible Sync Strategies**: Choose between Performance (Background), Safety (Always), or Balance (OSCache).
1616
- 🔍 **Iterator API**: Memory-efficient sequential reading of logs.
17-
- 🧵 **Crash Safety:** Automatic recovery from power failures. Detects and truncates corrupted log tails (partial writes) on startup.
17+
-**Optimized Startup**: Uses reverse scanning to instantly recover the last segment state without reading the whole file.
18+
- 🧹 **Retention Policies**: Automatic cleanup based on TTL (Time-To-Live) or Total Size.
19+
20+
## Roadmap
21+
22+
The following features are planned for future releases:
23+
24+
- [ ] **v0.1.1 - Compression Support**: Add Snappy/Zstd compression for payloads to reduce disk usage.
25+
- [ ] **v0.1.2 - Sparse Indexing**: Implement a sidecar `.idx` file to support O(1) lookup time for `Seek(SeqID)`.
26+
- [ ] **v0.1.3 - Metrics & Observability**: OpenTelemetry / Prometheus integration for monitoring throughput and latency.
27+
- [ ] **v0.2.0 - Replication Hooks**: APIs to support streaming WAL entries to other nodes (Raft/Paxos integration).
1828

1929
## Architecture
2030

@@ -23,17 +33,18 @@ A high-performance, concurrent-safe, and crash-resilient **Write-Ahead Log (WAL)
2333
Each segment file consists of a sequence of binary encoded entries.
2434

2535
```Plaintext
26-
+-------------------+-------------------+-------------------+----------------------+
27-
| CRC32 (4 bytes) | Size (8 bytes) | SeqID (8 bytes) | Payload (N bytes) |
28-
+-------------------+-------------------+-------------------+----------------------+
29-
| Checksum of Data | Length of Payload | Monotonic ID | The actual data |
30-
+-------------------+-------------------+-------------------+----------------------+
36+
+-------------------+-------------------+-------------------+----------------------+-------------------+
37+
| CRC32 (4 bytes) | Size (8 bytes) | SeqID (8 bytes) | Payload (N bytes) | Size (8 bytes) |
38+
+-------------------+-------------------+-------------------+----------------------+-------------------+
39+
| Checksum of Data | Length of Payload | Monotonic ID | The actual data | Backward Pointer |
40+
+-------------------+-------------------+-------------------+----------------------+-------------------+
3141
```
3242

3343
- CRC (Cyclic Redundancy Check): Ensures data integrity.
34-
- Size: Enable fast reading without parsing the entire file.
44+
- Size: Enable fast forward reading (skipping payloads).
3545
- SeqID: Global Sequence ID
3646
- Payload: The actual data.
47+
- Size (Footer): Enable fast reverse reading for optimized startup recovery.
3748

3849
## Installation
3950

@@ -106,17 +117,11 @@ if err := iter.Err(); err != nil {
106117

107118
Contributions are welcome! Please fork the repository and open a pull request.
108119

109-
1. Fork the Project.
110-
2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`)
111-
3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`)
112-
4. Push to the Branch (`git push origin feature/AmazingFeature`)
113-
5. Open a Pull Request
114-
115120
## License
116121

117122
MIT License. See [LICENSE](LICENSE) file.
118123

119-
## Reference
124+
## References
120125

121-
- [tidwall/wal](https://github.com/tidwall/wal)
122126
- [Log: What Every Software Engineer Should Know About Real-Time Data's Unifying Abstraction](https://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying)
127+
- [Designing Data-Intensive Applications](https://www.oreilly.com/library/view/designing-data-intensive-applications/9781491903063)

reader.go

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ func (it *Iterator) Next() bool {
6464

6565
// Loop to handle switching between segment files
6666
for {
67-
// If file is not opened or we've read all of the old file -> Open new file
6867
if it.currentFile == nil {
6968
if it.currentIdx >= len(it.segmentPaths) {
7069
return false // All files have been read
@@ -73,15 +72,11 @@ func (it *Iterator) Next() bool {
7372
path := it.segmentPaths[it.currentIdx]
7473
f, err := os.Open(path)
7574
if err != nil {
76-
// If file is deleted (e.g. retention policy), we report error
77-
// or we could skip it using it.currentIdx++ (data loss scenario)
78-
// For strict consistency, we return error.
7975
it.err = err
8076
return false
8177
}
8278

8379
it.currentFile = f
84-
// Create a buffer reader for faster reading
8580
br := bufio.NewReader(f)
8681
reader := io.Reader(br)
8782
it.currentReader = &reader
@@ -92,14 +87,12 @@ func (it *Iterator) Next() bool {
9287
_, err := io.ReadFull(*it.currentReader, header)
9388

9489
if err != nil {
95-
// If EOF is encountered, close the current file and increment the index so that the next iteration opens a new file
9690
if err == io.EOF {
9791
it.currentFile.Close()
9892
it.currentFile = nil
9993
it.currentIdx++
10094
continue
10195
}
102-
// If unexpected error occurs (UnexpectedEOF) -> Report error
10396
it.err = err
10497
return false
10598
}
@@ -114,6 +107,12 @@ func (it *Iterator) Next() bool {
114107
return false
115108
}
116109

110+
// Skip Footer (8 bytes)
111+
if _, err := io.CopyN(io.Discard, *it.currentReader, int64(footerSize)); err != nil {
112+
it.err = err
113+
return false
114+
}
115+
117116
neededSize := sizeSize + seqIDSize + len(payload)
118117
if cap(it.verifyBuf) < neededSize {
119118
it.verifyBuf = make([]byte, neededSize)
@@ -143,7 +142,6 @@ func (it *Iterator) Index() uint64 {
143142

144143
// Value: Get the data of the current log entry
145144
func (it *Iterator) Value() []byte {
146-
// Return a copy for safety, or return the original slice if you want zero-allocation (be careful)
147145
out := make([]byte, len(it.currentEntry))
148146
copy(out, it.currentEntry)
149147
return out
@@ -164,10 +162,8 @@ func (it *Iterator) Close() error {
164162
}
165163

166164
// Seek: Fast-forward to the record with ID >= startID
167-
// It will skip reading the payload and verifying the CRC of older records
168165
func (it *Iterator) Seek(startID uint64) bool {
169166
for {
170-
171167
if it.currentFile == nil {
172168
if it.currentIdx >= len(it.segmentPaths) {
173169
return false
@@ -200,20 +196,26 @@ func (it *Iterator) Seek(startID uint64) bool {
200196
seqID := binary.BigEndian.Uint64(header[crcSize+sizeSize : headerSize])
201197

202198
if seqID < startID {
203-
// Use Discard to jump over byte 'size' without copying the data
204-
if _, err := io.CopyN(io.Discard, *it.currentReader, int64(size)); err != nil {
199+
// Skip payload + footer efficiently
200+
if _, err := io.CopyN(io.Discard, *it.currentReader, int64(size)+int64(footerSize)); err != nil {
205201
it.err = err
206202
return false
207203
}
208204
continue
209205
} else {
210-
206+
// Read Payload
211207
payload := make([]byte, size)
212208
if _, err := io.ReadFull(*it.currentReader, payload); err != nil {
213209
it.err = err
214210
return false
215211
}
216212

213+
// Read and discard footer
214+
if _, err := io.CopyN(io.Discard, *it.currentReader, int64(footerSize)); err != nil {
215+
it.err = err
216+
return false
217+
}
218+
217219
verifyBuf := make([]byte, sizeSize+seqIDSize+len(payload))
218220
binary.BigEndian.PutUint64(verifyBuf[:sizeSize], size)
219221
binary.BigEndian.PutUint64(verifyBuf[sizeSize:sizeSize+seqIDSize], seqID)

0 commit comments

Comments
 (0)