Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions Sources/CodexBarCore/Vendored/CostUsage/CostUsageJsonl.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,24 @@ enum CostUsageJsonl {
try handle.seek(toOffset: UInt64(startOffset))
}

var buffer = Data()
buffer.reserveCapacity(64 * 1024)

var current = Data()
current.reserveCapacity(4 * 1024)
var lineBytes = 0
var truncated = false
var bytesRead: Int64 = 0

/// Adds one newline-free slice of input to the line currently being assembled.
///
/// `lineBytes` grows for every non-empty slice, including slices that arrive
/// after the line has been marked truncated. As soon as the running total
/// exceeds either `maxLineBytes` or `prefixBytes`, the line is flagged as
/// truncated and the bytes buffered so far are discarded.
func appendSegment(_ segment: Data.SubSequence) {
    if segment.isEmpty { return }

    // Count first: the total length is tracked even when the content is dropped.
    lineBytes += segment.count
    if truncated { return }

    let fitsWithinLimits = lineBytes <= maxLineBytes && lineBytes <= prefixBytes
    if fitsWithinLimits {
        current.append(contentsOf: segment)
    } else {
        truncated = true
        // Keep the allocation so the next line can reuse it.
        current.removeAll(keepingCapacity: true)
    }
}

func flushLine() {
guard lineBytes > 0 else { return }
let line = Line(bytes: current, wasTruncated: truncated)
Expand All @@ -49,23 +58,14 @@ enum CostUsageJsonl {
}

bytesRead += Int64(chunk.count)
buffer.append(chunk)

while true {
guard let nl = buffer.firstIndex(of: 0x0A) else { break }
let linePart = buffer[..<nl]
buffer.removeSubrange(...nl)

lineBytes += linePart.count
if !truncated {
if lineBytes > maxLineBytes || lineBytes > prefixBytes {
truncated = true
current.removeAll(keepingCapacity: true)
} else {
current.append(contentsOf: linePart)
}
}
var segmentStart = chunk.startIndex
while let nl = chunk[segmentStart...].firstIndex(of: 0x0A) {
appendSegment(chunk[segmentStart..<nl])
flushLine()
segmentStart = chunk.index(after: nl)
}
if segmentStart < chunk.endIndex {
appendSegment(chunk[segmentStart..<chunk.endIndex])
}
}

Expand Down
54 changes: 54 additions & 0 deletions Tests/CodexBarTests/CostUsageScannerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,60 @@ struct CostUsageScannerTests {
#expect(report.data[0].outputTokens == 15)
#expect(report.data[0].totalTokens == 45)
}

/// Scans a file containing a 300 000-byte line followed by a short line and
/// checks that both lines are delivered whole, with limits (400 000) set
/// above the long line's length so neither is truncated.
@Test
func jsonlScannerHandlesLinesAcrossReadChunks() throws {
    let env = try CostUsageTestEnvironment()
    defer { env.cleanup() }

    let fileURL = env.root.appendingPathComponent("large-lines.jsonl", isDirectory: false)
    let largeLine = String(repeating: "x", count: 300_000)
    let contents = "\(largeLine)\nsmall\n"
    try contents.write(to: fileURL, atomically: true, encoding: .utf8)

    var scanned: [(count: Int, truncated: Bool)] = []
    let endOffset = try CostUsageJsonl.scan(
        fileURL: fileURL,
        maxLineBytes: 400_000,
        prefixBytes: 400_000)
    { line in
        scanned.append((line.bytes.count, line.wasTruncated))
    }

    #expect(endOffset == Int64(contents.utf8.count))

    // Stop here on a wrong line count: indexing below would otherwise trap
    // and take down the test process instead of reporting a failure.
    try #require(scanned.count == 2)
    #expect(scanned[0].count == 300_000)
    #expect(scanned[0].truncated == false)
    #expect(scanned[1].count == 5)
    #expect(scanned[1].truncated == false)
}

/// Scans a short line followed by a 2000-byte line with `prefixBytes: 64` and
/// checks that the short line is delivered intact while the long one is
/// reported as truncated with no bytes retained.
@Test
func jsonlScannerMarksPrefixLimitedLinesAsTruncated() throws {
    let env = try CostUsageTestEnvironment()
    defer { env.cleanup() }

    let fileURL = env.root.appendingPathComponent("truncated-lines.jsonl", isDirectory: false)
    let shortLine = "ok"
    let longLine = String(repeating: "a", count: 2000)
    let contents = "\(shortLine)\n\(longLine)\n"
    try contents.write(to: fileURL, atomically: true, encoding: .utf8)

    var scanned: [CostUsageJsonl.Line] = []
    _ = try CostUsageJsonl.scan(
        fileURL: fileURL,
        maxLineBytes: 10000,
        prefixBytes: 64)
    { line in
        scanned.append(line)
    }

    // Stop here on a wrong line count: indexing below would otherwise trap
    // and take down the test process instead of reporting a failure.
    try #require(scanned.count == 2)
    #expect(String(data: scanned[0].bytes, encoding: .utf8) == "ok")
    #expect(scanned[0].wasTruncated == false)
    #expect(scanned[1].bytes.isEmpty)
    #expect(scanned[1].wasTruncated == true)
}
}

private struct CostUsageTestEnvironment {
Expand Down