Skip to content

Commit 722aa9f

Browse files
g-cqdclaude
andcommitted
Remove development artifacts and add test improvements
- Remove compass artifact file - Add new encoder test files for nested, parallel, and strategy testing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6043d2c commit 722aa9f

9 files changed

Lines changed: 1014 additions & 1002 deletions

Sources/CSVCoder/Parsing/CSVParser.swift

Lines changed: 228 additions & 399 deletions
Large diffs are not rendered by default.
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
//
2+
// CSVRowView.swift
3+
// CSVCoder
4+
//
5+
// A zero-copy view into a single CSV row within a raw UTF-8 buffer.
6+
//
7+
8+
import Foundation
9+
10+
// MARK: - CSVRowView
11+
12+
/// A zero-copy view into a single CSV row within a raw UTF-8 buffer.
13+
///
14+
/// `CSVRowView` provides efficient access to field data without allocating copies.
15+
/// It stores byte offsets and lengths rather than string values, enabling
16+
/// high-performance parsing of large CSV files.
17+
///
18+
/// ## Thread Safety
19+
///
20+
/// `CSVRowView` is **not** `Sendable` because it references a borrowed buffer
21+
/// that must remain valid for the view's lifetime. Always use within the closure
22+
/// scope of ``CSVParser/parse(data:delimiter:body:)``.
23+
///
24+
/// ## Usage
25+
///
26+
/// ```swift
27+
/// CSVParser.parse(data: csvData) { parser in
28+
/// for row in parser {
29+
/// for i in 0..<row.count {
30+
/// if let value = row.string(at: i) {
31+
/// print(value)
32+
/// }
33+
/// }
34+
/// }
35+
/// }
36+
/// ```
37+
///
38+
/// ## Performance Notes
39+
///
40+
/// - Field access via ``string(at:)`` is O(1) for unquoted fields
41+
/// - Quoted fields with escaped quotes (`""`) require O(n) unescaping
42+
/// - Use ``getBytes(at:)`` for maximum performance when UTF-8 bytes suffice
43+
public struct CSVRowView {
    // MARK: Lifecycle

    /// Creates a row view with the given buffer and field metadata.
    ///
    /// The metadata arrays are parallel: element `i` of each array describes field `i`.
    /// The initializer stores the values as given and performs no validation; the
    /// accessors validate every field span against `buffer` before touching memory.
    ///
    /// - Parameters:
    ///   - buffer: The full UTF-8 buffer the row points into (owned elsewhere).
    ///   - fieldStarts: Offset of each field's first byte within `buffer`.
    ///   - fieldLengths: Length in bytes of each field.
    ///   - fieldQuoted: Whether each field was quoted.
    ///   - fieldHasEscapedQuote: Whether each field contains escaped quotes (`""`).
    ///   - hasUnterminatedQuote: Whether any field has an unterminated quote.
    ///   - hasQuoteInUnquotedField: Whether any unquoted field contains a quote character.
    public init(
        buffer: UnsafeBufferPointer<UInt8>,
        fieldStarts: [Int],
        fieldLengths: [Int],
        fieldQuoted: [Bool],
        fieldHasEscapedQuote: [Bool],
        hasUnterminatedQuote: Bool,
        hasQuoteInUnquotedField: Bool
    ) {
        self.buffer = buffer
        self.fieldStarts = fieldStarts
        self.fieldLengths = fieldLengths
        self.fieldQuoted = fieldQuoted
        self.fieldHasEscapedQuote = fieldHasEscapedQuote
        self.hasUnterminatedQuote = hasUnterminatedQuote
        self.hasQuoteInUnquotedField = hasQuoteInUnquotedField
    }

    // MARK: Public

    /// Reference to the full buffer (owned elsewhere).
    public let buffer: UnsafeBufferPointer<UInt8>

    /// Offsets of field starts within the buffer.
    public let fieldStarts: [Int]

    /// Lengths of each field.
    public let fieldLengths: [Int]

    /// Whether each field was quoted (needs unescaping).
    public let fieldQuoted: [Bool]

    /// Whether each field contains escaped quotes (`""`) that need unescaping.
    public let fieldHasEscapedQuote: [Bool]

    /// Whether any field has an unterminated quote.
    public let hasUnterminatedQuote: Bool

    /// Whether any unquoted field contains a quote character (RFC 4180 violation).
    public let hasQuoteInUnquotedField: Bool

    /// The number of fields in this row.
    public var count: Int { fieldStarts.count }

    /// Returns the raw UTF-8 bytes for the field at the given index.
    ///
    /// This method provides zero-copy access to field data, useful when
    /// you need to perform custom parsing or validation without allocating strings.
    ///
    /// - Parameter index: The zero-based field index.
    /// - Returns: A buffer pointer to the field's UTF-8 bytes, or an empty buffer if the
    ///   index is out of bounds or the field's span does not lie inside `buffer`.
    /// - Complexity: O(1)
    ///
    /// - Warning: The returned buffer is only valid while the parent `CSVParser`'s
    ///   data remains in scope. Do not store the buffer beyond the parsing closure.
    public func getBytes(at index: Int) -> UnsafeBufferPointer<UInt8> {
        guard let span = fieldSpan(at: index) else {
            return UnsafeBufferPointer(start: nil, count: 0)
        }
        // A nil base address implies an empty buffer, so the validated span has length 0
        // and the (nil, 0) pair produced here is a valid empty buffer pointer.
        return UnsafeBufferPointer(start: buffer.baseAddress?.advanced(by: span.start), count: span.length)
    }

    /// Decodes and returns the string value for the field at the given index.
    ///
    /// Handles RFC 4180 quote unescaping automatically:
    /// - Quoted fields have outer quotes stripped
    /// - Escaped quotes (`""`) are converted to single quotes (`"`)
    ///
    /// - Parameter index: The zero-based field index.
    /// - Returns: The decoded string value, or `nil` if the index is out of bounds.
    /// - Complexity: O(1) for unquoted fields; O(n) for quoted fields with escaped quotes.
    public func string(at index: Int) -> String? {
        string(at: index, encoding: .utf8)
    }

    /// Decodes and returns the string value for the field at the given index using the specified encoding.
    ///
    /// Handles RFC 4180 quote unescaping automatically:
    /// - Quoted fields have outer quotes stripped
    /// - Escaped quotes (`""`) are converted to single quotes (`"`)
    ///
    /// - Parameters:
    ///   - index: The zero-based field index.
    ///   - encoding: The string encoding to use for conversion. For best performance, use `.utf8`.
    /// - Returns: The decoded string value, or `nil` if the index is out of bounds, the field's
    ///   metadata does not describe a span inside `buffer`, or conversion fails.
    /// - Complexity: O(1) for unquoted UTF-8 fields; O(n) for quoted fields with escaped quotes or non-UTF-8 encodings.
    public func string(at index: Int, encoding: String.Encoding) -> String? {
        guard
            let span = fieldSpan(at: index),
            index < fieldQuoted.count,
            index < fieldHasEscapedQuote.count,
            let base = buffer.baseAddress
        else {
            return nil
        }

        let ptr = base.advanced(by: span.start)
        let fieldBuffer = UnsafeBufferPointer(start: ptr, count: span.length)

        // Only quoted fields that actually contain `""` take the unescaping path.
        let needsUnescaping = fieldQuoted[index] && fieldHasEscapedQuote[index]

        // Fast path for UTF-8 (most common case)
        if encoding == .utf8 {
            if needsUnescaping {
                return CSVUnescaper.unescape(buffer: fieldBuffer)
            }
            // No unescaping needed - direct decode
            return String(decoding: fieldBuffer, as: UTF8.self)
        }

        // Non-UTF-8 encoding path (ASCII-compatible encodings like ISO-8859-1, Windows-1252)
        if needsUnescaping {
            return CSVUnescaper.unescape(buffer: fieldBuffer, encoding: encoding)
        }

        let data = Data(bytes: ptr, count: span.length)
        return String(data: data, encoding: encoding)
    }

    // MARK: Private

    /// Returns the byte span of the field at `index`, or `nil` when the index is out of
    /// range or the recorded start/length do not fit inside `buffer`.
    private func fieldSpan(at index: Int) -> (start: Int, length: Int)? {
        guard index >= 0, index < fieldStarts.count, index < fieldLengths.count else {
            return nil
        }

        let start = fieldStarts[index]
        let length = fieldLengths[index]

        // Compare against `buffer.count - start` rather than computing `start + length`,
        // which could overflow for hostile metadata.
        guard start >= 0, length >= 0, start <= buffer.count, length <= buffer.count - start else {
            return nil
        }
        return (start, length)
    }
}

Sources/CSVCoder/Utilities/CSVUtilities.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ enum CSVUtilities {
118118
.windowsCP1253,
119119
.windowsCP1254:
120120
true
121+
121122
case .unicode,
122123
.utf16,
123124
.utf16BigEndian,

Sources/CSVCoderBenchmarks/main.swift

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,26 @@ nonisolated func generateOrderCSV(rows: Int) -> String {
202202
let hasDiscount = i % 3 == 0
203203
let hasNotes = i % 5 == 0
204204
let hasShipDate = i % 2 == 0
205-
csv += "ORD-\(String(format: "%08d", i)),\(1000 + i % 500),\"Customer \(i)\",customer\(i)@example.com,\(i % 1000),\"Product \(i % 100)\",\(1 + i % 10),\(Double(10 + i % 100)),\(hasDiscount ? "0.1" : ""),0.08,5.99,\(Double(10 + i % 100) * Double(1 + i % 10)),USD,\(payments[i % payments.count]),2024-\(String(format: "%02d", 1 + i % 12))-\(String(format: "%02d", 1 + i % 28)),\(hasShipDate ? "2024-\(String(format: "%02d", 1 + (i + 3) % 12))-\(String(format: "%02d", 1 + (i + 3) % 28))" : ""),\(statuses[i % statuses.count]),\(hasNotes ? "\"Rush order, handle with care\"" : "")\n"
205+
let orderId = "ORD-\(String(format: "%08d", i))"
206+
let customerId = "\(1000 + i % 500)"
207+
let customerName = "\"Customer \(i)\""
208+
let email = "customer\(i)@example.com"
209+
let productId = "\(i % 1000)"
210+
let productName = "\"Product \(i % 100)\""
211+
let quantity = "\(1 + i % 10)"
212+
let unitPrice = "\(Double(10 + i % 100))"
213+
let discount = hasDiscount ? "0.1" : ""
214+
let totalAmount = "\(Double(10 + i % 100) * Double(1 + i % 10))"
215+
let payment = payments[i % payments.count]
216+
let orderDate = "2024-\(String(format: "%02d", 1 + i % 12))-\(String(format: "%02d", 1 + i % 28))"
217+
let shipDate = hasShipDate
218+
? "2024-\(String(format: "%02d", 1 + (i + 3) % 12))-\(String(format: "%02d", 1 + (i + 3) % 28))"
219+
: ""
220+
let status = statuses[i % statuses.count]
221+
let notes = hasNotes ? "\"Rush order, handle with care\"" : ""
222+
csv += "\(orderId),\(customerId),\(customerName),\(email),\(productId),\(productName),"
223+
csv += "\(quantity),\(unitPrice),\(discount),0.08,5.99,\(totalAmount),USD,\(payment),"
224+
csv += "\(orderDate),\(shipDate),\(status),\(notes)\n"
206225
}
207226
return csv
208227
}
@@ -215,7 +234,24 @@ nonisolated func generateTransactionCSV(rows: Int) -> String {
215234
let hasExchange = i % 4 == 0
216235
let hasRef = i % 3 == 0
217236
let hasProcessor = i % 2 == 0
218-
csv += "TXN\(String(format: "%012d", i)),ACC\(String(format: "%08d", i % 10000)),ACC\(String(format: "%08d", (i + 5000) % 10000)),\(Double(100 + i % 10000)),\(currencies[i % currencies.count]),\(hasExchange ? "1.12" : ""),\(Double(i % 50) * 0.01),2024-\(String(format: "%02d", 1 + i % 12))-\(String(format: "%02d", 1 + i % 28))T\(String(format: "%02d", i % 24)):\(String(format: "%02d", i % 60)):00Z,\(categories[i % categories.count]),\"Transaction \(i) description\",\(hasRef ? "REF\(i)" : ""),completed,\(hasProcessor ? "PROC\(i % 100)" : "")\n"
237+
let txnId = "TXN\(String(format: "%012d", i))"
238+
let accountFrom = "ACC\(String(format: "%08d", i % 10000))"
239+
let accountTo = "ACC\(String(format: "%08d", (i + 5000) % 10000))"
240+
let amount = "\(Double(100 + i % 10000))"
241+
let currency = currencies[i % currencies.count]
242+
let exchangeRate = hasExchange ? "1.12" : ""
243+
let fee = "\(Double(i % 50) * 0.01)"
244+
let month = String(format: "%02d", 1 + i % 12)
245+
let day = String(format: "%02d", 1 + i % 28)
246+
let hour = String(format: "%02d", i % 24)
247+
let minute = String(format: "%02d", i % 60)
248+
let timestamp = "2024-\(month)-\(day)T\(hour):\(minute):00Z"
249+
let category = categories[i % categories.count]
250+
let description = "\"Transaction \(i) description\""
251+
let reference = hasRef ? "REF\(i)" : ""
252+
let processor = hasProcessor ? "PROC\(i % 100)" : ""
253+
csv += "\(txnId),\(accountFrom),\(accountTo),\(amount),\(currency),\(exchangeRate),\(fee),"
254+
csv += "\(timestamp),\(category),\(description),\(reference),completed,\(processor)\n"
219255
}
220256
return csv
221257
}
@@ -230,7 +266,26 @@ nonisolated func generateLogCSV(rows: Int) -> String {
230266
let hasUserId = i % 3 == 0
231267
let hasDuration = i % 2 == 0
232268
let hasMetadata = i % 5 == 0
233-
csv += "2024-\(String(format: "%02d", 1 + i % 12))-\(String(format: "%02d", 1 + i % 28))T\(String(format: "%02d", i % 24)):\(String(format: "%02d", i % 60)):\(String(format: "%02d", i % 60)).\(String(format: "%03d", i % 1000))Z,\(levels[i % levels.count]),\(services[i % services.count]),host-\(i % 10).cluster.local,\(hasRequestId ? "req-\(UUID().uuidString.prefix(8))" : ""),\(hasUserId ? "user-\(i % 1000)" : ""),\(actions[i % actions.count]),/api/v1/resource/\(i % 100),\(hasDuration ? "\(50 + i % 500)" : ""),\(200 + (i % 5) * 100),\"Request processed successfully for item \(i)\",\(hasMetadata ? "\"{\"\"key\"\":\"\"value\"\"}\"" : "")\n"
269+
let month = String(format: "%02d", 1 + i % 12)
270+
let day = String(format: "%02d", 1 + i % 28)
271+
let hour = String(format: "%02d", i % 24)
272+
let minute = String(format: "%02d", i % 60)
273+
let second = String(format: "%02d", i % 60)
274+
let millis = String(format: "%03d", i % 1000)
275+
let timestamp = "2024-\(month)-\(day)T\(hour):\(minute):\(second).\(millis)Z"
276+
let level = levels[i % levels.count]
277+
let service = services[i % services.count]
278+
let host = "host-\(i % 10).cluster.local"
279+
let requestId = hasRequestId ? "req-\(UUID().uuidString.prefix(8))" : ""
280+
let userId = hasUserId ? "user-\(i % 1000)" : ""
281+
let action = actions[i % actions.count]
282+
let resource = "/api/v1/resource/\(i % 100)"
283+
let duration = hasDuration ? "\(50 + i % 500)" : ""
284+
let statusCode = "\(200 + (i % 5) * 100)"
285+
let message = "\"Request processed successfully for item \(i)\""
286+
let metadata = hasMetadata ? "\"{\"\"key\"\":\"\"value\"\"}\"" : ""
287+
csv += "\(timestamp),\(level),\(service),\(host),\(requestId),\(userId),"
288+
csv += "\(action),\(resource),\(duration),\(statusCode),\(message),\(metadata)\n"
234289
}
235290
return csv
236291
}
@@ -341,6 +396,7 @@ let simpleRecords100K = (0 ..< 100_000).map { SimpleRecord(
341396
score: Double($0) * 0.1,
342397
)
343398
}
399+
344400
let simpleRecords1M = (0 ..< 1_000_000).map { SimpleRecord(
345401
name: "Person\($0)",
346402
age: 20 + $0 % 50,

0 commit comments

Comments
 (0)