Skip to content

Commit bf7d898

Browse files
g-cqd and claude committed
Fix trimWhitespace configuration consistency across all decode paths
- Apply trimWhitespace in getValue() centrally for CSVKeyedDecodingContainer
- Add trimmedValue computed property to CSVSingleValueContainer
- Update all numeric, boolean, date, and string decoders to use trimmed values
- Fix header trimming inconsistency between main and parallel decode paths
- Document UTF-8 encoding limitation in configuration
- Add comprehensive tests for whitespace trimming with numeric fields
- Update CHANGELOG and GettingStarted documentation

Previously, trimWhitespace only took effect in the streaming parser and in parallel header extraction. Numeric fields such as Int and Double would fail to parse when surrounded by whitespace (e.g., " 2024 "), and Boolean decoding always trimmed regardless of configuration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d5da4fe commit bf7d898

6 files changed

Lines changed: 133 additions & 55 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6767
- Unterminated quote detection with line/column information
6868
- Quoted fields containing CRLF now parse correctly
6969
- Empty quoted fields handled properly
70+
- `trimWhitespace` configuration now consistently applied to all field types
71+
- Previously, numeric fields (Int, Double, etc.) failed to parse when the value had leading or trailing whitespace
72+
- Boolean decoding now respects configuration instead of always trimming
73+
- Header extraction now applies trimWhitespace consistently across all decode paths
7074

7175
## [1.1.0] - 2025-12-27
7276

Sources/CSVCoder/CSVCoder.docc/GettingStarted.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,19 @@ let config = CSVDecoder.Configuration(
9797
)
9898
```
9999

100+
### Whitespace Handling
101+
102+
By default, CSVCoder trims leading and trailing whitespace (spaces and tabs, but not line breaks) from all field values:
103+
104+
```swift
105+
// CSV: " Toyota , 2024 " becomes "Toyota" and 2024
106+
let decoder = CSVDecoder() // trimWhitespace defaults to true
107+
108+
// Preserve whitespace if needed
109+
let config = CSVDecoder.Configuration(trimWhitespace: false)
110+
let decoder = CSVDecoder(configuration: config)
111+
```
112+
100113
## Topics
101114

102115
### Next Steps

Sources/CSVCoder/Core/CSVDecoder.swift

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ public final class CSVDecoder: Sendable {
9696
public var hasHeaders: Bool
9797

9898
/// The encoding to use when reading data. Default is UTF-8.
99+
///
100+
/// - Note: The current implementation optimizes for UTF-8 with zero-copy parsing.
101+
/// Non-UTF-8 data should be converted to String using the appropriate encoding
102+
/// before passing to `decode(from:)`. This property is reserved for future use.
99103
public var encoding: String.Encoding
100104

101105
/// Whether to trim whitespace from field values. Default is true.
@@ -388,7 +392,8 @@ public final class CSVDecoder: Sendable {
388392
rawHeaders.reserveCapacity(firstRow.count)
389393
for i in 0..<firstRow.count {
390394
if let s = firstRow.string(at: i) {
391-
rawHeaders.append(s)
395+
// Apply trimWhitespace to headers for consistency with parallel decoding
396+
rawHeaders.append(configuration.trimWhitespace ? s.trimmingCharacters(in: .whitespaces) : s)
392397
} else {
393398
rawHeaders.append("column\(i)")
394399
}

Sources/CSVCoder/Decoder/CSVRowDecoder.swift

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ struct CSVKeyedDecodingContainer<Key: CodingKey>: KeyedDecodingContainerProtocol
9898
}
9999

100100
private func getValue(for key: Key) throws -> String {
101+
let rawValue: String
101102
switch source {
102103
case .dictionary(let row):
103104
guard let value = row[key.stringValue] else {
@@ -106,7 +107,7 @@ struct CSVKeyedDecodingContainer<Key: CodingKey>: KeyedDecodingContainerProtocol
106107
location: makeLocation(for: key, includeAvailableKeys: true)
107108
)
108109
}
109-
return value
110+
rawValue = value
110111

111112
case .view(let view, let headerMap):
112113
guard let index = headerMap[key.stringValue], index < view.count else {
@@ -122,8 +123,11 @@ struct CSVKeyedDecodingContainer<Key: CodingKey>: KeyedDecodingContainerProtocol
122123
location: makeLocation(for: key, includeAvailableKeys: true)
123124
)
124125
}
125-
return value
126+
rawValue = value
126127
}
128+
129+
// Apply trimWhitespace configuration
130+
return configuration.trimWhitespace ? rawValue.trimmingCharacters(in: .whitespaces) : rawValue
127131
}
128132

129133
/// Returns the string value for a key, or nil if not present.
@@ -166,7 +170,7 @@ struct CSVKeyedDecodingContainer<Key: CodingKey>: KeyedDecodingContainerProtocol
166170
}
167171

168172
func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool {
169-
let value = try getValue(for: key).lowercased().trimmingCharacters(in: .whitespaces)
173+
let value = try getValue(for: key).lowercased()
170174
let location = makeLocation(for: key)
171175

172176
switch configuration.boolDecodingStrategy {

Sources/CSVCoder/Decoder/CSVSingleValueDecoder.swift

Lines changed: 57 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
3333
let configuration: CSVDecoder.Configuration
3434
let codingPath: [CodingKey]
3535

36+
/// The value with trimWhitespace applied based on configuration.
37+
private var trimmedValue: String {
38+
configuration.trimWhitespace ? value.trimmingCharacters(in: .whitespaces) : value
39+
}
40+
3641
private var location: CSVLocation {
3742
CSVLocation(codingPath: codingPath)
3843
}
@@ -42,7 +47,7 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
4247
}
4348

4449
func decode(_ type: Bool.Type) throws -> Bool {
45-
let lower = value.lowercased().trimmingCharacters(in: .whitespaces)
50+
let lower = trimmedValue.lowercased()
4651

4752
switch configuration.boolDecodingStrategy {
4853
case .standard:
@@ -76,19 +81,19 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
7681
}
7782

7883
func decode(_ type: String.Type) throws -> String {
79-
value
84+
trimmedValue
8085
}
8186

8287
func decode(_ type: Double.Type) throws -> Double {
83-
guard let result = parseDouble(value) else {
84-
throw CSVDecodingError.typeMismatch(expected: "Double", actual: value, location: location)
88+
guard let result = parseDouble(trimmedValue) else {
89+
throw CSVDecodingError.typeMismatch(expected: "Double", actual: trimmedValue, location: location)
8590
}
8691
return result
8792
}
8893

8994
func decode(_ type: Float.Type) throws -> Float {
90-
guard let result = parseDouble(value) else {
91-
throw CSVDecodingError.typeMismatch(expected: "Float", actual: value, location: location)
95+
guard let result = parseDouble(trimmedValue) else {
96+
throw CSVDecodingError.typeMismatch(expected: "Float", actual: trimmedValue, location: location)
9297
}
9398
return Float(result)
9499
}
@@ -221,71 +226,71 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
221226
}
222227

223228
func decode(_ type: Int.Type) throws -> Int {
224-
guard let result = Int(value) else {
225-
throw CSVDecodingError.typeMismatch(expected: "Int", actual: value, location: location)
229+
guard let result = Int(trimmedValue) else {
230+
throw CSVDecodingError.typeMismatch(expected: "Int", actual: trimmedValue, location: location)
226231
}
227232
return result
228233
}
229234

230235
func decode(_ type: Int8.Type) throws -> Int8 {
231-
guard let result = Int8(value) else {
232-
throw CSVDecodingError.typeMismatch(expected: "Int8", actual: value, location: location)
236+
guard let result = Int8(trimmedValue) else {
237+
throw CSVDecodingError.typeMismatch(expected: "Int8", actual: trimmedValue, location: location)
233238
}
234239
return result
235240
}
236241

237242
func decode(_ type: Int16.Type) throws -> Int16 {
238-
guard let result = Int16(value) else {
239-
throw CSVDecodingError.typeMismatch(expected: "Int16", actual: value, location: location)
243+
guard let result = Int16(trimmedValue) else {
244+
throw CSVDecodingError.typeMismatch(expected: "Int16", actual: trimmedValue, location: location)
240245
}
241246
return result
242247
}
243248

244249
func decode(_ type: Int32.Type) throws -> Int32 {
245-
guard let result = Int32(value) else {
246-
throw CSVDecodingError.typeMismatch(expected: "Int32", actual: value, location: location)
250+
guard let result = Int32(trimmedValue) else {
251+
throw CSVDecodingError.typeMismatch(expected: "Int32", actual: trimmedValue, location: location)
247252
}
248253
return result
249254
}
250255

251256
func decode(_ type: Int64.Type) throws -> Int64 {
252-
guard let result = Int64(value) else {
253-
throw CSVDecodingError.typeMismatch(expected: "Int64", actual: value, location: location)
257+
guard let result = Int64(trimmedValue) else {
258+
throw CSVDecodingError.typeMismatch(expected: "Int64", actual: trimmedValue, location: location)
254259
}
255260
return result
256261
}
257262

258263
func decode(_ type: UInt.Type) throws -> UInt {
259-
guard let result = UInt(value) else {
260-
throw CSVDecodingError.typeMismatch(expected: "UInt", actual: value, location: location)
264+
guard let result = UInt(trimmedValue) else {
265+
throw CSVDecodingError.typeMismatch(expected: "UInt", actual: trimmedValue, location: location)
261266
}
262267
return result
263268
}
264269

265270
func decode(_ type: UInt8.Type) throws -> UInt8 {
266-
guard let result = UInt8(value) else {
267-
throw CSVDecodingError.typeMismatch(expected: "UInt8", actual: value, location: location)
271+
guard let result = UInt8(trimmedValue) else {
272+
throw CSVDecodingError.typeMismatch(expected: "UInt8", actual: trimmedValue, location: location)
268273
}
269274
return result
270275
}
271276

272277
func decode(_ type: UInt16.Type) throws -> UInt16 {
273-
guard let result = UInt16(value) else {
274-
throw CSVDecodingError.typeMismatch(expected: "UInt16", actual: value, location: location)
278+
guard let result = UInt16(trimmedValue) else {
279+
throw CSVDecodingError.typeMismatch(expected: "UInt16", actual: trimmedValue, location: location)
275280
}
276281
return result
277282
}
278283

279284
func decode(_ type: UInt32.Type) throws -> UInt32 {
280-
guard let result = UInt32(value) else {
281-
throw CSVDecodingError.typeMismatch(expected: "UInt32", actual: value, location: location)
285+
guard let result = UInt32(trimmedValue) else {
286+
throw CSVDecodingError.typeMismatch(expected: "UInt32", actual: trimmedValue, location: location)
282287
}
283288
return result
284289
}
285290

286291
func decode(_ type: UInt64.Type) throws -> UInt64 {
287-
guard let result = UInt64(value) else {
288-
throw CSVDecodingError.typeMismatch(expected: "UInt64", actual: value, location: location)
292+
guard let result = UInt64(trimmedValue) else {
293+
throw CSVDecodingError.typeMismatch(expected: "UInt64", actual: trimmedValue, location: location)
289294
}
290295
return result
291296
}
@@ -298,24 +303,24 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
298303

299304
// Handle Decimal specially
300305
if type == Decimal.self {
301-
guard let decimal = parseDecimal(value) else {
302-
throw CSVDecodingError.typeMismatch(expected: "Decimal", actual: value, location: location)
306+
guard let decimal = parseDecimal(trimmedValue) else {
307+
throw CSVDecodingError.typeMismatch(expected: "Decimal", actual: trimmedValue, location: location)
303308
}
304309
return decimal as! T
305310
}
306311

307312
// Handle UUID specially
308313
if type == UUID.self {
309-
guard let uuid = UUID(uuidString: value) else {
310-
throw CSVDecodingError.typeMismatch(expected: "UUID", actual: value, location: location)
314+
guard let uuid = UUID(uuidString: trimmedValue) else {
315+
throw CSVDecodingError.typeMismatch(expected: "UUID", actual: trimmedValue, location: location)
311316
}
312317
return uuid as! T
313318
}
314319

315320
// Handle URL specially
316321
if type == URL.self {
317-
guard let url = URL(string: value) else {
318-
throw CSVDecodingError.typeMismatch(expected: "URL", actual: value, location: location)
322+
guard let url = URL(string: trimmedValue) else {
323+
throw CSVDecodingError.typeMismatch(expected: "URL", actual: trimmedValue, location: location)
319324
}
320325
return url as! T
321326
}
@@ -325,26 +330,27 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
325330
}
326331

327332
private func decodeDate() throws -> Date {
333+
let dateValue = trimmedValue
328334
switch configuration.dateDecodingStrategy {
329335
case .deferredToDate:
330-
throw CSVDecodingError.typeMismatch(expected: "Date (use a date strategy)", actual: value, location: location)
336+
throw CSVDecodingError.typeMismatch(expected: "Date (use a date strategy)", actual: dateValue, location: location)
331337

332338
case .secondsSince1970:
333-
guard let seconds = Double(value) else {
334-
throw CSVDecodingError.typeMismatch(expected: "Unix timestamp", actual: value, location: location)
339+
guard let seconds = Double(dateValue) else {
340+
throw CSVDecodingError.typeMismatch(expected: "Unix timestamp", actual: dateValue, location: location)
335341
}
336342
return Date(timeIntervalSince1970: seconds)
337343

338344
case .millisecondsSince1970:
339-
guard let milliseconds = Double(value) else {
340-
throw CSVDecodingError.typeMismatch(expected: "Unix timestamp (ms)", actual: value, location: location)
345+
guard let milliseconds = Double(dateValue) else {
346+
throw CSVDecodingError.typeMismatch(expected: "Unix timestamp (ms)", actual: dateValue, location: location)
341347
}
342348
return Date(timeIntervalSince1970: milliseconds / 1000)
343349

344350
case .iso8601:
345351
let formatter = ISO8601DateFormatter()
346-
guard let date = formatter.date(from: value) else {
347-
throw CSVDecodingError.typeMismatch(expected: "ISO8601 date", actual: value, location: location)
352+
guard let date = formatter.date(from: dateValue) else {
353+
throw CSVDecodingError.typeMismatch(expected: "ISO8601 date", actual: dateValue, location: location)
348354
}
349355
return date
350356

@@ -353,40 +359,40 @@ struct CSVSingleValueContainer: SingleValueDecodingContainer {
353359
formatter.dateFormat = format
354360
formatter.locale = Locale.autoupdatingCurrent
355361
formatter.timeZone = TimeZone.autoupdatingCurrent
356-
guard let date = formatter.date(from: value) else {
357-
throw CSVDecodingError.typeMismatch(expected: "Date with format \(format)", actual: value, location: location)
362+
guard let date = formatter.date(from: dateValue) else {
363+
throw CSVDecodingError.typeMismatch(expected: "Date with format \(format)", actual: dateValue, location: location)
358364
}
359365
return date
360366

361367
case .custom(let closure):
362-
return try closure(value)
368+
return try closure(dateValue)
363369

364370
case .flexible:
365-
guard let date = parseFlexibleDate(value, hint: nil) else {
366-
throw CSVDecodingError.typeMismatch(expected: "Date (no matching format found)", actual: value, location: location)
371+
guard let date = parseFlexibleDate(dateValue, hint: nil) else {
372+
throw CSVDecodingError.typeMismatch(expected: "Date (no matching format found)", actual: dateValue, location: location)
367373
}
368374
return date
369375

370376
case .flexibleWithHint(let preferred):
371-
guard let date = parseFlexibleDate(value, hint: preferred) else {
372-
throw CSVDecodingError.typeMismatch(expected: "Date (no matching format found)", actual: value, location: location)
377+
guard let date = parseFlexibleDate(dateValue, hint: preferred) else {
378+
throw CSVDecodingError.typeMismatch(expected: "Date (no matching format found)", actual: dateValue, location: location)
373379
}
374380
return date
375381

376382
case .localeAware(let locale, let style):
377383
if #available(iOS 15.0, macOS 12.0, tvOS 15.0, watchOS 8.0, *) {
378-
if let date = LocaleUtilities.parseDate(value, locale: locale, style: style) {
384+
if let date = LocaleUtilities.parseDate(dateValue, locale: locale, style: style) {
379385
return date
380386
}
381387
// Fall back to flexible parsing if locale-aware fails
382-
if let date = parseFlexibleDate(value, hint: nil) {
388+
if let date = parseFlexibleDate(dateValue, hint: nil) {
383389
return date
384390
}
385-
throw CSVDecodingError.typeMismatch(expected: "Date (locale-aware)", actual: value, location: location)
391+
throw CSVDecodingError.typeMismatch(expected: "Date (locale-aware)", actual: dateValue, location: location)
386392
} else {
387393
// Pre-iOS 15: use flexible parsing
388-
guard let date = parseFlexibleDate(value, hint: nil) else {
389-
throw CSVDecodingError.typeMismatch(expected: "Date", actual: value, location: location)
394+
guard let date = parseFlexibleDate(dateValue, hint: nil) else {
395+
throw CSVDecodingError.typeMismatch(expected: "Date", actual: dateValue, location: location)
390396
}
391397
return date
392398
}

0 commit comments

Comments
 (0)