From 1b7fface3a1cbea75414c7e8ae92ccd5e153b84e Mon Sep 17 00:00:00 2001 From: fibibot Date: Thu, 14 May 2026 23:46:11 +0000 Subject: [PATCH] BREAKING(csv): honour `fieldsPerRecord` in variable-length mode for `parse()` and `CsvParseStream` When `skipFirstRow` or `columns` is set together with `fieldsPerRecord: -1` (or `fieldsPerRecord` left undefined, which is the same variable-length mode), `convertRowToObject` previously threw "The record has X fields, but the header has Y fields" unconditionally on length mismatch, ignoring the `fieldsPerRecord` setting. It now respects the mode: when variable-length records are permitted, short rows yield `undefined` for missing header keys and extra fields beyond the header list are dropped. Strict modes (`fieldsPerRecord >= 0`) keep the existing length check. The mapped-row value type widens from `Record<string, string>` to `Record<string, string | undefined>` (and the same shift in `RecordWithColumn`) so the static type reflects the runtime behaviour. This is a TS-level breaking change for callers reading values cookie-style (indexing by key and treating the result as `string`) without `noUncheckedIndexedAccess`. Fixes denoland/std#6434. 
--- csv/_io.ts | 21 +++++--- csv/parse.ts | 34 +++++++++++-- csv/parse_stream.ts | 28 +++++++++-- csv/parse_stream_test.ts | 77 ++++++++++++++++++++-------- csv/parse_test.ts | 105 +++++++++++++++++++++++++++++++-------- 5 files changed, 209 insertions(+), 56 deletions(-) diff --git a/csv/_io.ts b/csv/_io.ts index 5e80959c55b0..03ead77412b6 100644 --- a/csv/_io.ts +++ b/csv/_io.ts @@ -228,15 +228,16 @@ export function convertRowToObject( row: readonly string[], headers: readonly string[], zeroBasedLine: number, + allowVariableLength: boolean = false, ) { - if (row.length !== headers.length) { + if (!allowVariableLength && row.length !== headers.length) { throw new Error( `Syntax error on line ${ zeroBasedLine + 1 }: The record has ${row.length} fields, but the header has ${headers.length} fields`, ); } - const out: Record = {}; + const out: Record = {}; for (const [index, header] of headers.entries()) { out[header] = row[index]; } @@ -249,23 +250,29 @@ export type ParseResult = T extends ParseOptions & { columns: readonly (infer C extends string)[] } ? RecordWithColumn[] // If `skipFirstRow` option is specified, the return type is Record type. - : T extends ParseOptions & { skipFirstRow: true } ? Record[] + : T extends ParseOptions & { skipFirstRow: true } + ? Record[] // If `columns` and `skipFirstRow` option is _not_ specified, the return type is string[][]. : T extends ParseOptions & { columns?: undefined; skipFirstRow?: false | undefined } ? string[][] // else, the return type is Record type or string[][]. - : Record[] | string[][]; + : Record[] | string[][]; /** * Record type with column type. * + * Values are typed as `string | undefined` because variable-length records + * (the default mode when `fieldsPerRecord` is undefined or negative) may + * yield rows that are shorter than the header list. Missing fields surface + * as `undefined`. 
+ * * @example * ``` - * type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string> + * type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string | undefined> * type RecordWithColumn => Record * ``` */ export type RecordWithColumn = string extends C - ? Record - : Record; + ? Record + : Record; diff --git a/csv/parse.ts b/csv/parse.ts index 2ad28afd411e..a60fa0e0a664 100644 --- a/csv/parse.ts +++ b/csv/parse.ts @@ -388,7 +388,7 @@ export function parse(input: string): string[][]; * const result = parse(string, { skipFirstRow: true }); * * assertEquals(result, [{ a: "d", b: "e", c: "f" }]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example Specify columns with `columns` option @@ -401,7 +401,7 @@ export function parse(input: string): string[][]; * const result = parse(string, { columns: ["x", "y", "z"] }); * * assertEquals(result, [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example Specify columns with `columns` option and skip first row with @@ -415,7 +415,7 @@ export function parse(input: string): string[][]; * const result = parse(string, { columns: ["x", "y", "z"], skipFirstRow: true }); * * assertEquals(result, [{ x: "d", y: "e", z: "f" }]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example TSV (tab-separated values) with `separator: "\t"` @@ -489,12 +489,29 @@ export function parse(input: string): string[][]; * ); * ``` * + * @example Variable-length records with `skipFirstRow` or `columns` + * ```ts + * import { parse } from "@std/csv/parse"; + * import { assertEquals } from "@std/assert/equals"; + * + * const string = "name,age\nAlice,34\nBob\n"; + * const result = parse(string, { skipFirstRow: true }); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: undefined }, + * ]); + * ``` + * * @typeParam T The options' type for parsing. * @param input The input to parse. 
* @param options The options for parsing. * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it * returns `string[][]`. If you provide `options.skipFirstRow` or - * `options.columns`, it returns `Record[]`. + * `options.columns`, it returns `Record[]`. Values + * are typed as `string | undefined` to reflect that variable-length records + * (the default when `fieldsPerRecord` is undefined or negative) may produce + * rows shorter than the header list. Missing fields surface as `undefined`. */ export function parse( input: string, @@ -523,8 +540,15 @@ export function parse( } const zeroBasedFirstLineIndex = options.skipFirstRow ? 1 : 0; + const allowVariableLength = options.fieldsPerRecord === undefined || + options.fieldsPerRecord < 0; return r.map((row, i) => { - return convertRowToObject(row, headers, zeroBasedFirstLineIndex + i); + return convertRowToObject( + row, + headers, + zeroBasedFirstLineIndex + i, + allowVariableLength, + ); }) as ParseResult; } return r as ParseResult; diff --git a/csv/parse_stream.ts b/csv/parse_stream.ts index b94b9d9e0687..db5018a568fc 100644 --- a/csv/parse_stream.ts +++ b/csv/parse_stream.ts @@ -147,7 +147,7 @@ export type RowType = T extends undefined ? string[] * { name: "Alice", age: "34" }, * { name: "Bob", age: "24" }, * ]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example Specify columns with `columns` option @@ -169,7 +169,7 @@ export type RowType = T extends undefined ? string[] * { name: "Alice", age: "34" }, * { name: "Bob", age: "24" }, * ]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example Specify columns with `columns` option and skip first row with @@ -190,7 +190,7 @@ export type RowType = T extends undefined ? 
string[] * const result = await Array.fromAsync(stream); * * assertEquals(result, [{ name: "Bob", age: "24" }]); - * assertType[]>>(true); + * assertType[]>>(true); * ``` * * @example TSV (tab-separated values) with `separator: "\t"` @@ -336,6 +336,27 @@ export type RowType = T extends undefined ? string[] * ); * ``` * + * @example Variable-length records with `skipFirstRow` or `columns` + * ```ts + * import { CsvParseStream } from "@std/csv/parse-stream"; + * import { assertEquals } from "@std/assert/equals"; + * + * const source = ReadableStream.from([ + * "name,age\n", + * "Alice,34\n", + * "Bob\n", + * ]); + * const stream = source.pipeThrough( + * new CsvParseStream({ skipFirstRow: true }), + * ); + * const result = await Array.fromAsync(stream); + * + * assertEquals(result, [ + * { name: "Alice", age: "34" }, + * { name: "Bob", age: undefined }, + * ]); + * ``` + * * @typeParam T The type of options for the stream. */ export class CsvParseStream< @@ -461,6 +482,7 @@ export class CsvParseStream< record, this.#headers, this.#zeroBasedLineIndex, + this.#fieldsPerRecord === "ANY", )); } else { controller.enqueue(record); diff --git a/csv/parse_stream_test.ts b/csv/parse_stream_test.ts index 7eb95feff5d0..cb930faac4f3 100644 --- a/csv/parse_stream_test.ts +++ b/csv/parse_stream_test.ts @@ -347,26 +347,54 @@ x,,, columns: ["foo", "bar", "baz"], }, { - name: "mismatching number of headers and fields 1", + name: + "variable-length records: short row yields undefined for missing fields (skipFirstRow + columns)", input: "a,b,c\nd,e", + output: [{ foo: "d", bar: "e", baz: undefined }], skipFirstRow: true, columns: ["foo", "bar", "baz"], - error: { - klass: Error, - msg: - "Syntax error on line 2: The record has 2 fields, but the header has 3 fields", - }, }, { - name: "mismatching number of headers and fields 2", + name: + "variable-length records: extra fields are ignored (skipFirstRow + columns)", input: "a,b,c\nd,e,,g", + output: [{ foo: "d", bar: "e", baz: "" }], 
skipFirstRow: true, columns: ["foo", "bar", "baz"], - error: { - klass: Error, - msg: - "Syntax error on line 2: The record has 4 fields, but the header has 3 fields", - }, + }, + { + name: + "fieldsPerRecord: -1 with skipFirstRow: true tolerates short rows (issue #6434)", + input: "name,age\nAlice,34\nBob\n", + output: [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], + fieldsPerRecord: -1, + skipFirstRow: true, + }, + { + name: + "fieldsPerRecord: -1 with columns tolerates short rows (issue #6434)", + input: "Alice,34\nBob\n", + output: [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], + fieldsPerRecord: -1, + columns: ["name", "age"], + }, + { + name: + "fieldsPerRecord: -1 with skipFirstRow and columns tolerates short rows (issue #6434)", + input: "header1,header2\nAlice,34\nBob\n", + output: [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], + fieldsPerRecord: -1, + skipFirstRow: true, + columns: ["name", "age"], }, { name: "bad quote in bare field", @@ -491,13 +519,13 @@ Deno.test({ // `skipFirstRow` may be `true` or `false`. // `columns` may be `undefined` or `string[]`. 
// If you don't know exactly what the value of the option is, - // the return type is ReadableStream> + // the return type is ReadableStream> const options: CsvParseStreamOptions = {}; const { readable } = new CsvParseStream(options); type _ = AssertTrue< IsExact< typeof readable, - ReadableStream> + ReadableStream> > >; } @@ -520,7 +548,7 @@ Deno.test({ type _ = AssertTrue< IsExact< typeof readable, - ReadableStream> + ReadableStream> > >; } @@ -533,7 +561,10 @@ Deno.test({ { const { readable } = new CsvParseStream({ columns: ["aaa", "bbb"] }); type _ = AssertTrue< - IsExact>> + IsExact< + typeof readable, + ReadableStream> + > >; } { @@ -541,7 +572,7 @@ Deno.test({ type _ = AssertTrue< IsExact< typeof readable, - ReadableStream> + ReadableStream> > >; } @@ -556,7 +587,7 @@ Deno.test({ type _ = AssertTrue< IsExact< typeof readable, - ReadableStream> + ReadableStream> > >; } @@ -566,7 +597,10 @@ Deno.test({ columns: ["aaa"], }); type _ = AssertTrue< - IsExact>> + IsExact< + typeof readable, + ReadableStream> + > >; } { @@ -575,7 +609,10 @@ Deno.test({ columns: ["aaa"], }); type _ = AssertTrue< - IsExact>> + IsExact< + typeof readable, + ReadableStream> + > >; } }, diff --git a/csv/parse_test.ts b/csv/parse_test.ts index 1912758b8100..179d3f67b020 100644 --- a/csv/parse_test.ts +++ b/csv/parse_test.ts @@ -831,32 +831,95 @@ c"d,e`; }, }); await t.step({ - name: "mismatching number of headers and fields 1", + name: + "variable-length records: short row yields undefined for missing fields (skipFirstRow + columns)", fn() { const input = "a,b,c\nd,e"; - assertThrows( - () => - parse(input, { - skipFirstRow: true, - columns: ["foo", "bar", "baz"], - }), - Error, - "Syntax error on line 2: The record has 2 fields, but the header has 3 fields", + assertEquals( + parse(input, { + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + }), + [{ foo: "d", bar: "e", baz: undefined }], ); }, }); await t.step({ - name: "mismatching number of headers and fields 2", + name: + 
"variable-length records: extra fields are ignored (skipFirstRow + columns)", fn() { const input = "a,b,c\nd,e,,g"; + assertEquals( + parse(input, { + skipFirstRow: true, + columns: ["foo", "bar", "baz"], + }), + [{ foo: "d", bar: "e", baz: "" }], + ); + }, + }); + await t.step({ + name: + "mismatching number of headers and fields throws when fieldsPerRecord enforces strict mode", + fn() { + const input = "a,b\nc,d"; assertThrows( () => parse(input, { - skipFirstRow: true, - columns: ["foo", "bar", "baz"], + columns: ["foo"], + fieldsPerRecord: 2, }), Error, - "Syntax error on line 2: The record has 4 fields, but the header has 3 fields", + "Syntax error on line 1: The record has 2 fields, but the header has 1 fields", + ); + }, + }); + await t.step({ + name: + "fieldsPerRecord: -1 with skipFirstRow: true tolerates short rows (issue #6434)", + fn() { + const input = "name,age\nAlice,34\nBob\n"; + assertEquals( + parse(input, { fieldsPerRecord: -1, skipFirstRow: true }), + [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], + ); + }, + }); + await t.step({ + name: + "fieldsPerRecord: -1 with columns tolerates short rows (issue #6434)", + fn() { + const input = "Alice,34\nBob\n"; + assertEquals( + parse(input, { + fieldsPerRecord: -1, + columns: ["name", "age"], + }), + [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], + ); + }, + }); + await t.step({ + name: + "fieldsPerRecord: -1 with skipFirstRow and columns tolerates short rows (issue #6434)", + fn() { + const input = "header1,header2\nAlice,34\nBob\n"; + assertEquals( + parse(input, { + fieldsPerRecord: -1, + skipFirstRow: true, + columns: ["name", "age"], + }), + [ + { name: "Alice", age: "34" }, + { name: "Bob", age: undefined }, + ], ); }, }); @@ -943,13 +1006,13 @@ Deno.test({ // `skipFirstRow` may be `true` or `false`. // `columns` may be `undefined` or `string[]`. 
// If you don't know exactly what the value of the option is, - // the return type is string[][] | Record[] + // the return type is string[][] | Record[] const options: ParseOptions = {}; const parsed = parse("a\nb", options); type _ = AssertTrue< IsExact< typeof parsed, - string[][] | Record[] + string[][] | Record[] > >; } @@ -970,7 +1033,7 @@ Deno.test({ { const parsed = parse("a\nb", { skipFirstRow: true }); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } @@ -982,13 +1045,13 @@ Deno.test({ { const parsed = parse("a,b\nc,d", { columns: ["aaa", "bbb"] }); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } { const parsed = parse("a\nb", { columns: ["aaa"] as string[] }); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } @@ -1006,19 +1069,19 @@ Deno.test({ { skipFirstRow: true }, ); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } { const parsed = parse("a\nb", { skipFirstRow: false, columns: ["aaa"] }); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } { const parsed = parse("a\nb", { skipFirstRow: true, columns: ["aaa"] }); type _ = AssertTrue< - IsExact[]> + IsExact[]> >; } },