Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions csv/_io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,16 @@ export function convertRowToObject(
row: readonly string[],
headers: readonly string[],
zeroBasedLine: number,
allowVariableLength: boolean = false,
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Two call sites compute this flag independently — parse.ts does fieldsPerRecord === undefined || fieldsPerRecord < 0, the stream does #fieldsPerRecord === "ANY". Both are correct today, but they're an invariant pair (the stream's normalization step defines what ANY means). Consider exporting a small isVariableLength(fieldsPerRecord: number | undefined): boolean helper from _io.ts and using it on both sides, so if the rules ever change (e.g. someone wants fieldsPerRecord: null to also mean variable-length) there's a single place to touch. Not blocking.

) {
if (row.length !== headers.length) {
if (!allowVariableLength && row.length !== headers.length) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth a one-line comment that this check still earns its keep in strict mode: the outer parser already enforces record.length === fieldsPerRecord, but columns.length can differ from fieldsPerRecord (e.g. the fieldsPerRecord: 2, columns: ["foo"] test case), so this is the only place that catches that misuse. Otherwise a future cleanup might delete it as redundant.

throw new Error(
`Syntax error on line ${
zeroBasedLine + 1
}: The record has ${row.length} fields, but the header has ${headers.length} fields`,
);
}
const out: Record<string, unknown> = {};
const out: Record<string, string | undefined> = {};
for (const [index, header] of headers.entries()) {
out[header] = row[index];
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: maybe add `if (index === row.length) break;` so that no field is ever assigned `undefined`; that way you can avoid adding `undefined` to the union type everywhere.
See #7153.

}
Expand All @@ -249,23 +250,29 @@ export type ParseResult<ParseOptions, T> =
T extends ParseOptions & { columns: readonly (infer C extends string)[] }
? RecordWithColumn<C>[]
// If the `skipFirstRow` option is specified, the return type is a Record type.
: T extends ParseOptions & { skipFirstRow: true } ? Record<string, string>[]
: T extends ParseOptions & { skipFirstRow: true }
? Record<string, string | undefined>[]
// If neither the `columns` nor the `skipFirstRow` option is specified, the return type is string[][].
: T extends
ParseOptions & { columns?: undefined; skipFirstRow?: false | undefined }
? string[][]
// else, the return type is Record type or string[][].
: Record<string, string>[] | string[][];
: Record<string, string | undefined>[] | string[][];

/**
* Record type with column type.
*
* Values are typed as `string | undefined` because variable-length records
* (the default mode when `fieldsPerRecord` is undefined or negative) may
* yield rows that are shorter than the header list. Missing fields surface
* as `undefined`.
*
* @example
* ```
* type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string>
* type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string | undefined>
* type RecordWithColumn<string> => Record<string, string | undefined>
* ```
*/
export type RecordWithColumn<C extends string> = string extends C
? Record<string, string>
: Record<C, string>;
? Record<string, string | undefined>
: Record<C, string | undefined>;
34 changes: 29 additions & 5 deletions csv/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ export function parse(input: string): string[][];
* const result = parse(string, { skipFirstRow: true });
*
* assertEquals(result, [{ a: "d", b: "e", c: "f" }]);
* assertType<IsExact<typeof result, Record<string, string>[]>>(true);
* assertType<IsExact<typeof result, Record<string, string | undefined>[]>>(true);
* ```
*
* @example Specify columns with `columns` option
Expand All @@ -401,7 +401,7 @@ export function parse(input: string): string[][];
* const result = parse(string, { columns: ["x", "y", "z"] });
*
* assertEquals(result, [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]);
* assertType<IsExact<typeof result, Record<"x" | "y" | "z", string>[]>>(true);
* assertType<IsExact<typeof result, Record<"x" | "y" | "z", string | undefined>[]>>(true);
* ```
*
* @example Specify columns with `columns` option and skip first row with
Expand All @@ -415,7 +415,7 @@ export function parse(input: string): string[][];
* const result = parse(string, { columns: ["x", "y", "z"], skipFirstRow: true });
*
* assertEquals(result, [{ x: "d", y: "e", z: "f" }]);
* assertType<IsExact<typeof result, Record<"x" | "y" | "z", string>[]>>(true);
* assertType<IsExact<typeof result, Record<"x" | "y" | "z", string | undefined>[]>>(true);
* ```
*
* @example TSV (tab-separated values) with `separator: "\t"`
Expand Down Expand Up @@ -489,12 +489,29 @@ export function parse(input: string): string[][];
* );
* ```
*
* @example Variable-length records with `skipFirstRow` or `columns`
* ```ts
* import { parse } from "@std/csv/parse";
* import { assertEquals } from "@std/assert/equals";
*
* const string = "name,age\nAlice,34\nBob\n";
* const result = parse(string, { skipFirstRow: true });
*
Copy link
Copy Markdown

@PengjuXu PengjuXu May 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion: also add an example where a row has more fields than the header?
E.g. `"a\nb,c"` => `{ a: "b" }` // the extra "c" is dropped

* assertEquals(result, [
* { name: "Alice", age: "34" },
* { name: "Bob", age: undefined },
* ]);
* ```
*
* @typeParam T The options' type for parsing.
* @param input The input to parse.
* @param options The options for parsing.
* @returns If you don't provide `options.skipFirstRow` or `options.columns`, it
* returns `string[][]`. If you provide `options.skipFirstRow` or
* `options.columns`, it returns `Record<string, string>[]`.
* `options.columns`, it returns `Record<string, string | undefined>[]`. Values
* are typed as `string | undefined` to reflect that variable-length records
* (the default when `fieldsPerRecord` is undefined or negative) may produce
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice new example. Suggest also documenting the extra-fields shape here (fields beyond headers.length are silently dropped) — it's tested but not shown in user-facing docs, and "missing fields become undefined" naturally raises the question "what about extra ones?"

* rows shorter than the header list. Missing fields surface as `undefined`.
*/
export function parse<const T extends ParseOptions>(
input: string,
Expand Down Expand Up @@ -523,8 +540,15 @@ export function parse<const T extends ParseOptions>(
}

const zeroBasedFirstLineIndex = options.skipFirstRow ? 1 : 0;
const allowVariableLength = options.fieldsPerRecord === undefined ||
options.fieldsPerRecord < 0;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this intentionally excludes fieldsPerRecord: 0 (the "infer from first row, then enforce" mode) — which is correct, since once inferred it should behave strictly. Worth a brief inline comment to that effect; otherwise a reader who only knows the -1 semantics might think 0 was overlooked.

return r.map((row, i) => {
return convertRowToObject(row, headers, zeroBasedFirstLineIndex + i);
return convertRowToObject(
row,
headers,
zeroBasedFirstLineIndex + i,
allowVariableLength,
);
}) as ParseResult<ParseOptions, T>;
}
return r as ParseResult<ParseOptions, T>;
Expand Down
28 changes: 25 additions & 3 deletions csv/parse_stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ export type RowType<T> = T extends undefined ? string[]
* { name: "Alice", age: "34" },
* { name: "Bob", age: "24" },
* ]);
* assertType<IsExact<typeof result, Record<string, string>[]>>(true);
* assertType<IsExact<typeof result, Record<string, string | undefined>[]>>(true);
* ```
*
* @example Specify columns with `columns` option
Expand All @@ -169,7 +169,7 @@ export type RowType<T> = T extends undefined ? string[]
* { name: "Alice", age: "34" },
* { name: "Bob", age: "24" },
* ]);
* assertType<IsExact<typeof result, Record<"name" | "age", string>[]>>(true);
* assertType<IsExact<typeof result, Record<"name" | "age", string | undefined>[]>>(true);
* ```
*
* @example Specify columns with `columns` option and skip first row with
Expand All @@ -190,7 +190,7 @@ export type RowType<T> = T extends undefined ? string[]
* const result = await Array.fromAsync(stream);
*
* assertEquals(result, [{ name: "Bob", age: "24" }]);
* assertType<IsExact<typeof result, Record<"name" | "age", string>[]>>(true);
* assertType<IsExact<typeof result, Record<"name" | "age", string | undefined>[]>>(true);
* ```
*
* @example TSV (tab-separated values) with `separator: "\t"`
Expand Down Expand Up @@ -336,6 +336,27 @@ export type RowType<T> = T extends undefined ? string[]
* );
* ```
*
* @example Variable-length records with `skipFirstRow` or `columns`
* ```ts
* import { CsvParseStream } from "@std/csv/parse-stream";
* import { assertEquals } from "@std/assert/equals";
*
* const source = ReadableStream.from([
* "name,age\n",
* "Alice,34\n",
* "Bob\n",
* ]);
* const stream = source.pipeThrough(
* new CsvParseStream({ skipFirstRow: true }),
* );
* const result = await Array.fromAsync(stream);
*
* assertEquals(result, [
* { name: "Alice", age: "34" },
* { name: "Bob", age: undefined },
* ]);
* ```
*
* @typeParam T The type of options for the stream.
*/
export class CsvParseStream<
Expand Down Expand Up @@ -461,6 +482,7 @@ export class CsvParseStream<
record,
this.#headers,
this.#zeroBasedLineIndex,
this.#fieldsPerRecord === "ANY",
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor readability: lifting this to a named local would mirror the shape of the sync parse() call site and make the intent obvious without having to recall what "ANY" means in the discriminated union.

const allowVariableLength = this.#fieldsPerRecord === "ANY";
controller.enqueue(convertRowToObject(
  record,
  this.#headers,
  this.#zeroBasedLineIndex,
  allowVariableLength,
));

));
} else {
controller.enqueue(record);
Expand Down
77 changes: 57 additions & 20 deletions csv/parse_stream_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -347,26 +347,54 @@ x,,,
columns: ["foo", "bar", "baz"],
},
{
name: "mismatching number of headers and fields 1",
name:
"variable-length records: short row yields undefined for missing fields (skipFirstRow + columns)",
input: "a,b,c\nd,e",
output: [{ foo: "d", bar: "e", baz: undefined }],
skipFirstRow: true,
columns: ["foo", "bar", "baz"],
error: {
klass: Error,
msg:
"Syntax error on line 2: The record has 2 fields, but the header has 3 fields",
},
},
{
name: "mismatching number of headers and fields 2",
name:
"variable-length records: extra fields are ignored (skipFirstRow + columns)",
input: "a,b,c\nd,e,,g",
output: [{ foo: "d", bar: "e", baz: "" }],
skipFirstRow: true,
columns: ["foo", "bar", "baz"],
error: {
klass: Error,
msg:
"Syntax error on line 2: The record has 4 fields, but the header has 3 fields",
},
},
{
name:
"fieldsPerRecord: -1 with skipFirstRow: true tolerates short rows (issue #6434)",
input: "name,age\nAlice,34\nBob\n",
output: [
{ name: "Alice", age: "34" },
{ name: "Bob", age: undefined },
],
fieldsPerRecord: -1,
skipFirstRow: true,
},
{
name:
"fieldsPerRecord: -1 with columns tolerates short rows (issue #6434)",
input: "Alice,34\nBob\n",
output: [
{ name: "Alice", age: "34" },
{ name: "Bob", age: undefined },
],
fieldsPerRecord: -1,
columns: ["name", "age"],
},
{
name:
"fieldsPerRecord: -1 with skipFirstRow and columns tolerates short rows (issue #6434)",
input: "header1,header2\nAlice,34\nBob\n",
output: [
{ name: "Alice", age: "34" },
{ name: "Bob", age: undefined },
],
fieldsPerRecord: -1,
skipFirstRow: true,
columns: ["name", "age"],
},
{
name: "bad quote in bare field",
Expand Down Expand Up @@ -491,13 +519,13 @@ Deno.test({
// `skipFirstRow` may be `true` or `false`.
// `columns` may be `undefined` or `string[]`.
// If you don't know exactly what the value of the option is,
// the return type is ReadableStream<string[] | Record<string, string>>
// the return type is ReadableStream<string[] | Record<string, string | undefined>>
const options: CsvParseStreamOptions = {};
const { readable } = new CsvParseStream(options);
type _ = AssertTrue<
IsExact<
typeof readable,
ReadableStream<string[] | Record<string, string>>
ReadableStream<string[] | Record<string, string | undefined>>
>
>;
}
Expand All @@ -520,7 +548,7 @@ Deno.test({
type _ = AssertTrue<
IsExact<
typeof readable,
ReadableStream<Record<string, string>>
ReadableStream<Record<string, string | undefined>>
>
>;
}
Expand All @@ -533,15 +561,18 @@ Deno.test({
{
const { readable } = new CsvParseStream({ columns: ["aaa", "bbb"] });
type _ = AssertTrue<
IsExact<typeof readable, ReadableStream<Record<"aaa" | "bbb", string>>>
IsExact<
typeof readable,
ReadableStream<Record<"aaa" | "bbb", string | undefined>>
>
>;
}
{
const { readable } = new CsvParseStream({ columns: ["aaa"] as string[] });
type _ = AssertTrue<
IsExact<
typeof readable,
ReadableStream<Record<string, string>>
ReadableStream<Record<string, string | undefined>>
>
>;
}
Expand All @@ -556,7 +587,7 @@ Deno.test({
type _ = AssertTrue<
IsExact<
typeof readable,
ReadableStream<Record<string, string>>
ReadableStream<Record<string, string | undefined>>
>
>;
}
Expand All @@ -566,7 +597,10 @@ Deno.test({
columns: ["aaa"],
});
type _ = AssertTrue<
IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
IsExact<
typeof readable,
ReadableStream<Record<"aaa", string | undefined>>
>
>;
}
{
Expand All @@ -575,7 +609,10 @@ Deno.test({
columns: ["aaa"],
});
type _ = AssertTrue<
IsExact<typeof readable, ReadableStream<Record<"aaa", string>>>
IsExact<
typeof readable,
ReadableStream<Record<"aaa", string | undefined>>
>
>;
}
},
Expand Down
Loading
Loading