-
Notifications
You must be signed in to change notification settings - Fork 675
BREAKING(csv): honour fieldsPerRecord when mapping rows to objects
#7141
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -228,15 +228,16 @@ export function convertRowToObject( | |
| row: readonly string[], | ||
| headers: readonly string[], | ||
| zeroBasedLine: number, | ||
| allowVariableLength: boolean = false, | ||
| ) { | ||
| if (row.length !== headers.length) { | ||
| if (!allowVariableLength && row.length !== headers.length) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth a one-line comment that this check still earns its keep in strict mode: the outer parser already enforces |
||
| throw new Error( | ||
| `Syntax error on line ${ | ||
| zeroBasedLine + 1 | ||
| }: The record has ${row.length} fields, but the header has ${headers.length} fields`, | ||
| ); | ||
| } | ||
| const out: Record<string, unknown> = {}; | ||
| const out: Record<string, string | undefined> = {}; | ||
| for (const [index, header] of headers.entries()) { | ||
| out[header] = row[index]; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion : maybe add |
||
| } | ||
|
|
@@ -249,23 +250,29 @@ export type ParseResult<ParseOptions, T> = | |
| T extends ParseOptions & { columns: readonly (infer C extends string)[] } | ||
| ? RecordWithColumn<C>[] | ||
| // If `skipFirstRow` option is specified, the return type is Record type. | ||
| : T extends ParseOptions & { skipFirstRow: true } ? Record<string, string>[] | ||
| : T extends ParseOptions & { skipFirstRow: true } | ||
| ? Record<string, string | undefined>[] | ||
| // If `columns` and `skipFirstRow` option is _not_ specified, the return type is string[][]. | ||
| : T extends | ||
| ParseOptions & { columns?: undefined; skipFirstRow?: false | undefined } | ||
| ? string[][] | ||
| // else, the return type is Record type or string[][]. | ||
| : Record<string, string>[] | string[][]; | ||
| : Record<string, string | undefined>[] | string[][]; | ||
|
|
||
| /** | ||
| * Record type with column type. | ||
| * | ||
| * Values are typed as `string | undefined` because variable-length records | ||
| * (the default mode when `fieldsPerRecord` is undefined or negative) may | ||
| * yield rows that are shorter than the header list. Missing fields surface | ||
| * as `undefined`. | ||
| * | ||
| * @example | ||
| * ``` | ||
| * type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string> | ||
| * type RecordWithColumn<"aaa"|"bbb"> => Record<"aaa"|"bbb", string | undefined> | ||
| * type RecordWithColumn<string> => Record<string, string | undefined> | ||
| * ``` | ||
| */ | ||
| export type RecordWithColumn<C extends string> = string extends C | ||
| ? Record<string, string> | ||
| : Record<C, string>; | ||
| ? Record<string, string | undefined> | ||
| : Record<C, string | undefined>; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -388,7 +388,7 @@ export function parse(input: string): string[][]; | |
| * const result = parse(string, { skipFirstRow: true }); | ||
| * | ||
| * assertEquals(result, [{ a: "d", b: "e", c: "f" }]); | ||
| * assertType<IsExact<typeof result, Record<string, string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<string, string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example Specify columns with `columns` option | ||
|
|
@@ -401,7 +401,7 @@ export function parse(input: string): string[][]; | |
| * const result = parse(string, { columns: ["x", "y", "z"] }); | ||
| * | ||
| * assertEquals(result, [{ x: "a", y: "b", z: "c" }, { x: "d", y: "e", z: "f" }]); | ||
| * assertType<IsExact<typeof result, Record<"x" | "y" | "z", string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<"x" | "y" | "z", string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example Specify columns with `columns` option and skip first row with | ||
|
|
@@ -415,7 +415,7 @@ export function parse(input: string): string[][]; | |
| * const result = parse(string, { columns: ["x", "y", "z"], skipFirstRow: true }); | ||
| * | ||
| * assertEquals(result, [{ x: "d", y: "e", z: "f" }]); | ||
| * assertType<IsExact<typeof result, Record<"x" | "y" | "z", string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<"x" | "y" | "z", string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example TSV (tab-separated values) with `separator: "\t"` | ||
|
|
@@ -489,12 +489,29 @@ export function parse(input: string): string[][]; | |
| * ); | ||
| * ``` | ||
| * | ||
| * @example Variable-length records with `skipFirstRow` or `columns` | ||
| * ```ts | ||
| * import { parse } from "@std/csv/parse"; | ||
| * import { assertEquals } from "@std/assert/equals"; | ||
| * | ||
| * const string = "name,age\nAlice,34\nBob\n"; | ||
| * const result = parse(string, { skipFirstRow: true }); | ||
| * | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion : add also an example when rows have more fields? |
||
| * assertEquals(result, [ | ||
| * { name: "Alice", age: "34" }, | ||
| * { name: "Bob", age: undefined }, | ||
| * ]); | ||
| * ``` | ||
| * | ||
| * @typeParam T The options' type for parsing. | ||
| * @param input The input to parse. | ||
| * @param options The options for parsing. | ||
| * @returns If you don't provide `options.skipFirstRow` or `options.columns`, it | ||
| * returns `string[][]`. If you provide `options.skipFirstRow` or | ||
| * `options.columns`, it returns `Record<string, string>[]`. | ||
| * `options.columns`, it returns `Record<string, string | undefined>[]`. Values | ||
| * are typed as `string | undefined` to reflect that variable-length records | ||
| * (the default when `fieldsPerRecord` is undefined or negative) may produce | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice new example. Suggest also documenting the extra-fields shape here (fields beyond |
||
| * rows shorter than the header list. Missing fields surface as `undefined`. | ||
| */ | ||
| export function parse<const T extends ParseOptions>( | ||
| input: string, | ||
|
|
@@ -523,8 +540,15 @@ export function parse<const T extends ParseOptions>( | |
| } | ||
|
|
||
| const zeroBasedFirstLineIndex = options.skipFirstRow ? 1 : 0; | ||
| const allowVariableLength = options.fieldsPerRecord === undefined || | ||
| options.fieldsPerRecord < 0; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that this intentionally excludes |
||
| return r.map((row, i) => { | ||
| return convertRowToObject(row, headers, zeroBasedFirstLineIndex + i); | ||
| return convertRowToObject( | ||
| row, | ||
| headers, | ||
| zeroBasedFirstLineIndex + i, | ||
| allowVariableLength, | ||
| ); | ||
| }) as ParseResult<ParseOptions, T>; | ||
| } | ||
| return r as ParseResult<ParseOptions, T>; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -147,7 +147,7 @@ export type RowType<T> = T extends undefined ? string[] | |
| * { name: "Alice", age: "34" }, | ||
| * { name: "Bob", age: "24" }, | ||
| * ]); | ||
| * assertType<IsExact<typeof result, Record<string, string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<string, string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example Specify columns with `columns` option | ||
|
|
@@ -169,7 +169,7 @@ export type RowType<T> = T extends undefined ? string[] | |
| * { name: "Alice", age: "34" }, | ||
| * { name: "Bob", age: "24" }, | ||
| * ]); | ||
| * assertType<IsExact<typeof result, Record<"name" | "age", string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<"name" | "age", string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example Specify columns with `columns` option and skip first row with | ||
|
|
@@ -190,7 +190,7 @@ export type RowType<T> = T extends undefined ? string[] | |
| * const result = await Array.fromAsync(stream); | ||
| * | ||
| * assertEquals(result, [{ name: "Bob", age: "24" }]); | ||
| * assertType<IsExact<typeof result, Record<"name" | "age", string>[]>>(true); | ||
| * assertType<IsExact<typeof result, Record<"name" | "age", string | undefined>[]>>(true); | ||
| * ``` | ||
| * | ||
| * @example TSV (tab-separated values) with `separator: "\t"` | ||
|
|
@@ -336,6 +336,27 @@ export type RowType<T> = T extends undefined ? string[] | |
| * ); | ||
| * ``` | ||
| * | ||
| * @example Variable-length records with `skipFirstRow` or `columns` | ||
| * ```ts | ||
| * import { CsvParseStream } from "@std/csv/parse-stream"; | ||
| * import { assertEquals } from "@std/assert/equals"; | ||
| * | ||
| * const source = ReadableStream.from([ | ||
| * "name,age\n", | ||
| * "Alice,34\n", | ||
| * "Bob\n", | ||
| * ]); | ||
| * const stream = source.pipeThrough( | ||
| * new CsvParseStream({ skipFirstRow: true }), | ||
| * ); | ||
| * const result = await Array.fromAsync(stream); | ||
| * | ||
| * assertEquals(result, [ | ||
| * { name: "Alice", age: "34" }, | ||
| * { name: "Bob", age: undefined }, | ||
| * ]); | ||
| * ``` | ||
| * | ||
| * @typeParam T The type of options for the stream. | ||
| */ | ||
| export class CsvParseStream< | ||
|
|
@@ -461,6 +482,7 @@ export class CsvParseStream< | |
| record, | ||
| this.#headers, | ||
| this.#zeroBasedLineIndex, | ||
| this.#fieldsPerRecord === "ANY", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor readability: lifting this to a named local would mirror the shape of the sync const allowVariableLength = this.#fieldsPerRecord === "ANY";
controller.enqueue(convertRowToObject(
record,
this.#headers,
this.#zeroBasedLineIndex,
allowVariableLength,
)); |
||
| )); | ||
| } else { | ||
| controller.enqueue(record); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Two call sites compute this flag independently —
parse.tsdoesfieldsPerRecord === undefined || fieldsPerRecord < 0, the stream does#fieldsPerRecord === "ANY". Both are correct today, but they're an invariant pair (the stream's normalization step defines whatANYmeans). Consider exporting a smallisVariableLength(fieldsPerRecord: number | undefined): booleanhelper from_io.tsand using it on both sides, so if the rules ever change (e.g. someone wantsfieldsPerRecord: nullto also mean variable-length) there's a single place to touch. Not blocking.