From 4b9eece79c184692b2b8946b61f54c6ff97fe772 Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Fri, 15 Aug 2025 15:51:18 -0400 Subject: [PATCH 1/2] pass 'utf8' option to the hyparquet methods --- src/lib/tableProvider.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/tableProvider.ts b/src/lib/tableProvider.ts index a1c30260..0713e943 100644 --- a/src/lib/tableProvider.ts +++ b/src/lib/tableProvider.ts @@ -1,6 +1,6 @@ import { DataFrame, DataFrameEvents, ResolvedValue, UnsortableDataFrame, createEventTarget, sortableDataFrame } from 'hightable' import type { ColumnData } from 'hyparquet' -import { FileMetaData, parquetSchema } from 'hyparquet' +import { FileMetaData, parquetSchema, ParquetReadOptions } from 'hyparquet' import { parquetReadWorker } from './workers/parquetWorkerClient.js' import type { AsyncBufferFrom } from './workers/types.d.ts' @@ -21,7 +21,7 @@ interface VirtualRowGroup { /** * Convert a parquet file into a dataframe. */ -export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): DataFrame { +export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData, options?: Pick<ParquetReadOptions, 'utf8'>): DataFrame { const { children } = parquetSchema(metadata) const header = children.map(child => child.element.name) const eventTarget = createEventTarget() @@ -54,7 +54,7 @@ export function parquetDataFrame(from: AsyncBufferFrom, metadata: FileMetaData): // TODO(SL): pass AbortSignal to the worker? 
if (columnsToFetch.length > 0) { - const commonPromise = parquetReadWorker({ from, metadata, rowStart: groupStart, rowEnd: groupEnd, columns: columnsToFetch, onChunk }) + const commonPromise = parquetReadWorker({ ...options, from, metadata, rowStart: groupStart, rowEnd: groupEnd, columns: columnsToFetch, onChunk }) columnsToFetch.forEach(column => { state.set(column, { kind: 'fetching', promise: commonPromise }) }) From 0cb795d985017b896e7c0605808f12775e1b92fc Mon Sep 17 00:00:00 2001 From: Sylvain Lesage Date: Fri, 15 Aug 2025 15:53:01 -0400 Subject: [PATCH 2/2] format --- src/lib/tableProvider.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/tableProvider.ts b/src/lib/tableProvider.ts index 0713e943..2175ecba 100644 --- a/src/lib/tableProvider.ts +++ b/src/lib/tableProvider.ts @@ -1,6 +1,6 @@ import { DataFrame, DataFrameEvents, ResolvedValue, UnsortableDataFrame, createEventTarget, sortableDataFrame } from 'hightable' import type { ColumnData } from 'hyparquet' -import { FileMetaData, parquetSchema, ParquetReadOptions } from 'hyparquet' +import { FileMetaData, ParquetReadOptions, parquetSchema } from 'hyparquet' import { parquetReadWorker } from './workers/parquetWorkerClient.js' import type { AsyncBufferFrom } from './workers/types.d.ts'