diff --git a/buckaroo/artifact.py b/buckaroo/artifact.py index fbcf4851..aafece5d 100644 --- a/buckaroo/artifact.py +++ b/buckaroo/artifact.py @@ -29,7 +29,7 @@ def _df_to_parquet_b64_tagged(df: pd.DataFrame) -> dict: JSON-encoded per cell (same convention as sd_to_parquet_b64) so the JS side can decode them uniformly via parseParquetRow(). - Returns {'format': 'parquet_b64', 'data': ''} + Returns {'format': 'parquet_b64', 'layout': 'row', 'data': ''} """ df2 = prepare_df_for_serialization(df) if not isinstance(df.index, pd.MultiIndex): @@ -55,7 +55,7 @@ def _df_to_parquet_b64_tagged(df: pd.DataFrame) -> dict: df2.to_parquet(buf, engine='pyarrow') buf.seek(0) b64 = base64.b64encode(buf.read()).decode('ascii') - return {'format': 'parquet_b64', 'data': b64} + return {'format': 'parquet_b64', 'layout': 'row', 'data': b64} def prepare_buckaroo_artifact(df, column_config_overrides=None, diff --git a/buckaroo/serialization_utils.py b/buckaroo/serialization_utils.py index 633c044c..29b7158b 100644 --- a/buckaroo/serialization_utils.py +++ b/buckaroo/serialization_utils.py @@ -271,35 +271,37 @@ def _json_encode_cell(val): def sd_to_parquet_b64(sd: Dict[str, Any]) -> Dict[str, str]: """Convert a summary stats dict to a tagged parquet-b64 payload. - Summary stats DataFrames have mixed-type columns (strings, numbers, lists) - which fastparquet can't handle directly. We JSON-encode every cell value - first so each column becomes a pure string column, then use pyarrow for - parquet serialization. The JS side decodes parquet then JSON.parse's each cell. + Uses a wide-column layout: one parquet column per (col, stat) pair. + Column names are ``{short_col}__{stat_name}`` (e.g. ``a__mean``). + The parquet file has a single row. All cell values are JSON-encoded + via ``_json_encode_cell()`` so the JS side can ``JSON.parse`` each one. - Returns {'format': 'parquet_b64', 'data': ''} + Returns ``{'format': 'parquet_b64', 'layout': 'wide', 'data': ''}`` Falls back to JSON if parquet serialization fails. """ - # JSON-encode every value so parquet sees only string columns - json_sd: Dict[str, Any] = {} - for col, stats in sd.items(): - if isinstance(stats, dict): - json_sd[col] = {k: _json_encode_cell(v) for k, v in stats.items()} - else: - json_sd[col] = stats + import pyarrow as pa + import pyarrow.parquet as pq - df = pd.DataFrame(json_sd) - df2 = prepare_df_for_serialization(df) - # Add level_0 for backwards compatibility with JSON path (pd_to_obj adds it) - if not isinstance(df.index, pd.MultiIndex): - df2['level_0'] = df2['index'] + col_mapping = [(orig, to_chars(i)) for i, orig in enumerate(sd.keys())] + names: List[str] = [] + arrays: List = [] + + for orig_col, short_col in col_mapping: + stats = sd[orig_col] + if not isinstance(stats, dict): + continue + for stat_name, val in stats.items(): + names.append(f"{short_col}__{stat_name}") + arrays.append(pa.array([_json_encode_cell(val)])) try: + table = pa.table(dict(zip(names, arrays))) data = BytesIO() - df2.to_parquet(data, engine='pyarrow') + pq.write_table(table, data) data.seek(0) raw_bytes = data.read() b64 = base64.b64encode(raw_bytes).decode('ascii') - return {'format': 'parquet_b64', 'data': b64} + return {'format': 'parquet_b64', 'layout': 'wide', 'data': b64} except Exception as e: logger.warning("Failed to serialize summary stats as parquet, falling back to JSON: %r", e) return pd_to_obj(pd.DataFrame(sd)) diff --git a/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts b/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts index 17c18347..9eeb8fd7 100644 --- a/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts +++ b/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts @@ -15,8 +15,21 @@ import { waitForCells, getRowCount } from './ag-pw-utils'; test.describe('Static embed renders', () => { test('AG-Grid table appears with data rows', async ({ page }) => { + // Capture all console messages for debugging + const logs: string[] = []; + page.on('console', msg => logs.push(`[${msg.type()}] ${msg.text()}`)); + page.on('pageerror', err => logs.push(`[PAGE_ERROR] ${err.message}`)); + await page.goto('/static-test.html'); + // Give 5s for initial load, then dump console + await page.waitForTimeout(5000); + console.log('--- Browser console output ---'); + for (const log of logs) console.log(log); + console.log('--- End browser console ---'); + console.log('Page title:', await page.title()); + console.log('Body text (first 500):', (await page.locator('body').innerText()).slice(0, 500)); + // Wait for the AG-Grid cells to render (parquet decode + React mount) await waitForCells(page); diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx b/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx index 3e9559ff..904089f4 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx @@ -19,7 +19,7 @@ export const formatter = (value: any, name: any, props: any) => { } }; -export function FloatingTooltip({ items, x, y }: any) { +export function FloatingTooltip({ items, x, y }: any): React.ReactPortal { const offset = 30; const renderedItems = items.map((name: [string, number], _value: number | string) => { const [realName, realValue] = name; diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts b/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts index 1d5fa123..9cd65f36 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts @@ -216,6 +216,7 @@ export type DFData = DFDataRow[]; export interface ParquetB64Payload { format: 'parquet_b64'; data: string; // base64-encoded parquet bytes + layout?: 'wide' | 'row'; // 'wide' = summary stats (col__stat columns), 'row' = normal rows } // A value in df_data_dict can be plain JSON (DFData) or a tagged parquet payload diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx b/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx index f9854b14..7b226a19 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx @@ -20,7 +20,7 @@ export const formatter = (value: any, name: any, props: any) => { } }; -export function FloatingTooltip({ items, x, y }: any) { +export function FloatingTooltip({ items, x, y }: any): React.ReactPortal { const offset = 30; const renderedItems = items.map((name: [string, number], _value: number | string) => { const [realName, realValue] = name; diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts b/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts index 807db3cc..46edf653 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts @@ -1,9 +1,8 @@ import { parquetRead, parquetMetadata } from 'hyparquet'; -import { resolveDFData, resolveDFDataAsync } from './resolveDFData'; +import { resolveDFData, resolveDFDataAsync, pivotWideSummaryStats } from './resolveDFData'; import { DFData, DFDataRow, ParquetB64Payload } from './DFWhole'; -// Fixture generated by Python's sd_to_parquet_b64() with a summary stats dict -// containing numeric histogram data for one column. +// Fixture generated by Python's sd_to_parquet_b64() with wide-column layout. // eslint-disable-next-line @typescript-eslint/no-var-requires const fixture = require('./test-fixtures/summary_stats_parquet_b64.json'); const parquetPayload: ParquetB64Payload = fixture as ParquetB64Payload; @@ -29,9 +28,7 @@ describe('resolveDFData', () => { expect(resolveDFData(data)).toBe(data); }); - it('hyparquet can read the parquet_b64 fixture', async () => { - // Verify the fixture is valid and hyparquet can decode it. - // This is independent of resolveDFData — it tests the raw decode path. + it('hyparquet can read the wide-format parquet_b64 fixture', async () => { const buf = b64ToArrayBuffer(parquetPayload.data); const metadata = parquetMetadata(buf); expect(metadata.row_groups.length).toBeGreaterThan(0); @@ -44,60 +41,124 @@ describe('resolveDFData', () => { onComplete: (data: any[]) => { rows.push(...data); }, }); - expect(rows.length).toBeGreaterThan(0); + // Wide format: single row with col__stat columns + expect(rows.length).toBe(1); + const keys = Object.keys(rows[0]); + expect(keys.some(k => k.includes('__'))).toBe(true); + expect(keys).toContain('a__mean'); + expect(keys).toContain('b__dtype'); + }); - // Should have an 'index' column with stat names - const indices = rows.map(r => r.index).filter(Boolean); - expect(indices).toContain('histogram'); - expect(indices).toContain('dtype'); + it('sync resolveDFData returns [] for parquet_b64 (known async limitation)', () => { + const result = resolveDFData(parquetPayload); + expect(result.length).toBe(0); }); - it('parquet_b64 histogram data round-trips with correct types', async () => { - // Decode the fixture and verify histogram arrays have the right structure. - const buf = b64ToArrayBuffer(parquetPayload.data); - const metadata = parquetMetadata(buf); + it('async resolveDFDataAsync returns pivoted DFData for wide-format parquet', async () => { + const result = await resolveDFDataAsync(parquetPayload); + expect(result.length).toBeGreaterThan(0); - const rows: DFDataRow[] = []; - await parquetRead({ - file: buf, - metadata, - rowFormat: 'object', - onComplete: (data: any[]) => { rows.push(...data); }, - }); + // Should have row-based format with index column + const meanRow = result.find(r => r.index === 'mean'); + expect(meanRow).toBeDefined(); + expect(meanRow!.a).toBe(50.0); + expect(meanRow!.b).toBe(22.0); - const histRow = rows.find(r => r.index === 'histogram'); - expect(histRow).toBeDefined(); + const dtypeRow = result.find(r => r.index === 'dtype'); + expect(dtypeRow).toBeDefined(); + expect(dtypeRow!.a).toBe('float64'); + expect(dtypeRow!.b).toBe('int64'); + }); - // Column 'a' contains the JSON-encoded histogram array - const rawCell = histRow!['a']; - expect(typeof rawCell).toBe('string'); + it('async decode produces histogram arrays from JSON strings', async () => { + const result = await resolveDFDataAsync(parquetPayload); - const parsed = JSON.parse(rawCell as string); - expect(Array.isArray(parsed)).toBe(true); - expect(parsed.length).toBeGreaterThan(0); + const histRow = result.find(r => r.index === 'histogram'); + expect(histRow).toBeDefined(); + expect(Array.isArray(histRow!.a)).toBe(true); + const hist = histRow!.a as any[]; + expect(hist.length).toBe(5); + expect(typeof hist[0].population).toBe('number'); + expect(hist[0].name).toBe('0-20'); + }); - // Verify types: population should be a number, not a string - const popBar = parsed.find((b: any) => b.population !== undefined); - expect(popBar).toBeDefined(); - expect(typeof popBar.population).toBe('number'); - expect(typeof parsed[0].name).toBe('string'); + it('async decode produces histogram_bins arrays', async () => { + const result = await resolveDFDataAsync(parquetPayload); + + const binsRow = result.find(r => r.index === 'histogram_bins'); + expect(binsRow).toBeDefined(); + expect(Array.isArray(binsRow!.a)).toBe(true); + expect((binsRow!.a as number[]).length).toBe(6); }); +}); - it('sync resolveDFData returns [] for parquet_b64 (known async limitation)', () => { - // Documents #630: parquetRead is async so the sync wrapper returns []. - // Widget components use useResolvedDFDataDict which falls back to async. - // The static embed path uses resolveDFDataAsync which works correctly. - const result = resolveDFData(parquetPayload); - expect(result.length).toBe(0); +describe('pivotWideSummaryStats', () => { + it('pivots a wide row into row-based DFData', () => { + const wideRow = { + a__mean: 42.5, + a__dtype: 'float64', + b__mean: 10.0, + b__dtype: 'int64', + }; + const result = pivotWideSummaryStats(wideRow); + + const meanRow = result.find(r => r.index === 'mean'); + expect(meanRow).toBeDefined(); + expect(meanRow!.a).toBe(42.5); + expect(meanRow!.b).toBe(10.0); + expect(meanRow!.level_0).toBe('mean'); + + const dtypeRow = result.find(r => r.index === 'dtype'); + expect(dtypeRow).toBeDefined(); + expect(dtypeRow!.a).toBe('float64'); + expect(dtypeRow!.b).toBe('int64'); }); - it('async resolveDFDataAsync returns non-empty result for parquet_b64', async () => { - const result = await resolveDFDataAsync(parquetPayload); - expect(result.length).toBeGreaterThan(0); + it('JSON-parses list/object values in string cells', () => { + const wideRow = { + a__histogram: '[{"name": "foo", "population": 10}]', + a__dtype: 'float64', + }; + const result = pivotWideSummaryStats(wideRow); - // Verify the histogram row was JSON-parsed correctly const histRow = result.find(r => r.index === 'histogram'); expect(histRow).toBeDefined(); - expect(Array.isArray(histRow!['a'])).toBe(true); + expect(Array.isArray(histRow!.a)).toBe(true); + expect((histRow!.a as any[])[0].population).toBe(10); + }); + + it('keeps plain strings as strings (not JSON-parsed)', () => { + const wideRow = { + a__dtype: 'float64', + }; + const result = pivotWideSummaryStats(wideRow); + const row = result.find(r => r.index === 'dtype'); + expect(row!.a).toBe('float64'); + }); + + it('handles null values', () => { + const wideRow = { + a__mean: null, + a__dtype: 'float64', + }; + const result = pivotWideSummaryStats(wideRow); + const meanRow = result.find(r => r.index === 'mean'); + expect(meanRow!.a).toBeNull(); + }); + + it('fills missing columns with null', () => { + const wideRow = { + a__mean: 42, + b__dtype: 'int64', + }; + const result = pivotWideSummaryStats(wideRow); + + const meanRow = result.find(r => r.index === 'mean'); + expect(meanRow!.a).toBe(42); + expect(meanRow!.b).toBeNull(); + + const dtypeRow = result.find(r => r.index === 'dtype'); + expect(dtypeRow!.a).toBeNull(); + expect(dtypeRow!.b).toBe('int64'); }); }); diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.ts b/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.ts index 254d20ce..53ccdc46 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.ts +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.ts @@ -41,20 +41,69 @@ function b64ToArrayBuffer(b64: string): ArrayBuffer { } /** - * JSON-parse each cell value in a row from parquet-decoded data. + * Pivot a wide single-row parquet result (col__stat columns) back to + * row-based DFData that downstream consumers expect. * - * The Python side JSON-encodes every cell before writing to parquet - * (because summary stats have mixed types per column). We need to - * JSON.parse each value back to its original type. + * Input: single row object like {a__mean: 42, a__dtype: "float64", b__mean: 10, ...} + * Output: DFData rows like [{index: "mean", level_0: "mean", a: 42, b: 10}, ...] + */ +export function pivotWideSummaryStats(wideRow: Record): DFData { + // Group values by stat name: stat -> {col -> value} + const statCols: Record> = {}; + const allCols = new Set(); + + for (const [key, rawVal] of Object.entries(wideRow)) { + const sepIdx = key.indexOf('__'); + if (sepIdx === -1) continue; + const col = key.substring(0, sepIdx); + const stat = key.substring(sepIdx + 2); + allCols.add(col); + if (!statCols[stat]) statCols[stat] = {}; + + let val: any = rawVal; + // JSON-parse all string values (cells are JSON-encoded in parquet) + if (typeof val === 'string') { + try { + val = JSON.parse(val); + } catch { + // not JSON, keep as string + } + } + // BigInt conversion (hyparquet INT64) + if (typeof val === 'bigint') { + const MAX_SAFE = BigInt(Number.MAX_SAFE_INTEGER); + statCols[stat][col] = val >= -MAX_SAFE && val <= MAX_SAFE + ? Number(val) : String(val); + continue; + } + statCols[stat][col] = val; + } + + // Build DFData: one row per stat + const colList = Array.from(allCols); + const rows: DFData = []; + for (const [stat, cols] of Object.entries(statCols)) { + const row: DFDataRow = { index: stat, level_0: stat }; + for (let i = 0; i < colList.length; i++) { + const col = colList[i]; + row[col] = cols[col] ?? null; + } + rows.push(row); + } + return rows; +} + +/** + * JSON-parse each cell value in a row from parquet-decoded data. * - * The 'index' column is left as a plain string (stat name like 'mean', 'dtype'). + * For non-wide parquet data (e.g. main DataFrame), object/category columns + * are JSON-encoded on the Python side and need to be parsed back. + * The 'index' and 'level_0' columns are kept as-is. */ function parseParquetRow(row: Record): DFDataRow { const parsed: DFDataRow = {}; for (const [key, val] of Object.entries(row)) { if (key === 'index' || key === 'level_0') { - // index/level_0 columns are stat names — keep as-is - // BigInt from hyparquet INT64 columns must be converted to Number parsed[key] = typeof val === 'bigint' ? Number(val) : val; } else if (typeof val === 'string') { try { @@ -63,8 +112,6 @@ function parseParquetRow(row: Record): DFDataRow { parsed[key] = val; } } else if (typeof val === 'bigint') { - // hyparquet decodes INT64 as BigInt; use Number only if safe, - // otherwise stringify to preserve precision (fixes #627) const MAX_SAFE = BigInt(Number.MAX_SAFE_INTEGER); parsed[key] = val >= -MAX_SAFE && val <= MAX_SAFE ? Number(val) : String(val); @@ -75,6 +122,10 @@ function parseParquetRow(row: Record): DFDataRow { return parsed; } +function isWideFormat(layout?: 'wide' | 'row'): boolean { + return layout === 'wide'; +} + /** * Synchronously resolve a DFDataOrPayload to DFData. * @@ -106,8 +157,11 @@ export function resolveDFData(val: DFDataOrPayload | undefined | null): DFData { metadata, rowFormat: 'object', onComplete: (data: any[]) => { - // JSON-parse each cell to recover typed values - result = (data as DFDataRow[]).map(parseParquetRow); + if (isWideFormat(val.layout)) { + result = pivotWideSummaryStats(data[0] as Record); + } else { + result = (data as DFDataRow[]).map(parseParquetRow); + } cacheSet(val.data, result); }, }); @@ -156,7 +210,12 @@ export async function resolveDFDataAsync(val: DFDataOrPayload | undefined | null reject(e); } }); - const result = (data as DFDataRow[]).map(parseParquetRow); + let result: DFData; + if (isWideFormat(val.layout)) { + result = pivotWideSummaryStats(data[0] as Record); + } else { + result = (data as DFDataRow[]).map(parseParquetRow); + } cacheSet(val.data, result); return result; } catch (e) { diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/test-fixtures/summary_stats_parquet_b64.json b/packages/buckaroo-js-core/src/components/DFViewerParts/test-fixtures/summary_stats_parquet_b64.json index 116c4285..366c7f03 100644 --- a/packages/buckaroo-js-core/src/components/DFViewerParts/test-fixtures/summary_stats_parquet_b64.json +++ b/packages/buckaroo-js-core/src/components/DFViewerParts/test-fixtures/summary_stats_parquet_b64.json @@ -1 +1,5 @@ -{"format": "parquet_b64", "data": "UEFSMRUEFewEFeoCTBUMFQASAAC2AlAJAAAAImZsb2F0NjQiBAAAAHRydWUBCLA1MC4wCQEAAFt7Im5hbWUiOiAiMC4wIC0gMS4wIiwgInRhaWwiOiAxfSwgeyIRIggxLTIFHURwb3B1bGF0aW9uIjogMTUuMH0uJgAMMjAtNEYnAAAyPicADDQwLTZGJwAEMzA6TgAMNjAtOEYnAAAyPicAEDgwLTk5Abk2nAA+JwAEOTkJ5QQwMDrnADBdAgAAAHt9AgAAAFtdFQAVFhUaLBUMFRAVBhUGHDYAKAJ7fRgJImZsb2F0NjQiAAAACygCAAAADAEDA4jGAhUEFaABFYIBTBUMFQASAABQsAUAAABkdHlwZQoAAABpc19udW1lcmljBAAAAG1lYW4JAAAAaGlzdG9ncmFtDi4NABRfYXJncw4yEgAMYmlucxUAFRYVGiwVDBUQFQYVBhw2ACgEbWVhbhgFZHR5cGUAAAALKAIAAAAMAQMDiMYCFQQVoAEVggFMFQwVABIAAFCwBQAAAGR0eXBlCgAAAGlzX251bWVyaWMEAAAAbWVhbgkAAABoaXN0b2dyYW0OLg0AFF9hcmdzDjISAAxiaW5zFQAVFhUaLBUMFRAVBhUGHDYAKARtZWFuGAVkdHlwZQAAAAsoAgAAAAwBAwOIxgIVBBWgARWCAUwVDBUAEgAAULAFAAAAZHR5cGUKAAAAaXNfbnVtZXJpYwQAAABtZWFuCQAAAGhpc3RvZ3JhbQ4uDQAUX2FyZ3MOMhIADGJpbnMVABUWFRosFQwVEBUGFQYcNgAoBG1lYW4YBWR0eXBlAAAACygCAAAADAEDA4jGAhUEGVw1ABgGc2NoZW1hFQgAFQwlAhgBYSUATBwAAAAVDCUCGAVpbmRleCUATBwAAAAVDCUCGAdsZXZlbF8wJQBMHAAAABUMJQIYEV9faW5kZXhfbGV2ZWxfMF9fJQBMHAAAABYMGRwZTCYAHBUMGTUABhAZGAFhFQIWDBbqBRbsAyaSAyYIHDYAKAJ7fRgJImZsb2F0NjQiABksFQQVABUCABUAFRAVAgA8FrwEGQYZJgAMAAAAJgAcFQwZNQAGEBkYBWluZGV4FQIWDBaaAhaAAiaWBSb0Axw2ACgEbWVhbhgFZHR5cGUAGSwVBBUAFQIAFQAVEBUCADwWcBkGGSYADAAAACYAHBUMGTUABhAZGAdsZXZlbF8wFQIWDBaaAhaAAiaWByb0BRw2ACgEbWVhbhgFZHR5cGUAGSwVBBUAFQIAFQAVEBUCADwWcBkGGSYADAAAACYAHBUMGTUABhAZGBFfX2luZGV4X2xldmVsXzBfXxUCFgwWmgIWgAImlgkm9AccNgAoBG1lYW4YBWR0eXBlABksFQQVABUCABUAFRAVAgA8FnAZBhkmAAwAAAAWuAwWDCYIFuwJABksGAZwYW5kYXMY0gV7ImluZGV4X2NvbHVtbnMiOiBbIl9faW5kZXhfbGV2ZWxfMF9fIl0sICJjb2x1bW5faW5kZXhlcyI6IFt7Im5hbWUiOiBudWxsLCAiZmllbGRfbmFtZSI6IG51bGwsICJwYW5kYXNfdHlwZSI6ICJ1bmljb2RlIiwgIm51bXB5X3R5cGUiOiAib2JqZWN0IiwgIm1ldGFkYXRhIjogeyJlbmNvZGluZyI6ICJVVEYtOCJ9fV0sICJjb2x1bW5zIjogW3sibmFtZSI6ICJhIiwgImZpZWxkX25hbWUiOiAiYSIsICJwYW5kYXNfdHlwZSI6ICJ1bmljb2RlIiwgIm51bXB5X3R5cGUiOiAib2JqZWN0IiwgIm1ldGFkYXRhIjogbnVsbH0sIHsibmFtZSI6ICJpbmRleCIsICJmaWVsZF9uYW1lIjogImluZGV4IiwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogImxldmVsXzAiLCAiZmllbGRfbmFtZSI6ICJsZXZlbF8wIiwgInBhbmRhc190eXBlIjogInVuaWNvZGUiLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiAiX19pbmRleF9sZXZlbF8wX18iLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IG51bGx9XSwgImNyZWF0b3IiOiB7ImxpYnJhcnkiOiAicHlhcnJvdyIsICJ2ZXJzaW9uIjogIjIxLjAuMCJ9LCAicGFuZGFzX3ZlcnNpb24iOiAiMi4yLjMifQAYDEFSUk9XOnNjaGVtYRjsCi8vLy8vd2dFQUFBUUFBQUFBQUFLQUE0QUJnQUZBQWdBQ2dBQUFBQUJCQUFRQUFBQUFBQUtBQXdBQUFBRUFBZ0FDZ0FBQUFnREFBQUVBQUFBQVFBQUFBd0FBQUFJQUF3QUJBQUlBQWdBQUFEZ0FnQUFCQUFBQU5JQ0FBQjdJbWx1WkdWNFgyTnZiSFZ0Ym5NaU9pQmJJbDlmYVc1a1pYaGZiR1YyWld4Zk1GOWZJbDBzSUNKamIyeDFiVzVmYVc1a1pYaGxjeUk2SUZ0N0ltNWhiV1VpT2lCdWRXeHNMQ0FpWm1sbGJHUmZibUZ0WlNJNklHNTFiR3dzSUNKd1lXNWtZWE5mZEhsd1pTSTZJQ0oxYm1samIyUmxJaXdnSW01MWJYQjVYM1I1Y0dVaU9pQWliMkpxWldOMElpd2dJbTFsZEdGa1lYUmhJam9nZXlKbGJtTnZaR2x1WnlJNklDSlZWRVl0T0NKOWZWMHNJQ0pqYjJ4MWJXNXpJam9nVzNzaWJtRnRaU0k2SUNKaElpd2dJbVpwWld4a1gyNWhiV1VpT2lBaVlTSXNJQ0p3WVc1a1lYTmZkSGx3WlNJNklDSjFibWxqYjJSbElpd2dJbTUxYlhCNVgzUjVjR1VpT2lBaWIySnFaV04wSWl3Z0ltMWxkR0ZrWVhSaElqb2diblZzYkgwc0lIc2libUZ0WlNJNklDSnBibVJsZUNJc0lDSm1hV1ZzWkY5dVlXMWxJam9nSW1sdVpHVjRJaXdnSW5CaGJtUmhjMTkwZVhCbElqb2dJblZ1YVdOdlpHVWlMQ0FpYm5WdGNIbGZkSGx3WlNJNklDSnZZbXBsWTNRaUxDQWliV1YwWVdSaGRHRWlPaUJ1ZFd4c2ZTd2dleUp1WVcxbElqb2dJbXhsZG1Wc1h6QWlMQ0FpWm1sbGJHUmZibUZ0WlNJNklDSnNaWFpsYkY4d0lpd2dJbkJoYm1SaGMxOTBlWEJsSWpvZ0luVnVhV052WkdVaUxDQWliblZ0Y0hsZmRIbHdaU0k2SUNKdlltcGxZM1FpTENBaWJXVjBZV1JoZEdFaU9pQnVkV3hzZlN3Z2V5SnVZVzFsSWpvZ2JuVnNiQ3dnSW1acFpXeGtYMjVoYldVaU9pQWlYMTlwYm1SbGVGOXNaWFpsYkY4d1gxOGlMQ0FpY0dGdVpHRnpYM1I1Y0dVaU9pQWlkVzVwWTI5a1pTSXNJQ0p1ZFcxd2VWOTBlWEJsSWpvZ0ltOWlhbVZqZENJc0lDSnRaWFJoWkdGMFlTSTZJRzUxYkd4OVhTd2dJbU55WldGMGIzSWlPaUI3SW14cFluSmhjbmtpT2lBaWNIbGhjbkp2ZHlJc0lDSjJaWEp6YVc5dUlqb2dJakl4TGpBdU1DSjlMQ0FpY0dGdVpHRnpYM1psY25OcGIyNGlPaUFpTWk0eUxqTWlmUUFBQmdBQUFIQmhibVJoY3dBQUJBQUFBS1FBQUFCb0FBQUFQQUFBQUFRQUFBQjgvLy8vQUFBQkJSQUFBQUFrQUFBQUJBQUFBQUFBQUFBUkFBQUFYMTlwYm1SbGVGOXNaWFpsYkY4d1gxOEFBQUI4Ly8vL3NQLy8vd0FBQVFVUUFBQUFHQUFBQUFRQUFBQUFBQUFBQndBQUFHeGxkbVZzWHpBQXBQLy8vOWovLy84QUFBRUZFQUFBQUJnQUFBQUVBQUFBQUFBQUFBVUFBQUJwYm1SbGVBQUFBTXovLy84UUFCUUFDQUFHQUFjQURBQUFBQkFBRUFBQUFBQUFBUVVRQUFBQUdBQUFBQVFBQUFBQUFBQUFBUUFBQUdFQUFBQUVBQVFBQkFBQUFBQUFBQUE9ABggcGFycXVldC1jcHAtYXJyb3cgdmVyc2lvbiAyMS4wLjAZTBwAABwAABwAABwAAABLCgAAUEFSMQ=="} +{ + "format": "parquet_b64", + "layout": "wide", + "data": "UEFSMRUEFRoVHkwVAhUAEgAADTAJAAAAImZsb2F0NjQiFQAVEhUWLBUCFRAVBhUGHDYAKAkiZmxvYXQ2NCIYCSJmbG9hdDY0IgAAAAkgAgAAAAIBAQIAFQQVEBUUTBUCFQASAAAIHAQAAAA1MC4wFQAVEhUWLBUCFRAVBhUGHDYAKAQ1MC4wGAQ1MC4wAAAACSACAAAAAgEBAgAVBBUOFRJMFQIVABIAAAcYAwAAADIuMBUAFRIVFiwVAhUQFQYVBhw2ACgDMi4wGAMyLjAAAAAJIAIAAAACAQECABUEFRAVFEwVAhUAEgAACBwEAAAAOTkuMBUAFRIVFiwVAhUQFQYVBhw2ACgEOTkuMBgEOTkuMAAAAAkgAgAAAAIBAQIAFQQVEBUUTBUCFQASAAAIHAQAAAB0cnVlFQAVEhUWLBUCFRAVBhUGHDYAKAR0cnVlGAR0cnVlAAAACSACAAAAAgEBAgAVBBUMFRBMFQIVABIAAAYUAgAAADUwFQAVEhUWLBUCFRAVBhUGHDYAKAI1MBgCNTAAAAAJIAIAAAACAQECABUEFY4DFdoBTBUCFQASAADHAbDDAAAAW3sibmFtZSI6ICIwLTIwIiwgInBvcHVsYXRpb24iOiAxNS4wfSwgeyIRJgwyMC00RicAADI+JwAMNDAtNkYnABAzMC4wfS5OAAw2MC04RicAADI+JwAQODAtMTBGKAAUMTAuMH1dFQAVEhUWLBUCFRAVBhUGHDYAKMMBW3sibmFtZSI6ICIwLTIwIiwgInBvcHVsYXRpb24iOiAxNS4wfSwgeyJuYW1lIjogIjIwLTQwIiwgInBvcHVsYXRpb24iOiAyNS4wfSwgeyJuYW1lIjogIjQwLTYwIiwgInBvcHVsYXRpb24iOiAzMC4wfSwgeyJuYW1lIjogIjYwLTgwIiwgInBvcHVsYXRpb24iOiAyMC4wfSwgeyJuYW1lIjogIjgwLTEwMCIsICJwb3B1bGF0aW9uIjogMTAuMH1dGMMBW3sibmFtZSI6ICIwLTIwIiwgInBvcHVsYXRpb24iOiAxNS4wfSwgeyJuYW1lIjogIjIwLTQwIiwgInBvcHVsYXRpb24iOiAyNS4wfSwgeyJuYW1lIjogIjQwLTYwIiwgInBvcHVsYXRpb24iOiAzMC4wfSwgeyJuYW1lIjogIjYwLTgwIiwgInBvcHVsYXRpb24iOiAyMC4wfSwgeyJuYW1lIjogIjgwLTEwMCIsICJwb3B1bGF0aW9uIjogMTAuMH1dAAAACSACAAAAAgEBAgAVBBVOFVJMFQIVABIAACeYIwAAAFsyLjAsIDIxLjUsIDQxLjAsIDYwLjUsIDgwLjAsIDk5LjBdFQAVEhUWLBUCFRAVBhUGHDYAKCNbMi4wLCAyMS41LCA0MS4wLCA2MC41LCA4MC4wLCA5OS4wXRgjWzIuMCwgMjEuNSwgNDEuMCwgNjAuNSwgODAuMCwgOTkuMF0AAAAJIAIAAAACAQECABUEFToVOEwVAhUAEgAAHSgZAAAAWzAuMywgMQkFLDYsIDEuOSwgMS4wXRUAFRIVFiwVAhUQFQYVBhw2ACgZWzAuMywgMS4zLCAxLjYsIDEuOSwgMS4wXRgZWzAuMywgMS4zLCAxLjYsIDEuOSwgMS4wXQAAAAkgAgAAAAIBAQIAFQQVFhUaTBUCFQASAAALKAcAAAAiaW50NjQiFQAVEhUWLBUCFRAVBhUGHDYAKAciaW50NjQiGAciaW50NjQiAAAACSACAAAAAgEBAgAVBBUQFRRMFQIVABIAAAgcBAAAADIyLjAVABUSFRYsFQIVEBUGFQYcNgAoBDIyLjAYBDIyLjAAAAAJIAIAAAACAQECABUEFQ4VEkwVAhUAEgAABxgDAAAAMS4wFQAVEhUWLBUCFRAVBhUGHDYAKAMxLjAYAzEuMAAAAAkgAgAAAAIBAQIAFQQVEBUUTBUCFQASAAAIHAQAAAA1MC4wFQAVEhUWLBUCFRAVBhUGHDYAKAQ1MC4wGAQ1MC4wAAAACSACAAAAAgEBAgAVBBUQFRRMFQIVABIAAAgcBAAAAHRydWUVABUSFRYsFQIVEBUGFQYcNgAoBHRydWUYBHRydWUAAAAJIAIAAAACAQECABUEFQwVEEwVAhUAEgAABhQCAAAANTAVABUSFRYsFQIVEBUGFQYcNgAoAjUwGAI1MAAAAAkgAgAAAAIBAQIAFQQVogEVeEwVAhUAEgAAUbBNAAAAW3sibmFtZSI6ICIwLTI1IiwgInBvcHVsYXRpb24iOiA1MC4wfSwgeyIRJhAyNS01MFYnAABdFQAVEhUWLBUCFRAVBhUGHDYAKE1beyJuYW1lIjogIjAtMjUiLCAicG9wdWxhdGlvbiI6IDUwLjB9LCB7Im5hbWUiOiAiMjUtNTAiLCAicG9wdWxhdGlvbiI6IDUwLjB9XRhNW3sibmFtZSI6ICIwLTI1IiwgInBvcHVsYXRpb24iOiA1MC4wfSwgeyJuYW1lIjogIjI1LTUwIiwgInBvcHVsYXRpb24iOiA1MC4wfV0AAAAJIAIAAAACAQECABUEFSoVLkwVAhUAEgAAFVARAAAAWzEuMCwgMjUuMCwgNTAuMF0VABUSFRYsFQIVEBUGFQYcNgAoEVsxLjAsIDI1LjAsIDUwLjBdGBFbMS4wLCAyNS4wLCA1MC4wXQAAAAkgAgAAAAIBAQIAFQQVJhUqTBUCFQASAAATSA8AAABbMC4wLCAxLjQsIDEuN10VABUSFRYsFQIVEBUGFQYcNgAoD1swLjAsIDEuNCwgMS43XRgPWzAuMCwgMS40LCAxLjddAAAACSACAAAAAgEBAgAVBBn8EzUAGAZzY2hlbWEVJAAVDCUCGAhhX19kdHlwZSUATBwAAAAVDCUCGAdhX19tZWFuJQBMHAAAABUMJQIYBmFfX21pbiUATBwAAAAVDCUCGAZhX19tYXglAEwcAAAAFQwlAhgNYV9faXNfbnVtZXJpYyUATBwAAAAVDCUCGAlhX19sZW5ndGglAEwcAAAAFQwlAhgMYV9faGlzdG9ncmFtJQBMHAAAABUMJQIYEWFfX2hpc3RvZ3JhbV9iaW5zJQBMHAAAABUMJQIYFWFfX2hpc3RvZ3JhbV9sb2dfYmlucyUATBwAAAAVDCUCGAhiX19kdHlwZSUATBwAAAAVDCUCGAdiX19tZWFuJQBMHAAAABUMJQIYBmJfX21pbiUATBwAAAAVDCUCGAZiX19tYXglAEwcAAAAFQwlAhgNYl9faXNfbnVtZXJpYyUATBwAAAAVDCUCGAliX19sZW5ndGglAEwcAAAAFQwlAhgMYl9faGlzdG9ncmFtJQBMHAAAABUMJQIYEWJfX2hpc3RvZ3JhbV9iaW5zJQBMHAAAABUMJQIYFWJfX2hpc3RvZ3JhbV9sb2dfYmlucyUATBwAAAAWAhkcGfwSJgAcFQwZNQAGEBkYCGFfX2R0eXBlFQIWAhaeARamASZCJggcNgAoCSJmbG9hdDY0IhgJImZsb2F0NjQiABksFQQVABUCABUAFRAVAgA8FhIZBhkmAAIAAAAmABwVDBk1AAYQGRgHYV9fbWVhbhUCFgIWgAEWiAEm3gEmrgEcNgAoBDUwLjAYBDUwLjAAGSwVBBUAFQIAFQAVEBUCADwWCBkGGSYAAgAAACYAHBUMGTUABhAZGAZhX19taW4VAhYCFnoWggEm5AImtgIcNgAoAzIuMBgDMi4wABksFQQVABUCABUAFRAVAgA8FgYZBhkmAAIAAAAmABwVDBk1AAYQGRgGYV9fbWF4FQIWAhaAARaIASboAya4Axw2ACgEOTkuMBgEOTkuMAAZLBUEFQAVAgAVABUQFQIAPBYIGQYZJgACAAAAJgAcFQwZNQAGEBkYDWFfX2lzX251bWVyaWMVAhYCFoABFogBJvAEJsAEHDYAKAR0cnVlGAR0cnVlABksFQQVABUCABUAFRAVAgA8FggZBhkmAAIAAAAmABwVDBk1AAYQGRgJYV9fbGVuZ3RoFQIWAhZ0Fnwm9AUmyAUcNgAoAjUwGAI1MAAZLBUEFQAVAgAVABUQFQIAPBYEGQYZJgACAAAAJgAcFQwZNQAGEBkYDGFfX2hpc3RvZ3JhbRUCFgIWggoW0ggmvggmxAYcNgAowwFbeyJuYW1lIjogIjAtMjAiLCAicG9wdWxhdGlvbiI6IDE1LjB9LCB7Im5hbWUiOiAiMjAtNDAiLCAicG9wdWxhdGlvbiI6IDI1LjB9LCB7Im5hbWUiOiAiNDAtNjAiLCAicG9wdWxhdGlvbiI6IDMwLjB9LCB7Im5hbWUiOiAiNjAtODAiLCAicG9wdWxhdGlvbiI6IDIwLjB9LCB7Im5hbWUiOiAiODAtMTAwIiwgInBvcHVsYXRpb24iOiAxMC4wfV0YwwFbeyJuYW1lIjogIjAtMjAiLCAicG9wdWxhdGlvbiI6IDE1LjB9LCB7Im5hbWUiOiAiMjAtNDAiLCAicG9wdWxhdGlvbiI6IDI1LjB9LCB7Im5hbWUiOiAiNDAtNjAiLCAicG9wdWxhdGlvbiI6IDMwLjB9LCB7Im5hbWUiOiAiNjAtODAiLCAicG9wdWxhdGlvbiI6IDIwLjB9LCB7Im5hbWUiOiAiODAtMTAwIiwgInBvcHVsYXRpb24iOiAxMC4wfV0AGSwVBBUAFQIAFQAVEBUCADwWhgMZBhkmAAIAAAAmABwVDBk1AAYQGRgRYV9faGlzdG9ncmFtX2JpbnMVAhYCFroCFsICJoQQJpYPHDYAKCNbMi4wLCAyMS41LCA0MS4wLCA2MC41LCA4MC4wLCA5OS4wXRgjWzIuMCwgMjEuNSwgNDEuMCwgNjAuNSwgODAuMCwgOTkuMF0AGSwVBBUAFQIAFQAVEBUCADwWRhkGGSYAAgAAACYAHBUMGTUABhAZGBVhX19oaXN0b2dyYW1fbG9nX2JpbnMVAhYCFv4BFoACJqwSJtgRHDYAKBlbMC4zLCAxLjMsIDEuNiwgMS45LCAxLjBdGBlbMC4zLCAxLjMsIDEuNiwgMS45LCAxLjBdABksFQQVABUCABUAFRAVAgA8FjIZBhkmAAIAAAAmABwVDBk1AAYQGRgIYl9fZHR5cGUVAhYCFpIBFpoBJo4UJtgTHDYAKAciaW50NjQiGAciaW50NjQiABksFQQVABUCABUAFRAVAgA8Fg4ZBhkmAAIAAAAmABwVDBk1AAYQGRgHYl9fbWVhbhUCFgIWgAEWiAEmohUm8hQcNgAoBDIyLjAYBDIyLjAAGSwVBBUAFQIAFQAVEBUCADwWCBkGGSYAAgAAACYAHBUMGTUABhAZGAZiX19taW4VAhYCFnoWggEmqBYm+hUcNgAoAzEuMBgDMS4wABksFQQVABUCABUAFRAVAgA8FgYZBhkmAAIAAAAmABwVDBk1AAYQGRgGYl9fbWF4FQIWAhaAARaIASasFyb8Fhw2ACgENTAuMBgENTAuMAAZLBUEFQAVAgAVABUQFQIAPBYIGQYZJgACAAAAJgAcFQwZNQAGEBkYDWJfX2lzX251bWVyaWMVAhYCFoABFogBJrQYJoQYHDYAKAR0cnVlGAR0cnVlABksFQQVABUCABUAFRAVAgA8FggZBhkmAAIAAAAmABwVDBk1AAYQGRgJYl9fbGVuZ3RoFQIWAhZ0FnwmuBkmjBkcNgAoAjUwGAI1MAAZLBUEFQAVAgAVABUQFQIAPBYEGQYZJgACAAAAJgAcFQwZNQAGEBkYDGJfX2hpc3RvZ3JhbRUCFgIWuAQWkgQmnhsmiBocNgAoTVt7Im5hbWUiOiAiMC0yNSIsICJwb3B1bGF0aW9uIjogNTAuMH0sIHsibmFtZSI6ICIyNS01MCIsICJwb3B1bGF0aW9uIjogNTAuMH1dGE1beyJuYW1lIjogIjAtMjUiLCAicG9wdWxhdGlvbiI6IDUwLjB9LCB7Im5hbWUiOiAiMjUtNTAiLCAicG9wdWxhdGlvbiI6IDUwLjB9XQAZLBUEFQAVAgAVABUQFQIAPBaaARkGGSYAAgAAACYAHBUMGTUABhAZGBFiX19oaXN0b2dyYW1fYmlucxUCFgIWzgEW1gEm5B4mmh4cNgAoEVsxLjAsIDI1LjAsIDUwLjBdGBFbMS4wLCAyNS4wLCA1MC4wXQAZLBUEFQAVAgAVABUQFQIAPBYiGQYZJgACAAAAJgAcFQwZNQAGEBkYFWJfX2hpc3RvZ3JhbV9sb2dfYmlucxUCFgIWwgEWygEmtiAm8B8cNgAoD1swLjAsIDEuNCwgMS43XRgPWzAuMCwgMS40LCAxLjddABksFQQVABUCABUAFRAVAgA8Fh4ZBhkmAAIAAAAWjiIWAiYIFrIhABkcGAxBUlJPVzpzY2hlbWEYmAovLy8vLzhnREFBQVFBQUFBQUFBS0FBd0FCZ0FGQUFnQUNnQUFBQUFCQkFBTUFBQUFDQUFJQUFBQUJBQUlBQUFBQkFBQUFCSUFBQUJrQXdBQUtBTUFBUHdDQUFEUUFnQUFuQUlBQUd3Q0FBQTRBZ0FBQUFJQUFNUUJBQUNVQVFBQWFBRUFBRHdCQUFBUUFRQUEzQUFBQUt3QUFBQjRBQUFBUUFBQUFBUUFBQUQwL1AvL0FBQUJCUkFBQUFBb0FBQUFCQUFBQUFBQUFBQVZBQUFBWWw5ZmFHbHpkRzluY21GdFgyeHZaMTlpYVc1ekFBQUE4UHovL3l6OS8vOEFBQUVGRUFBQUFDUUFBQUFFQUFBQUFBQUFBQkVBQUFCaVgxOW9hWE4wYjJkeVlXMWZZbWx1Y3dBQUFDVDkvLzlnL2YvL0FBQUJCUkFBQUFBZ0FBQUFCQUFBQUFBQUFBQU1BQUFBWWw5ZmFHbHpkRzluY21GdEFBQUFBRlQ5Ly8rUS9mLy9BQUFCQlJBQUFBQWNBQUFBQkFBQUFBQUFBQUFKQUFBQVlsOWZiR1Z1WjNSb0FBQUFnUDMvLzd6OS8vOEFBQUVGRUFBQUFDQUFBQUFFQUFBQUFBQUFBQTBBQUFCaVgxOXBjMTl1ZFcxbGNtbGpBQUFBc1AzLy8rejkvLzhBQUFFRkVBQUFBQmdBQUFBRUFBQUFBQUFBQUFZQUFBQmlYMTl0WVhnQUFOajkvLzhVL3YvL0FBQUJCUkFBQUFBWUFBQUFCQUFBQUFBQUFBQUdBQUFBWWw5ZmJXbHVBQUFBL3YvL1BQNy8vd0FBQVFVUUFBQUFHQUFBQUFRQUFBQUFBQUFBQndBQUFHSmZYMjFsWVc0QUtQNy8vMlQrLy84QUFBRUZFQUFBQUJ3QUFBQUVBQUFBQUFBQUFBZ0FBQUJpWDE5a2RIbHdaUUFBQUFCVS92Ly9rUDcvL3dBQUFRVVFBQUFBS0FBQUFBUUFBQUFBQUFBQUZRQUFBR0ZmWDJocGMzUnZaM0poYlY5c2IyZGZZbWx1Y3dBQUFJeisvLy9JL3YvL0FBQUJCUkFBQUFBa0FBQUFCQUFBQUFBQUFBQVJBQUFBWVY5ZmFHbHpkRzluY21GdFgySnBibk1BQUFEQS92Ly8vUDcvL3dBQUFRVVFBQUFBSUFBQUFBUUFBQUFBQUFBQURBQUFBR0ZmWDJocGMzUnZaM0poYlFBQUFBRHcvdi8vTFAvLy93QUFBUVVRQUFBQUhBQUFBQVFBQUFBQUFBQUFDUUFBQUdGZlgyeGxibWQwYUFBQUFCei8vLzlZLy8vL0FBQUJCUkFBQUFBZ0FBQUFCQUFBQUFBQUFBQU5BQUFBWVY5ZmFYTmZiblZ0WlhKcFl3QUFBRXovLy8rSS8vLy9BQUFCQlJBQUFBQVlBQUFBQkFBQUFBQUFBQUFHQUFBQVlWOWZiV0Y0QUFCMC8vLy9zUC8vL3dBQUFRVVFBQUFBR0FBQUFBUUFBQUFBQUFBQUJnQUFBR0ZmWDIxcGJnQUFuUC8vLzlqLy8vOEFBQUVGRUFBQUFCZ0FBQUFFQUFBQUFBQUFBQWNBQUFCaFgxOXRaV0Z1QU1ULy8vOFFBQlFBQ0FBR0FBY0FEQUFBQUJBQUVBQUFBQUFBQVFVUUFBQUFJQUFBQUFRQUFBQUFBQUFBQ0FBQUFHRmZYMlIwZVhCbEFBQUFBQVFBQkFBRUFBQUFBQUFBQUE9PQAYIHBhcnF1ZXQtY3BwLWFycm93IHZlcnNpb24gMjEuMC4wGfwSHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAHAAAAOwPAABQQVIx" +} \ No newline at end of file diff --git a/packages/js/static-embed.tsx b/packages/js/static-embed.tsx index 5e57f376..41216c95 100644 --- a/packages/js/static-embed.tsx +++ b/packages/js/static-embed.tsx @@ -14,11 +14,18 @@ async function main() { if (!rootEl) throw new Error("No #root element found"); // Pre-resolve parquet_b64 payloads before React render + console.log("[static-embed] artifact keys:", Object.keys(artifact)); + console.log("[static-embed] df_data format:", artifact.df_data?.format, "type:", typeof artifact.df_data, "isArray:", Array.isArray(artifact.df_data)); + console.log("[static-embed] summary_stats format:", artifact.summary_stats_data?.format, "type:", typeof artifact.summary_stats_data); + const [dfData, summaryStatsData] = await Promise.all([ resolveDFDataAsync(artifact.df_data), resolveDFDataAsync(artifact.summary_stats_data), ]); + console.log("[static-embed] dfData rows:", dfData.length, "first:", JSON.stringify(dfData[0])?.slice(0, 200)); + console.log("[static-embed] summaryStats rows:", summaryStatsData.length, "first:", JSON.stringify(summaryStatsData[0])?.slice(0, 200)); + const resolved: any = { embed_type: artifact.embed_type || "DFViewer", df_data: dfData, diff --git a/tests/unit/lazy_infinite_polars_widget_test.py b/tests/unit/lazy_infinite_polars_widget_test.py index e5f4a2eb..82541eb3 100644 --- a/tests/unit/lazy_infinite_polars_widget_test.py +++ b/tests/unit/lazy_infinite_polars_widget_test.py @@ -1,5 +1,4 @@ import polars as pl -import pandas as pd import base64 from io import BytesIO import json @@ -13,14 +12,47 @@ def _resolve_all_stats(all_stats): - """Resolve all_stats to a list of row dicts, whether it's JSON or parquet_b64.""" + """Resolve all_stats to a list of row dicts, whether it's JSON or parquet_b64. + + Handles both old row-based and new wide-column (col__stat) formats. + """ if isinstance(all_stats, list): return all_stats if isinstance(all_stats, dict) and all_stats.get('format') == 'parquet_b64': + import pyarrow.parquet as pq raw = base64.b64decode(all_stats['data']) - df = pd.read_parquet(BytesIO(raw), engine='pyarrow') + table = pq.read_table(BytesIO(raw)) + col_names = table.column_names + + # Detect wide format: column names contain '__' + if any('__' in c for c in col_names): + row_dict = table.to_pydict() + stat_cols = {} + all_cols = set() + for key in col_names: + sep = key.index('__') + col, stat = key[:sep], key[sep+2:] + all_cols.add(col) + if stat not in stat_cols: + stat_cols[stat] = {} + val = row_dict[key][0] + if isinstance(val, str): + try: + val = json.loads(val) + except (json.JSONDecodeError, ValueError): + pass + stat_cols[stat][col] = val + rows = [] + for stat, cols in stat_cols.items(): + row = {'index': stat, 'level_0': stat} + for c in sorted(all_cols): + row[c] = cols.get(c) + rows.append(row) + return rows + + # Old row-based format fallback + df = table.to_pandas() rows = json.loads(df.to_json(orient='records')) - # JSON-parse each cell (they were JSON-encoded on the Python side) parsed_rows = [] for row in rows: parsed = {} diff --git a/tests/unit/polars_basic_widget_test.py b/tests/unit/polars_basic_widget_test.py index 361a367c..d6943425 100644 --- a/tests/unit/polars_basic_widget_test.py +++ b/tests/unit/polars_basic_widget_test.py @@ -5,7 +5,6 @@ import polars as pl from polars import functions as F import numpy as np -import pandas as pd from buckaroo.pluggable_analysis_framework.polars_analysis_management import ( PolarsAnalysis, polars_produce_series_df) from buckaroo.pluggable_analysis_framework.col_analysis import ( @@ -19,12 +18,46 @@ def _resolve_all_stats(all_stats): - """Resolve all_stats to a list of row dicts, whether it's JSON or parquet_b64.""" + """Resolve all_stats to a list of row dicts, whether it's JSON or parquet_b64. + + Handles both old row-based and new wide-column (col__stat) formats. + """ if isinstance(all_stats, list): return all_stats if isinstance(all_stats, dict) and all_stats.get('format') == 'parquet_b64': + import pyarrow.parquet as pq raw = base64.b64decode(all_stats['data']) - df = pd.read_parquet(BytesIO(raw), engine='pyarrow') + table = pq.read_table(BytesIO(raw)) + col_names = table.column_names + + # Detect wide format: column names contain '__' + if any('__' in c for c in col_names): + row_dict = table.to_pydict() + stat_cols = {} # stat -> {col -> value} + all_cols = set() + for key in col_names: + sep = key.index('__') + col, stat = key[:sep], key[sep+2:] + all_cols.add(col) + if stat not in stat_cols: + stat_cols[stat] = {} + val = row_dict[key][0] + if isinstance(val, str): + try: + val = json.loads(val) + except (json.JSONDecodeError, ValueError): + pass + stat_cols[stat][col] = val + rows = [] + for stat, cols in stat_cols.items(): + row = {'index': stat, 'level_0': stat} + for c in sorted(all_cols): + row[c] = cols.get(c) + rows.append(row) + return rows + + # Old row-based format fallback + df = table.to_pandas() rows = json.loads(df.to_json(orient='records')) parsed_rows = [] for row in rows: diff --git a/tests/unit/test_sd_to_parquet_b64.py b/tests/unit/test_sd_to_parquet_b64.py index 55a2eec0..078d2e9d 100644 --- a/tests/unit/test_sd_to_parquet_b64.py +++ b/tests/unit/test_sd_to_parquet_b64.py @@ -1,26 +1,26 @@ -"""Tests for sd_to_parquet_b64 summary stats serialization. +"""Tests for sd_to_parquet_b64 wide-column summary stats serialization. These verify the Python side of the parquet_b64 transport: encoding -summary stats (including histograms) to parquet and verifying the -round-trip through pyarrow produces correct data that the JS side's -resolveDFData/JSON.parse can consume. +summary stats using one parquet column per (col, stat) pair, with +JSON-encoded cell values. """ import json import base64 from io import BytesIO import numpy as np +import pandas as pd import pyarrow.parquet as pq from buckaroo.serialization_utils import sd_to_parquet_b64 def _decode_parquet_b64(result): - """Decode a parquet_b64 payload back to a DataFrame.""" + """Decode a parquet_b64 payload back to a pyarrow Table.""" assert isinstance(result, dict) assert result['format'] == 'parquet_b64' raw = base64.b64decode(result['data']) - return pq.read_table(BytesIO(raw)).to_pandas() + return pq.read_table(BytesIO(raw)) def test_sd_to_parquet_b64_returns_tagged_dict(): @@ -30,31 +30,46 @@ def test_sd_to_parquet_b64_returns_tagged_dict(): assert isinstance(result['data'], str) -def test_sd_to_parquet_b64_round_trip_scalars(): +def test_sd_to_parquet_b64_wide_column_layout(): + """Verify the wide-column layout: one column per (col, stat) pair.""" sd = { 'col_a': { 'dtype': 'float64', 'mean': np.float64(42.0), - 'min': np.float64(0.0), - 'max': np.float64(100.0), }, } result = sd_to_parquet_b64(sd) - df = _decode_parquet_b64(result) + table = _decode_parquet_b64(result) - # Find the mean row and verify the value round-trips - mean_row = df[df['index'] == 'mean'] - assert len(mean_row) == 1 - cell = mean_row.iloc[0]['a'] # column 'col_a' becomes 'a' - assert json.loads(cell) == 42.0 + assert table.num_rows == 1 + col_names = table.column_names + assert 'a__dtype' in col_names + assert 'a__mean' in col_names -def test_sd_to_parquet_b64_histogram_round_trip(): - """Verify histogram arrays survive the parquet_b64 round-trip. +def test_sd_to_parquet_b64_scalars_round_trip(): + """Scalar values round-trip through JSON encoding in parquet.""" + sd = { + 'col_a': { + 'dtype': 'float64', + 'mean': np.float64(42.0), + 'is_numeric': True, + 'length': 50, + }, + } + result = sd_to_parquet_b64(sd) + table = _decode_parquet_b64(result) + row = table.to_pydict() + + # All values are JSON-encoded strings in parquet + assert json.loads(row['a__mean'][0]) == 42.0 + assert json.loads(row['a__dtype'][0]) == 'float64' + assert json.loads(row['a__is_numeric'][0]) is True + assert json.loads(row['a__length'][0]) == 50 - This is the key test for #630: histogram data must be JSON-decodable - from the parquet payload with correct types (numbers, not strings). - """ + +def test_sd_to_parquet_b64_histogram_round_trip(): + """Histogram arrays survive the round-trip as JSON strings.""" histogram = [ {'name': '0.0 - 1.0', 'tail': 1}, {'name': '1-20', 'population': np.float64(15.0)}, @@ -68,23 +83,13 @@ def test_sd_to_parquet_b64_histogram_round_trip(): }, } result = sd_to_parquet_b64(sd) - df = _decode_parquet_b64(result) + table = _decode_parquet_b64(result) + row = table.to_pydict() - hist_row = df[df['index'] == 'histogram'] - assert len(hist_row) == 1 - - cell = hist_row.iloc[0]['a'] - assert isinstance(cell, str), "histogram cell should be a JSON string in parquet" - - parsed = json.loads(cell) - assert isinstance(parsed, list), "histogram should parse as a list" + parsed = json.loads(row['a__histogram'][0]) + assert isinstance(parsed, list) assert len(parsed) == 4 - - # Verify types: numbers must be numbers, not strings assert parsed[0] == {'name': '0.0 - 1.0', 'tail': 1} - assert isinstance(parsed[0]['tail'], int) - - assert parsed[1]['name'] == '1-20' assert isinstance(parsed[1]['population'], float) assert parsed[1]['population'] == 15.0 @@ -98,11 +103,10 @@ def test_sd_to_parquet_b64_categorical_histogram(): ] sd = {'col': {'histogram': histogram, 'dtype': 'object'}} result = sd_to_parquet_b64(sd) - df = _decode_parquet_b64(result) - - hist_row = df[df['index'] == 'histogram'] - parsed = json.loads(hist_row.iloc[0]['a']) + table = _decode_parquet_b64(result) + row = table.to_pydict() + parsed = json.loads(row['a__histogram'][0]) assert parsed[0] == {'name': 'foo', 'cat_pop': 40.0} assert isinstance(parsed[0]['cat_pop'], float) assert parsed[2] == {'name': 'longtail', 'longtail': 15.0} @@ -114,9 +118,39 @@ def test_sd_to_parquet_b64_multiple_columns(): 'y': {'mean': np.float64(2.0), 'dtype': 'int64'}, } result = sd_to_parquet_b64(sd) - df = _decode_parquet_b64(result) + table = _decode_parquet_b64(result) + row = table.to_pydict() + + assert json.loads(row['a__mean'][0]) == 1.0 + assert json.loads(row['b__mean'][0]) == 2.0 + assert json.loads(row['a__dtype'][0]) == 'float64' + assert json.loads(row['b__dtype'][0]) == 'int64' + + +def test_sd_to_parquet_b64_nan_encoded(): + """NaN values are JSON-encoded via default=str as 'NaN'.""" + sd = {'col': {'mean': np.nan, 'dtype': 'float64'}} + result = sd_to_parquet_b64(sd) + table = _decode_parquet_b64(result) + row = table.to_pydict() + + # NaN goes through json.dumps(default=str) → "NaN" string + cell = row['a__mean'][0] + assert isinstance(cell, str) + assert json.loads(row['a__dtype'][0]) == 'float64' + + +def test_sd_to_parquet_b64_value_counts_series(): + """pd.Series values are JSON-encoded via default=str.""" + sd = { + 'col': { + 'value_counts': pd.Series({'foo': 10, 'bar': 5}), + 'dtype': 'object', + }, + } + result = sd_to_parquet_b64(sd) + table = _decode_parquet_b64(result) + row = table.to_pydict() - # Columns are rewritten to 'a', 'b' by prepare_df_for_serialization - mean_row = df[df['index'] == 'mean'] - assert json.loads(mean_row.iloc[0]['a']) == 1.0 - assert json.loads(mean_row.iloc[0]['b']) == 2.0 + cell = row['a__value_counts'][0] + assert isinstance(cell, str) diff --git a/tests/unit/test_widget_weird_types.py b/tests/unit/test_widget_weird_types.py index 05a500d7..46b5c961 100644 --- a/tests/unit/test_widget_weird_types.py +++ b/tests/unit/test_widget_weird_types.py @@ -33,8 +33,37 @@ def _resolve_all_stats(all_stats): if isinstance(all_stats, list): return all_stats if isinstance(all_stats, dict) and all_stats.get('format') == 'parquet_b64': + import pyarrow.parquet as pq raw = base64.b64decode(all_stats['data']) - df = pd.read_parquet(BytesIO(raw), engine='pyarrow') + table = pq.read_table(BytesIO(raw)) + col_names = table.column_names + + if any('__' in c for c in col_names): + row_dict = table.to_pydict() + stat_cols = {} + all_cols = set() + for key in col_names: + sep = key.index('__') + col, stat = key[:sep], key[sep+2:] + all_cols.add(col) + if stat not in stat_cols: + stat_cols[stat] = {} + val = row_dict[key][0] + if isinstance(val, str): + try: + val = json.loads(val) + except (json.JSONDecodeError, ValueError): + pass + stat_cols[stat][col] = val + rows = [] + for stat, cols in stat_cols.items(): + row = {'index': stat, 'level_0': stat} + for c in sorted(all_cols): + row[c] = cols.get(c) + rows.append(row) + return rows + + df = table.to_pandas() rows = json.loads(df.to_json(orient='records')) parsed_rows = [] for row in rows: