buckaroo-data · paddymul · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026 · Mar 21, 2026
diff --git a/buckaroo/artifact.py b/buckaroo/artifact.py
@@ -29,7 +29,7 @@ def _df_to_parquet_b64_tagged(df: pd.DataFrame) -> dict:
     JSON-encoded per cell (same convention as sd_to_parquet_b64) so the
     JS side can decode them uniformly via parseParquetRow().
 
-    Returns {'format': 'parquet_b64', 'data': '<base64 string>'}
+    Returns {'format': 'parquet_b64', 'layout': 'row', 'data': '<base64 string>'}
     """
     df2 = prepare_df_for_serialization(df)
     if not isinstance(df.index, pd.MultiIndex):
@@ -55,7 +55,7 @@ def _df_to_parquet_b64_tagged(df: pd.DataFrame) -> dict:
     df2.to_parquet(buf, engine='pyarrow')
     buf.seek(0)
     b64 = base64.b64encode(buf.read()).decode('ascii')
-    return {'format': 'parquet_b64', 'data': b64}
+    return {'format': 'parquet_b64', 'layout': 'row', 'data': b64}
 
 
 def prepare_buckaroo_artifact(df, column_config_overrides=None,

diff --git a/buckaroo/serialization_utils.py b/buckaroo/serialization_utils.py
@@ -271,35 +271,39 @@ def _json_encode_cell(val):
 def sd_to_parquet_b64(sd: Dict[str, Any]) -> Dict[str, str]:
     """Convert a summary stats dict to a tagged parquet-b64 payload.
 
-    Summary stats DataFrames have mixed-type columns (strings, numbers, lists)
-    which fastparquet can't handle directly. We JSON-encode every cell value
-    first so each column becomes a pure string column, then use pyarrow for
-    parquet serialization. The JS side decodes parquet then JSON.parse's each cell.
+    Uses a wide-column layout: one parquet column per (col, stat) pair.
+    Column names are ``{short_col}__{stat_name}`` (e.g. ``a__mean``), where
+    ``short_col`` is ``to_chars(i)`` for the i-th key of ``sd``.
+    The parquet file has a single row. All cell values are JSON-encoded
+    via ``_json_encode_cell()`` so the JS side can ``JSON.parse`` each one.
+    Entries of ``sd`` whose value is not a dict are skipped.
 
-    Returns {'format': 'parquet_b64', 'data': '<base64 string>'}
+    Returns ``{'format': 'parquet_b64', 'layout': 'wide', 'data': '<base64>'}``
     Falls back to JSON if parquet serialization fails.
     """
-    # JSON-encode every value so parquet sees only string columns
-    json_sd: Dict[str, Any] = {}
-    for col, stats in sd.items():
-        if isinstance(stats, dict):
-            json_sd[col] = {k: _json_encode_cell(v) for k, v in stats.items()}
-        else:
-            json_sd[col] = stats
+    # Encode every cell in plain Python first; this step stays outside the
+    # try, so an encoding failure propagates instead of being masked.
+    cells: Dict[str, Any] = {}
+    for i, stats in enumerate(sd.values()):
+        if not isinstance(stats, dict):
+            continue
+        short_col = to_chars(i)
+        for stat_name, val in stats.items():
+            cells[f"{short_col}__{stat_name}"] = _json_encode_cell(val)
 
-    df = pd.DataFrame(json_sd)
-    df2 = prepare_df_for_serialization(df)
-    # Add level_0 for backwards compatibility with JSON path (pd_to_obj adds it)
-    if not isinstance(df.index, pd.MultiIndex):
-        df2['level_0'] = df2['index']
-
     try:
+        # Import and use pyarrow inside the try so that a missing or failing
+        # pyarrow reaches the JSON fallback below instead of raising.
+        import pyarrow as pa
+        import pyarrow.parquet as pq
+
+        table = pa.table({name: [cell] for name, cell in cells.items()})
         data = BytesIO()
-        df2.to_parquet(data, engine='pyarrow')
+        pq.write_table(table, data)
         data.seek(0)
         raw_bytes = data.read()
         b64 = base64.b64encode(raw_bytes).decode('ascii')
-        return {'format': 'parquet_b64', 'data': b64}
+        return {'format': 'parquet_b64', 'layout': 'wide', 'data': b64}
     except Exception as e:
         logger.warning("Failed to serialize summary stats as parquet, falling back to JSON: %r", e)
         return pd_to_obj(pd.DataFrame(sd))

diff --git a/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts b/packages/buckaroo-js-core/pw-tests/static-embed.spec.ts
@@ -15,8 +15,21 @@ import { waitForCells, getRowCount } from './ag-pw-utils';
 test.describe('Static embed renders', () => {
 
     test('AG-Grid table appears with data rows', async ({ page }) => {
+        // Capture all console messages for debugging
+        const logs: string[] = [];
+        page.on('console', msg => logs.push(`[${msg.type()}] ${msg.text()}`));
+        page.on('pageerror', err => logs.push(`[PAGE_ERROR] ${err.message}`));
+
         await page.goto('/static-test.html');
 
+        // Give 5s for initial load, then dump console
+        await page.waitForTimeout(5000);
+        console.log('--- Browser console output ---');
+        for (const log of logs) console.log(log);
+        console.log('--- End browser console ---');
+        console.log('Page title:', await page.title());
+        console.log('Body text (first 500):', (await page.locator('body').innerText()).slice(0, 500));
+
         // Wait for the AG-Grid cells to render (parquet decode + React mount)
         await waitForCells(page);
 

diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx b/packages/buckaroo-js-core/src/components/DFViewerParts/ChartCell.tsx
@@ -19,7 +19,7 @@ export const formatter = (value: any, name: any, props: any) => {
     }
 };
 
-export function FloatingTooltip({ items, x, y }: any) {
+export function FloatingTooltip({ items, x, y }: any): React.ReactPortal {
     const offset = 30;
     const renderedItems = items.map((name: [string, number], _value: number | string) => {
         const [realName, realValue] = name;

diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts b/packages/buckaroo-js-core/src/components/DFViewerParts/DFWhole.ts
@@ -216,6 +216,7 @@ export type DFData = DFDataRow[];
 export interface ParquetB64Payload {
     format: 'parquet_b64';
     data: string;  // base64-encoded parquet bytes
+    layout?: 'wide' | 'row';  // optional: 'wide' = summary stats (single row, col__stat columns), 'row' = normal rows
 }
 
 // A value in df_data_dict can be plain JSON (DFData) or a tagged parquet payload

diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx b/packages/buckaroo-js-core/src/components/DFViewerParts/HistogramCell.tsx
@@ -20,7 +20,7 @@ export const formatter = (value: any, name: any, props: any) => {
     }
 };
 
-export function FloatingTooltip({ items, x, y }: any) {
+export function FloatingTooltip({ items, x, y }: any): React.ReactPortal {
     const offset = 30;
     const renderedItems = items.map((name: [string, number], _value: number | string) => {
         const [realName, realValue] = name;

diff --git a/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts b/packages/buckaroo-js-core/src/components/DFViewerParts/resolveDFData.test.ts
@@ -1,9 +1,8 @@
 import { parquetRead, parquetMetadata } from 'hyparquet';
-import { resolveDFData, resolveDFDataAsync } from './resolveDFData';
+import { resolveDFData, resolveDFDataAsync, pivotWideSummaryStats } from './resolveDFData';
 import { DFData, DFDataRow, ParquetB64Payload } from './DFWhole';
 
-// Fixture generated by Python's sd_to_parquet_b64() with a summary stats dict
-// containing numeric histogram data for one column.
+// Fixture generated by Python's sd_to_parquet_b64() with wide-column layout.
 // eslint-disable-next-line @typescript-eslint/no-var-requires
 const fixture = require('./test-fixtures/summary_stats_parquet_b64.json');
 const parquetPayload: ParquetB64Payload = fixture as ParquetB64Payload;
@@ -29,9 +28,7 @@ describe('resolveDFData', () => {
         expect(resolveDFData(data)).toBe(data);
     });
 
-    it('hyparquet can read the parquet_b64 fixture', async () => {
-        // Verify the fixture is valid and hyparquet can decode it.
-        // This is independent of resolveDFData — it tests the raw decode path.
+    it('hyparquet can read the wide-format parquet_b64 fixture', async () => {
         const buf = b64ToArrayBuffer(parquetPayload.data);
         const metadata = parquetMetadata(buf);
         expect(metadata.row_groups.length).toBeGreaterThan(0);
@@ -44,60 +41,124 @@ describe('resolveDFData', () => {
             onComplete: (data: any[]) => { rows.push(...data); },
         });
 
-        expect(rows.length).toBeGreaterThan(0);
+        // Wide format: single row with col__stat columns
+        expect(rows.length).toBe(1);
+        const keys = Object.keys(rows[0]);
+        expect(keys.some(k => k.includes('__'))).toBe(true);
+        expect(keys).toContain('a__mean');
+        expect(keys).toContain('b__dtype');
+    });
 
-        // Should have an 'index' column with stat names
-        const indices = rows.map(r => r.index).filter(Boolean);
-        expect(indices).toContain('histogram');
-        expect(indices).toContain('dtype');
+    it('sync resolveDFData returns [] for parquet_b64 (known async limitation)', () => {
+        const result = resolveDFData(parquetPayload);
+        expect(result.length).toBe(0);
     });
 
-    it('parquet_b64 histogram data round-trips with correct types', async () => {
-        // Decode the fixture and verify histogram arrays have the right structure.
-        const buf = b64ToArrayBuffer(parquetPayload.data);
-        const metadata = parquetMetadata(buf);
+    it('async resolveDFDataAsync returns pivoted DFData for wide-format parquet', async () => {
+        const result = await resolveDFDataAsync(parquetPayload);
+        expect(result.length).toBeGreaterThan(0);
 
-        const rows: DFDataRow[] = [];
-        await parquetRead({
-            file: buf,
-            metadata,
-            rowFormat: 'object',
-            onComplete: (data: any[]) => { rows.push(...data); },
-        });
+        // Should have row-based format with index column
+        const meanRow = result.find(r => r.index === 'mean');
+        expect(meanRow).toBeDefined();
+        expect(meanRow!.a).toBe(50.0);
+        expect(meanRow!.b).toBe(22.0);
 
-        const histRow = rows.find(r => r.index === 'histogram');
-        expect(histRow).toBeDefined();
+        const dtypeRow = result.find(r => r.index === 'dtype');
+        expect(dtypeRow).toBeDefined();
+        expect(dtypeRow!.a).toBe('float64');
+        expect(dtypeRow!.b).toBe('int64');
+    });
 
-        // Column 'a' contains the JSON-encoded histogram array
-        const rawCell = histRow!['a'];
-        expect(typeof rawCell).toBe('string');
+    it('async decode produces histogram arrays from JSON strings', async () => {
+        const result = await resolveDFDataAsync(parquetPayload);
 
-        const parsed = JSON.parse(rawCell as string);
-        expect(Array.isArray(parsed)).toBe(true);
-        expect(parsed.length).toBeGreaterThan(0);
+        const histRow = result.find(r => r.index === 'histogram');
+        expect(histRow).toBeDefined();
+        expect(Array.isArray(histRow!.a)).toBe(true);
+        const hist = histRow!.a as any[];
+        expect(hist.length).toBe(5);
+        expect(typeof hist[0].population).toBe('number');
+        expect(hist[0].name).toBe('0-20');
+    });
 
-        // Verify types: population should be a number, not a string
-        const popBar = parsed.find((b: any) => b.population !== undefined);
-        expect(popBar).toBeDefined();
-        expect(typeof popBar.population).toBe('number');
-        expect(typeof parsed[0].name).toBe('string');
+    it('async decode produces histogram_bins arrays', async () => {
+        const result = await resolveDFDataAsync(parquetPayload);
+
+        const binsRow = result.find(r => r.index === 'histogram_bins');
+        expect(binsRow).toBeDefined();
+        expect(Array.isArray(binsRow!.a)).toBe(true);
+        expect((binsRow!.a as number[]).length).toBe(6);
     });
+});
 
-    it('sync resolveDFData returns [] for parquet_b64 (known async limitation)', () => {
-        // Documents #630: parquetRead is async so the sync wrapper returns [].
-        // Widget components use useResolvedDFDataDict which falls back to async.
-        // The static embed path uses resolveDFDataAsync which works correctly.
-        const result = resolveDFData(parquetPayload);
-        expect(result.length).toBe(0);
+describe('pivotWideSummaryStats', () => {
+    it('pivots a wide row into row-based DFData', () => {
+        const wideRow = {
+            a__mean: 42.5,
+            a__dtype: 'float64',
+            b__mean: 10.0,
+            b__dtype: 'int64',
+        };
+        const result = pivotWideSummaryStats(wideRow);
+
+        const meanRow = result.find(r => r.index === 'mean');
+        expect(meanRow).toBeDefined();
+        expect(meanRow!.a).toBe(42.5);
+        expect(meanRow!.b).toBe(10.0);
+        expect(meanRow!.level_0).toBe('mean');
+
+        const dtypeRow = result.find(r => r.index === 'dtype');
+        expect(dtypeRow).toBeDefined();
+        expect(dtypeRow!.a).toBe('float64');
+        expect(dtypeRow!.b).toBe('int64');
     });
 
-    it('async resolveDFDataAsync returns non-empty result for parquet_b64', async () => {
-        const result = await resolveDFDataAsync(parquetPayload);
-        expect(result.length).toBeGreaterThan(0);
+    it('JSON-parses list/object values in string cells', () => {
+        const wideRow = {
+            a__histogram: '[{"name": "foo", "population": 10}]',
+            a__dtype: 'float64',
+        };
+        const result = pivotWideSummaryStats(wideRow);
 
-        // Verify the histogram row was JSON-parsed correctly
         const histRow = result.find(r => r.index === 'histogram');
         expect(histRow).toBeDefined();
-        expect(Array.isArray(histRow!['a'])).toBe(true);
+        expect(Array.isArray(histRow!.a)).toBe(true);
+        expect((histRow!.a as any[])[0].population).toBe(10);
+    });
+
+    it('keeps plain strings as strings (not JSON-parsed)', () => {
+        const wideRow = {
+            a__dtype: 'float64',
+        };
+        const result = pivotWideSummaryStats(wideRow);
+        const row = result.find(r => r.index === 'dtype');
+        expect(row!.a).toBe('float64');
+    });
+
+    it('handles null values', () => {
+        const wideRow = {
+            a__mean: null,
+            a__dtype: 'float64',
+        };
+        const result = pivotWideSummaryStats(wideRow);
+        const meanRow = result.find(r => r.index === 'mean');
+        expect(meanRow!.a).toBeNull();
+    });
+
+    it('fills missing columns with null', () => {
+        const wideRow = {
+            a__mean: 42,
+            b__dtype: 'int64',
+        };
+        const result = pivotWideSummaryStats(wideRow);
+
+        const meanRow = result.find(r => r.index === 'mean');
+        expect(meanRow!.a).toBe(42);
+        expect(meanRow!.b).toBeNull();
+
+        const dtypeRow = result.find(r => r.index === 'dtype');
+        expect(dtypeRow!.a).toBeNull();
+        expect(dtypeRow!.b).toBe('int64');
     });
 });