Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion web/packages/studio/src/constants/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ export const DEFAULT_API_ERR_MSG = 'Invalid API response. Please try again later
// File name constant for the tools JSON file (presumably the default export/download name — confirm against callers).
export const DEFAULT_TOOLS_FILE_NAME = 'tools.json';
// Placeholder strings for empty fields: an ASCII hyphen and an em dash variant.
export const EMPTY_FIELD_VALUE = '-';
export const EMPTY_FIELD_EMDASH_VALUE = '—';
// NOTE(review): the two lines below declare the same constant twice. This looks like the
// removed and added sides of a diff rendered together; only one declaration can exist in a
// compilable file — confirm which model name is intended.
export const DEFAULT_BUILD_MODEL_NAME = 'nvidia-nvidia-llama-3-3-nemotron-super-49b-v1-5';
export const DEFAULT_BUILD_MODEL_NAME = 'nvidia-llama-3-3-nemotron-super-49b-v1';
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { Stack, Text } from '@nvidia/foundations-react-core';
import type { CategoricalHistogramData } from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes';
import { FC, useMemo } from 'react';
import {
Bar,
BarChart,
CartesianGrid,
LabelList,
ResponsiveContainer,
Tooltip,
XAxis,
YAxis,
} from 'recharts';

/** Props accepted by the categorical histogram chart component. */
interface CategoricalHistogramChartProps {
/** Histogram to plot; its `categories` and `counts` arrays are paired by index. */
histogram: CategoricalHistogramData;
}

/** Show at most this many bars; remaining categories are summarized below. */
const MAX_BARS = 12;
/** Height handed to the chart's `ResponsiveContainer`. */
const CHART_HEIGHT = 220;
/** Tick label style shared by the X and Y axes; `as const` keeps the literal types narrow. */
const TICK_STYLE = { fontSize: 11, fill: 'var(--text-color-base)' } as const;

/** One bar of the chart: a category rendered as text plus how often it occurred. */
interface HistogramBar {
/** Category value converted to a string; used as the X-axis data key. */
label: string;
/** Count for the category; used as the bar's value. */
count: number;
}

/** Longest label, measured in Unicode code points, that is shown without truncation. */
const MAX_LABEL_CHARS = 14;

/**
 * Truncate long category labels so the axis stays legible.
 *
 * Labels of up to {@link MAX_LABEL_CHARS} code points are returned unchanged;
 * longer ones keep their first `MAX_LABEL_CHARS - 1` code points followed by
 * an ellipsis. Truncation works on code points rather than UTF-16 code units
 * so a surrogate pair (e.g. an emoji) is never cut in half, which would leave
 * a lone surrogate that renders as a replacement glyph.
 *
 * @param label - Category label to shorten.
 * @returns The label, possibly shortened and suffixed with `…`.
 */
const truncateLabel = (label: string): string => {
  // Fast path: a string never has more code points than code units.
  if (label.length <= MAX_LABEL_CHARS) {
    return label;
  }
  const codePoints = Array.from(label);
  if (codePoints.length <= MAX_LABEL_CHARS) {
    return label;
  }
  return `${codePoints.slice(0, MAX_LABEL_CHARS - 1).join('')}…`;
};

/**
 * Pairs each category with its count (missing counts become 0), sorts the
 * pairs by descending count, and splits them into the bars to draw and a
 * summary of the overflow beyond {@link MAX_BARS}.
 */
const summarizeHistogram = (
  histogram: CategoricalHistogramData
): { bars: HistogramBar[]; hiddenCount: number; hiddenTotal: number } => {
  const ranked: HistogramBar[] = histogram.categories.map((category, index) => ({
    label: String(category),
    count: histogram.counts[index] ?? 0,
  }));
  // `ranked` is a fresh array from `map`, so sorting in place does not touch the prop.
  ranked.sort((a, b) => b.count - a.count);

  const overflow = ranked.slice(MAX_BARS);
  return {
    bars: ranked.slice(0, MAX_BARS),
    hiddenCount: overflow.length,
    hiddenTotal: overflow.reduce((sum, bar) => sum + bar.count, 0),
  };
};

/**
 * Vertical bar chart of a categorical sampler column's value distribution.
 * Bars are sorted by frequency and capped at {@link MAX_BARS}; any overflow is
 * surfaced as a "+N more categories (M records)" note so the chart stays
 * readable. Both nouns in that note are pluralized according to their counts.
 * When the histogram has no categories a short placeholder message is rendered
 * instead of the chart.
 */
export const CategoricalHistogramChart: FC<CategoricalHistogramChartProps> = ({ histogram }) => {
  // Recompute only when the histogram object itself changes.
  const { bars, hiddenCount, hiddenTotal } = useMemo(
    () => summarizeHistogram(histogram),
    [histogram]
  );

  if (bars.length === 0) {
    return (
      <Text kind="body/regular/sm" className="text-muted">
        No category counts available.
      </Text>
    );
  }

  return (
    <Stack gap="density-sm">
      <ResponsiveContainer width="100%" height={CHART_HEIGHT}>
        <BarChart data={bars} margin={{ top: 16, bottom: 8, left: 0, right: 8 }}>
          <CartesianGrid strokeDasharray="3 3" vertical={false} stroke="var(--border-color-base)" />
          <XAxis
            dataKey="label"
            tick={TICK_STYLE}
            tickFormatter={truncateLabel}
            tickLine={false}
            interval={0}
            angle={-35}
            textAnchor="end"
            height={56}
          />
          <YAxis tick={TICK_STYLE} width={40} allowDecimals={false} tickLine={false} />
          <Tooltip
            cursor={{ fill: 'var(--background-color-accent-gray-subtle)' }}
            contentStyle={{
              fontSize: 12,
              backgroundColor: 'var(--background-color-component-tooltip)',
              borderColor: 'var(--border-color-base)',
              color: 'var(--text-color-base)',
            }}
            labelStyle={{ color: 'var(--text-color-base)' }}
            itemStyle={{ color: 'var(--text-color-base)' }}
            formatter={(value: number) => [value.toLocaleString(), 'Count']}
          />
          <Bar dataKey="count" name="Count" fill="var(--text-color-brand)" radius={[4, 4, 0, 0]}>
            <LabelList
              dataKey="count"
              position="top"
              fill="var(--text-color-base)"
              fontSize={11}
              formatter={(value: number) => value.toLocaleString()}
            />
          </Bar>
        </BarChart>
      </ResponsiveContainer>
      {hiddenCount > 0 && (
        <Text kind="body/regular/sm" className="text-muted">
          +{hiddenCount} more {hiddenCount === 1 ? 'category' : 'categories'} (
          {hiddenTotal.toLocaleString()} {hiddenTotal === 1 ? 'record' : 'records'})
        </Text>
      )}
    </Stack>
  );
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import { Badge, Card, Divider, Flex, Stack, Text } from '@nvidia/foundations-react-core';
import { CategoricalHistogramChart } from '@studio/routes/DataDesignerJobDetailsRoute/CategoricalHistogramChart';
import {
formatPercent,
formatStatCount,
formatStatDecimal,
getCategoricalHistogram,
getColumnTypeLabel,
getNumericalDistribution,
getPercentNull,
getPercentUnique,
isLLMColumnStatistics,
isValidationColumnStatistics,
type ColumnStatistics,
} from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes';
import { FC } from 'react';

/** Props for a single labelled statistic. */
interface StatProps {
/** Caption rendered above the value. */
label: string;
/** Already-formatted value text; callers pass the output of the format helpers. */
value: string;
}

/**
 * One labelled statistic: the caption (styled with the `text-muted uppercase
 * tracking-wide` classes) stacked above its value, which carries the
 * `truncate` class.
 */
const Stat: FC<StatProps> = (props) => {
  const { label, value } = props;

  return (
    <Stack gap="density-xxs" className="min-w-0">
      <Text kind="body/regular/xs" className="text-muted uppercase tracking-wide">
        {label}
      </Text>
      <Text kind="body/regular/md" className="truncate">
        {value}
      </Text>
    </Stack>
  );
};

/** Props for the per-column profile card. */
interface ColumnProfileCardProps {
/** Statistics of the column to display; drives both the summary row and the detail body. */
stats: ColumnStatistics;
}

/**
 * Produces the type-specific detail section for a column. The checks run in a
 * fixed order and the first match wins:
 *
 * 1. categorical histogram → bar chart,
 * 2. numerical distribution → min / max / mean / median / std dev,
 * 3. LLM column → average input and output token counts,
 * 4. validation column → number of valid records.
 *
 * Returns `null` when none apply (e.g. a uuid sampler), letting the caller
 * omit the divider instead of rendering an empty section.
 */
const renderColumnDetail = (stats: ColumnStatistics): React.ReactNode => {
  const categorical = getCategoricalHistogram(stats);
  if (categorical) {
    return <CategoricalHistogramChart histogram={categorical} />;
  }

  const distribution = getNumericalDistribution(stats);
  if (distribution) {
    return (
      <Flex gap="density-xl" className="flex-wrap">
        <Stat label="Min" value={formatStatDecimal(distribution.min)} />
        <Stat label="Max" value={formatStatDecimal(distribution.max)} />
        <Stat label="Mean" value={formatStatDecimal(distribution.mean)} />
        <Stat label="Median" value={formatStatDecimal(distribution.median)} />
        <Stat label="Std dev" value={formatStatDecimal(distribution.stddev)} />
      </Flex>
    );
  }

  if (isLLMColumnStatistics(stats)) {
    const avgInputTokens = formatStatDecimal(stats.input_tokens_mean);
    const avgOutputTokens = formatStatDecimal(stats.output_tokens_mean);
    return (
      <Flex gap="density-xl" className="flex-wrap">
        <Stat label="Input tokens (avg)" value={avgInputTokens} />
        <Stat label="Output tokens (avg)" value={avgOutputTokens} />
      </Flex>
    );
  }

  if (!isValidationColumnStatistics(stats)) {
    return null;
  }

  return (
    <Flex gap="density-xl" className="flex-wrap">
      <Stat label="Valid records" value={formatStatCount(stats.num_valid_records)} />
    </Flex>
  );
};

/**
 * Card showing one column's profile for the grid: the column name with its
 * type badge and dtype, a summary row (records, null %, unique %), and, when
 * the column has type-specific detail, a divider followed by that detail.
 */
export const ColumnProfileCard: FC<ColumnProfileCardProps> = (props) => {
  const { stats } = props;
  const columnDetail = renderColumnDetail(stats);

  // Column name + type badge, with the dtype on the line below.
  const heading = (
    <Stack gap="density-xxs">
      <Flex justify="between" align="center" gap="density-sm">
        <Text kind="body/bold/md" className="truncate font-mono">
          {stats.column_name}
        </Text>
        <Badge kind="outline">{getColumnTypeLabel(stats)}</Badge>
      </Flex>
      <Text kind="body/regular/xs" className="text-muted font-mono">
        {stats.simple_dtype}
      </Text>
    </Stack>
  );

  // Statistics shown for every column regardless of its type.
  const summaryRow = (
    <Flex gap="density-xl" className="flex-wrap">
      <Stat label="Records" value={formatStatCount(stats.num_records)} />
      <Stat label="Null %" value={formatPercent(getPercentNull(stats))} />
      <Stat label="Unique %" value={formatPercent(getPercentUnique(stats))} />
    </Flex>
  );

  return (
    <Card className="h-full">
      <Stack gap="density-md" className="h-full">
        <Stack gap="density-md">
          {heading}
          {summaryRow}
          {columnDetail ? <Divider /> : null}
        </Stack>

        {columnDetail}
      </Stack>
    </Card>
  );
};
Loading