diff --git a/web/packages/studio/src/constants/constants.ts b/web/packages/studio/src/constants/constants.ts index fade534278..3ecfa27f2c 100644 --- a/web/packages/studio/src/constants/constants.ts +++ b/web/packages/studio/src/constants/constants.ts @@ -19,4 +19,4 @@ export const DEFAULT_API_ERR_MSG = 'Invalid API response. Please try again later export const DEFAULT_TOOLS_FILE_NAME = 'tools.json'; export const EMPTY_FIELD_VALUE = '-'; export const EMPTY_FIELD_EMDASH_VALUE = '—'; -export const DEFAULT_BUILD_MODEL_NAME = 'nvidia-nvidia-llama-3-3-nemotron-super-49b-v1-5'; +export const DEFAULT_BUILD_MODEL_NAME = 'nvidia-llama-3-3-nemotron-super-49b-v1'; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/CategoricalHistogramChart.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/CategoricalHistogramChart.tsx new file mode 100644 index 0000000000..e6d6893abe --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/CategoricalHistogramChart.tsx @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { Stack, Text } from '@nvidia/foundations-react-core'; +import type { CategoricalHistogramData } from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes'; +import { FC, useMemo } from 'react'; +import { + Bar, + BarChart, + CartesianGrid, + LabelList, + ResponsiveContainer, + Tooltip, + XAxis, + YAxis, +} from 'recharts'; + +interface CategoricalHistogramChartProps { + histogram: CategoricalHistogramData; +} + +/** Show at most this many bars; remaining categories are summarized below. */ +const MAX_BARS = 12; +const CHART_HEIGHT = 220; +const TICK_STYLE = { fontSize: 11, fill: 'var(--text-color-base)' } as const; + +interface HistogramBar { + label: string; + count: number; +} + +/** Truncate long category labels so the axis stays legible. 
*/ +const truncateLabel = (label: string): string => + label.length > 14 ? `${label.slice(0, 13)}…` : label; + +/** + * Vertical bar chart of a categorical sampler column's value distribution. + * Bars are sorted by frequency and capped at {@link MAX_BARS}; any overflow is + * surfaced as a "+N more categories" note so the chart stays readable. + */ +export const CategoricalHistogramChart: FC = ({ histogram }) => { + const { bars, hiddenCount, hiddenTotal } = useMemo(() => { + const all: HistogramBar[] = histogram.categories.map((category, index) => ({ + label: String(category), + count: histogram.counts[index] ?? 0, + })); + all.sort((a, b) => b.count - a.count); + const visible = all.slice(0, MAX_BARS); + const hidden = all.slice(MAX_BARS); + return { + bars: visible, + hiddenCount: hidden.length, + hiddenTotal: hidden.reduce((sum, bar) => sum + bar.count, 0), + }; + }, [histogram]); + + if (bars.length === 0) { + return ( + + No category counts available. + + ); + } + + return ( + + + + + + + [value.toLocaleString(), 'Count']} + /> + + value.toLocaleString()} + /> + + + + {hiddenCount > 0 && ( + + +{hiddenCount} more {hiddenCount === 1 ? 'category' : 'categories'} ( + {hiddenTotal.toLocaleString()} records) + + )} + + ); +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/ColumnProfileCard.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/ColumnProfileCard.tsx new file mode 100644 index 0000000000..a67885de02 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/ColumnProfileCard.tsx @@ -0,0 +1,119 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { Badge, Card, Divider, Flex, Stack, Text } from '@nvidia/foundations-react-core'; +import { CategoricalHistogramChart } from '@studio/routes/DataDesignerJobDetailsRoute/CategoricalHistogramChart'; +import { + formatPercent, + formatStatCount, + formatStatDecimal, + getCategoricalHistogram, + getColumnTypeLabel, + getNumericalDistribution, + getPercentNull, + getPercentUnique, + isLLMColumnStatistics, + isValidationColumnStatistics, + type ColumnStatistics, +} from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes'; +import { FC } from 'react'; + +interface StatProps { + label: string; + value: string; +} + +const Stat: FC = ({ label, value }) => ( + + + {label} + + + {value} + + +); + +interface ColumnProfileCardProps { + stats: ColumnStatistics; +} + +/** + * Builds the column-specific detail body: a bar chart for categorical sampler + * distributions, a numeric summary for numerical samplers, token usage for LLM + * columns, and valid-record counts for validation columns. Returns `null` when + * a column has no extra detail (e.g. a uuid sampler), so the caller can skip the + * divider rather than render an empty section. + */ +const renderColumnDetail = (stats: ColumnStatistics): React.ReactNode => { + const histogram = getCategoricalHistogram(stats); + if (histogram) { + return ; + } + + const numerical = getNumericalDistribution(stats); + if (numerical) { + return ( + + + + + + + + ); + } + + if (isLLMColumnStatistics(stats)) { + return ( + + + + + ); + } + + if (isValidationColumnStatistics(stats)) { + return ( + + + + ); + } + + return null; +}; + +/** A single column's profile rendered as a self-contained card for the grid. 
*/ +export const ColumnProfileCard: FC = ({ stats }) => { + const detail = renderColumnDetail(stats); + + return ( + + + + + + + {stats.column_name} + + {getColumnTypeLabel(stats)} + + + {stats.simple_dtype} + + + + + + + + + {detail && } + + + {detail} + + + ); +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DataDesignerConfigPanel.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DataDesignerConfigPanel.tsx new file mode 100644 index 0000000000..99bd4e5ca0 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DataDesignerConfigPanel.tsx @@ -0,0 +1,177 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { KVPair } from '@nemo/common/src/components/KVPair'; +import { Banner, Divider, SidePanel, Spinner, Stack, Text } from '@nvidia/foundations-react-core'; +import { useDatasetFileContent } from '@studio/api/datasets/useDatasetFileContent'; +import { + BUILDER_CONFIG_FILENAME, + formatColumnTypeBreakdown, + summarizeBuilderConfig, + type BuilderConfigSummary, +} from '@studio/routes/DataDesignerJobDetailsRoute/builderConfig'; +import { useDataDesignerArtifactsFileset } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerArtifactsFileset'; +import { useMemo, type FC } from 'react'; + +export interface DataDesignerConfigPanelProps { + open: boolean; + onClose: () => void; +} + +const ConfigSummary: FC<{ summary: BuilderConfigSummary }> = ({ summary }) => ( + + + Overview + 0 + ? `${summary.columnCount} (${formatColumnTypeBreakdown(summary)})` + : '0' + } + /> + {summary.seed ? ( + + ) : null} + + + 0 ? summary.processorNames.join(', ') : '0'} + /> + {summary.libraryVersion ? ( + + ) : null} + + + {summary.models.length > 0 ? ( + <> + + + Models + {summary.models.map((model, index) => ( + + ))} + + + ) : null} + + {summary.columns.length > 0 ? 
( + <> + + + Columns + {summary.columns.map((column, index) => ( + + ))} + + + ) : null} + +); + +export const DataDesignerConfigPanel: FC = ({ open, onClose }) => { + const { filesetWorkspace, filesetName, files, isResultsLoading, isFilesLoading } = + useDataDesignerArtifactsFileset(); + + const builderConfigPath = useMemo( + () => + files.find( + (file) => + file.path === BUILDER_CONFIG_FILENAME || file.path.endsWith(`/${BUILDER_CONFIG_FILENAME}`) + )?.path, + [files] + ); + + const { + data: rawContent, + isLoading: isContentLoading, + isError: isContentError, + } = useDatasetFileContent({ + workspace: filesetWorkspace, + name: filesetName, + path: builderConfigPath ?? '', + enabled: open && Boolean(filesetWorkspace && filesetName && builderConfigPath), + }); + + const summary = useMemo(() => { + if (!rawContent) { + return null; + } + try { + return summarizeBuilderConfig(JSON.parse(rawContent) as unknown); + } catch { + return null; + } + }, [rawContent]); + + const isResolving = isResultsLoading || isFilesLoading; + const isLoading = isResolving || (Boolean(builderConfigPath) && isContentLoading); + + const handleOpenChange = (isOpen: boolean) => { + if (!isOpen) { + onClose(); + } + }; + + const renderBody = () => { + if (isLoading) { + return ( + + + + Loading job config... + + + ); + } + + if (!builderConfigPath) { + return ( + + No {BUILDER_CONFIG_FILENAME} was found in this job's output fileset. The + config is available once the job has produced its artifacts. + + ); + } + + if (isContentError || !summary) { + return ( + + The {BUILDER_CONFIG_FILENAME} file could not be loaded or parsed. 
+ + ); + } + + return ; + }; + + return ( + + {renderBody()} + + ); +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DatasetProfilerSection.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DatasetProfilerSection.tsx new file mode 100644 index 0000000000..516a465ff2 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/DatasetProfilerSection.tsx @@ -0,0 +1,112 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { PlatformJobTerminalStatuses } from '@nemo/common/src/constants/query'; +import { + Banner, + Flex, + Grid, + ProgressBar, + Skeleton, + Stack, + Text, +} from '@nvidia/foundations-react-core'; +import { ColumnProfileCard } from '@studio/routes/DataDesignerJobDetailsRoute/ColumnProfileCard'; +import { + formatPercent, + getPercentComplete, +} from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes'; +import { useDataDesignerJobAnalysis } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerJobAnalysis'; +import { useDataDesignerJobFromRoute } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute'; +import type { FC } from 'react'; + +const ColumnGrid: FC<{ children: React.ReactNode }> = ({ children }) => ( + + {children} + +); + +export const DatasetProfilerSection: FC = () => { + const { workspace, jobName, job } = useDataDesignerJobFromRoute(); + + const isTerminal = job?.status != null && PlatformJobTerminalStatuses.includes(job.status); + + const { analysis, hasAnalysis, isLoading, isError } = useDataDesignerJobAnalysis( + workspace, + jobName, + { enabled: isTerminal } + ); + + // Job still running: the profiler hasn't produced an analysis result yet. + if (!isTerminal) { + return ( + + The dataset profile will appear here once the job completes. 
+ + ); + } + + if (isError) { + return ( + + The profiler analysis could not be loaded for this job. + + ); + } + + if (isLoading && !analysis) { + return ( + + {Array.from({ length: 6 }, (_, i) => ( + + ))} + + ); + } + + if (!hasAnalysis) { + return ( + + No dataset profile was generated for this job. + + ); + } + + const columns = analysis?.column_statistics ?? []; + const percentComplete = analysis ? getPercentComplete(analysis) : 0; + + return ( + + {analysis ? ( + + + + {analysis.num_records.toLocaleString()} of{' '} + {analysis.target_num_records.toLocaleString()} rows + + + {formatPercent(percentComplete)} complete + + + + + ) : null} + + {columns.length > 0 ? ( + + {columns.map((stats) => ( + + ))} + + ) : ( + + The profile did not include any column statistics. + + )} + + ); +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/JobOutputFilesetSection.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/JobOutputFilesetSection.tsx index a4b3cbde08..6b819a1a2e 100644 --- a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/JobOutputFilesetSection.tsx +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/JobOutputFilesetSection.tsx @@ -1,39 +1,27 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0 -import { parseFilesetLocation } from '@nemo/common/src/components/DatasetFileSelect/parseFilesetLocation'; import { StudioDataView } from '@nemo/common/src/components/DataView/StudioDataView'; import { KVPair } from '@nemo/common/src/components/KVPair'; import { TableEmptyState } from '@nemo/common/src/components/TableEmptyState'; -import { PlatformJobTerminalStatuses } from '@nemo/common/src/constants/query'; import { useStudioDataViewState } from '@nemo/common/src/hooks/useStudioDataViewState'; -import { useDataDesignerListCreateJobResults } from '@nemo/sdk/generated/data-designer/api'; -import type { CreateJob as DataDesignerJob } from '@nemo/sdk/generated/data-designer/schema'; import { getFilesListFilesetFilesQueryKey, - useFilesListFilesetFiles, useFilesRetrieveFileset, } from '@nemo/sdk/generated/platform/api'; import type { FilesetFileOutput } from '@nemo/sdk/generated/platform/schema'; import { Anchor, Banner, Card, Stack, Text } from '@nvidia/foundations-react-core'; import { FilesetFilePreviewPanel } from '@studio/components/FilesetFilePreviewPanel'; import type { FileSystemFile } from '@studio/components/FilesTable/utils'; +import { useDataDesignerArtifactsFileset } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerArtifactsFileset'; import { getFilesetDetailsRoute } from '@studio/routes/utils'; import { getHumanReadableFileSize } from '@studio/util/files'; import { useQueryClient } from '@tanstack/react-query'; -import { ComponentProps, FC, useCallback, useEffect, useMemo, useState } from 'react'; +import { useCallback, useEffect, useMemo, useState, type ComponentProps, type FC } from 'react'; import { Link, useNavigate } from 'react-router-dom'; -const ARTIFACTS_RESULT_NAME = 'artifacts'; - type FileRow = FilesetFileOutput & { id: string }; -interface JobOutputFilesetSectionProps { - workspace: string; - jobName: string; - job: DataDesignerJob; -} - function fileRowToSystemFile(row: FileRow): 
FileSystemFile { return { type: 'file', @@ -43,50 +31,26 @@ function fileRowToSystemFile(row: FileRow): FileSystemFile { }; } -export const JobOutputFilesetSection: FC = ({ - workspace, - jobName, - job, -}) => { +export const JobOutputFilesetSection: FC = () => { const navigate = useNavigate(); const queryClient = useQueryClient(); const [previewFile, setPreviewFile] = useState(null); - const isTerminal = job.status != null && PlatformJobTerminalStatuses.includes(job.status); - - const { data: resultsResponse, isLoading: isResultsLoading } = - useDataDesignerListCreateJobResults(workspace, jobName, { - query: { - refetchInterval: isTerminal ? false : 3000, - }, - }); - - const artifactsResult = useMemo(() => { - const data = resultsResponse?.data; - if (!data?.length) { - return undefined; - } - const preferred = data.find((r) => r.name === ARTIFACTS_RESULT_NAME); - if (preferred) { - return preferred; - } - return data.find((r) => r.artifact_url && parseFilesetLocation(r.artifact_url, workspace)); - }, [resultsResponse?.data, workspace]); - - const filesetLoc = useMemo( - () => - artifactsResult?.artifact_url - ? parseFilesetLocation(artifactsResult.artifact_url, workspace) - : null, - [artifactsResult?.artifact_url, workspace] - ); - - const filesetWorkspace = filesetLoc?.workspace ?? ''; - const filesetName = filesetLoc?.name ?? ''; - const listFilesParams = useMemo( - () => (filesetLoc?.filesListPathPrefix ? 
{ path: filesetLoc.filesListPathPrefix } : undefined), - [filesetLoc?.filesListPathPrefix] - ); + const { + isTerminal, + artifactsResult, + filesetLoc, + filesetWorkspace, + filesetName, + listFilesParams, + files, + isResultsLoading, + isResultsError, + resultsError, + isFilesLoading, + isFilesError: isListFilesError, + filesError: listFilesError, + } = useDataDesignerArtifactsFileset(); const { data: filesetMeta, @@ -99,25 +63,11 @@ export const JobOutputFilesetSection: FC = ({ }, }); - const { - data: listFilesResponse, - isLoading: isFilesLoading, - isError: isListFilesError, - error: listFilesError, - } = useFilesListFilesetFiles(filesetWorkspace, filesetName, listFilesParams, { - query: { - enabled: Boolean(filesetWorkspace && filesetName), - }, - }); - const dataViewState = useStudioDataViewState({ defaultPageSize: 10, }); - const rows: FileRow[] = useMemo(() => { - const fileList = listFilesResponse?.data ?? []; - return fileList.map((f) => ({ ...f, id: f.file_ref })); - }, [listFilesResponse?.data]); + const rows: FileRow[] = useMemo(() => files.map((f) => ({ ...f, id: f.file_ref })), [files]); const makeColumns: ComponentProps>['makeColumns'] = useMemo( () => (helpers) => [ @@ -190,10 +140,10 @@ export const JobOutputFilesetSection: FC = ({ } }, [filesetWorkspace, filesetName]); - if (isResultsLoading && !resultsResponse) { + if (isResultsLoading && !artifactsResult) { return ( - + Loading job results… @@ -202,11 +152,24 @@ export const JobOutputFilesetSection: FC = ({ ); } + if (isResultsError) { + return ( + + + + {resultsError instanceof Error + ? resultsError.message + : 'The job results list could not be loaded.'} + + + + ); + } + if (!artifactsResult) { return ( - - Output fileset + {isTerminal ? 'No artifacts result was returned for this job.' 
@@ -220,8 +183,7 @@ export const JobOutputFilesetSection: FC = ({ if (!filesetLoc) { return ( - - Output fileset + = ({ return ( <> - + Output fileset @@ -279,10 +241,6 @@ export const JobOutputFilesetSection: FC = ({ - Files - - Select a row to preview the file. - dataViewState={dataViewState} makeColumns={makeColumns} diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.test.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.test.ts new file mode 100644 index 0000000000..3d543dcb6a --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.test.ts @@ -0,0 +1,91 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { + formatColumnTypeBreakdown, + summarizeBuilderConfig, +} from '@studio/routes/DataDesignerJobDetailsRoute/builderConfig'; + +const fullConfig = { + library_version: '1.2.3', + data_designer: { + columns: [ + { name: 'product_id', column_type: 'sampler' }, + { name: 'category', column_type: 'sampler' }, + { name: 'review_text', column_type: 'llm-text', model_alias: 'review-model' }, + { name: 'sentiment', column_type: 'llm-structured', model_alias: 'review-model' }, + ], + model_configs: [ + { alias: 'review-model', model: 'meta/llama-3.1-8b-instruct', provider: 'nvidia' }, + ], + seed_config: { source: { seed_type: 'fileset' }, sampling_strategy: 'shuffle' }, + constraints: [{}, {}], + profilers: [{}], + processors: [{ name: 'dedup' }, { name: 'drop-pii' }], + }, +}; + +describe('summarizeBuilderConfig', () => { + it('returns null for non-config payloads', () => { + expect(summarizeBuilderConfig(null)).toBeNull(); + expect(summarizeBuilderConfig('not json')).toBeNull(); + expect(summarizeBuilderConfig({})).toBeNull(); + expect(summarizeBuilderConfig([])).toBeNull(); + }); + + it('extracts columns, models, seed, and counts from a full config', () => { + 
const summary = summarizeBuilderConfig(fullConfig); + expect(summary).not.toBeNull(); + expect(summary?.columnCount).toBe(4); + expect(summary?.columns).toEqual([ + { name: 'product_id', type: 'sampler', modelAlias: undefined }, + { name: 'category', type: 'sampler', modelAlias: undefined }, + { name: 'review_text', type: 'llm-text', modelAlias: 'review-model' }, + { name: 'sentiment', type: 'llm-structured', modelAlias: 'review-model' }, + ]); + expect(summary?.models).toEqual([ + { alias: 'review-model', model: 'meta/llama-3.1-8b-instruct', provider: 'nvidia' }, + ]); + expect(summary?.seed).toEqual({ type: 'fileset', samplingStrategy: 'shuffle' }); + expect(summary?.constraintCount).toBe(2); + expect(summary?.profilerCount).toBe(1); + expect(summary?.processorNames).toEqual(['dedup', 'drop-pii']); + expect(summary?.libraryVersion).toBe('1.2.3'); + }); + + it('orders the column-type breakdown by count then name', () => { + const summary = summarizeBuilderConfig(fullConfig); + expect(summary?.columnTypeBreakdown).toEqual([ + { type: 'sampler', count: 2 }, + { type: 'llm-structured', count: 1 }, + { type: 'llm-text', count: 1 }, + ]); + expect(formatColumnTypeBreakdown(summary!)).toBe('2 sampler, 1 llm-structured, 1 llm-text'); + }); + + it('tolerates a minimal config and omits the absent seed', () => { + const summary = summarizeBuilderConfig({ data_designer: { columns: [] } }); + expect(summary).not.toBeNull(); + expect(summary?.columnCount).toBe(0); + expect(summary?.models).toEqual([]); + expect(summary?.seed).toBeUndefined(); + expect(summary?.constraintCount).toBe(0); + expect(summary?.processorNames).toEqual([]); + expect(summary?.libraryVersion).toBeUndefined(); + }); + + it('falls back gracefully for malformed entries', () => { + const summary = summarizeBuilderConfig({ + data_designer: { + columns: [{ column_type: 'sampler' }, { name: 'x' }, 'garbage'], + model_configs: [{ alias: 'm1' }], + }, + }); + expect(summary?.columns).toEqual([ + { name: 
'(unnamed)', type: 'sampler', modelAlias: undefined }, + { name: 'x', type: 'unknown', modelAlias: undefined }, + { name: '(unnamed)', type: 'unknown', modelAlias: undefined }, + ]); + expect(summary?.models).toEqual([{ alias: 'm1', model: '—', provider: undefined }]); + }); +}); diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.ts new file mode 100644 index 0000000000..2766bf151c --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/builderConfig.ts @@ -0,0 +1,124 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Lightweight reader for the Data Designer `builder_config.json` artifact. + * + * The file is the serialized `BuilderConfig` Pydantic model + * (`{ data_designer: DataDesignerConfig, library_version }`). Rather than mirror + * the full discriminated-union config (that's the schema-inspector ticket), this + * extracts just the important fields for an at-a-glance summary, parsing + * defensively so an unexpected shape degrades gracefully instead of throwing. + * + * Filename inside the artifacts fileset. 
+ */ +export const BUILDER_CONFIG_FILENAME = 'builder_config.json'; + +export interface BuilderConfigColumnSummary { + readonly name: string; + readonly type: string; + readonly modelAlias?: string; +} + +export interface BuilderConfigModelSummary { + readonly alias: string; + readonly model: string; + readonly provider?: string; +} + +export interface BuilderConfigSeedSummary { + readonly type: string; + readonly samplingStrategy?: string; +} + +export interface BuilderConfigSummary { + readonly columnCount: number; + readonly columns: BuilderConfigColumnSummary[]; + readonly columnTypeBreakdown: Array<{ type: string; count: number }>; + readonly models: BuilderConfigModelSummary[]; + readonly seed?: BuilderConfigSeedSummary; + readonly constraintCount: number; + readonly profilerCount: number; + readonly processorNames: string[]; + readonly libraryVersion?: string; +} + +const asRecord = (value: unknown): Record | undefined => + typeof value === 'object' && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; + +const asString = (value: unknown): string | undefined => + typeof value === 'string' ? value : undefined; + +const asArray = (value: unknown): unknown[] => (Array.isArray(value) ? value : []); + +const UNNAMED = '(unnamed)'; + +/** + * Parses raw `builder_config.json` contents into a {@link BuilderConfigSummary}. + * Returns `null` when the payload is not a recognizable builder config (missing + * the top-level `data_designer` object). + */ +export const summarizeBuilderConfig = (raw: unknown): BuilderConfigSummary | null => { + const root = asRecord(raw); + const dataDesigner = asRecord(root?.data_designer); + if (!root || !dataDesigner) { + return null; + } + + const columns: BuilderConfigColumnSummary[] = asArray(dataDesigner.columns).map((column) => { + const record = asRecord(column) ?? {}; + return { + name: asString(record.name) ?? UNNAMED, + type: asString(record.column_type) ?? 
'unknown', + modelAlias: asString(record.model_alias), + }; + }); + + const breakdownCounts = new Map(); + for (const column of columns) { + breakdownCounts.set(column.type, (breakdownCounts.get(column.type) ?? 0) + 1); + } + const columnTypeBreakdown = [...breakdownCounts.entries()] + .map(([type, count]) => ({ type, count })) + .sort((a, b) => b.count - a.count || a.type.localeCompare(b.type)); + + const models: BuilderConfigModelSummary[] = asArray(dataDesigner.model_configs).map((model) => { + const record = asRecord(model) ?? {}; + return { + alias: asString(record.alias) ?? UNNAMED, + model: asString(record.model) ?? '—', + provider: asString(record.provider), + }; + }); + + const seedConfig = asRecord(dataDesigner.seed_config); + const seedSource = asRecord(seedConfig?.source); + const seed: BuilderConfigSeedSummary | undefined = seedConfig + ? { + type: asString(seedSource?.seed_type) ?? 'unknown', + samplingStrategy: asString(seedConfig.sampling_strategy), + } + : undefined; + + const processorNames = asArray(dataDesigner.processors).map( + (processor) => asString(asRecord(processor)?.name) ?? UNNAMED + ); + + return { + columnCount: columns.length, + columns, + columnTypeBreakdown, + models, + seed, + constraintCount: asArray(dataDesigner.constraints).length, + profilerCount: asArray(dataDesigner.profilers).length, + processorNames, + libraryVersion: asString(root.library_version), + }; +}; + +/** Formats the column-type breakdown as a compact label (e.g. `3 sampler, 2 llm-text`). 
*/ +export const formatColumnTypeBreakdown = (summary: BuilderConfigSummary): string => + summary.columnTypeBreakdown.map(({ type, count }) => `${count} ${type}`).join(', '); diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.test.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.test.ts new file mode 100644 index 0000000000..72baca6057 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.test.ts @@ -0,0 +1,276 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { + describeColumnStats, + formatPercent, + formatStatCount, + formatStatDecimal, + getCategoricalHistogram, + getColumnTypeLabel, + getNumericalDistribution, + getPercentComplete, + getPercentNull, + getPercentUnique, + MISSING_VALUE, + type ColumnStatistics, + type DatasetProfilerResults, +} from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes'; + +const baseStats = { + column_name: 'col', + num_records: 100, + num_null: 5, + num_unique: 80, + pyarrow_dtype: 'string', + simple_dtype: 'str', +}; + +describe('formatStatCount', () => { + it('formats numbers with locale separators', () => { + expect(formatStatCount(1234)).toBe((1234).toLocaleString()); + }); + + it('renders the calculation-failed sentinel as an em dash', () => { + expect(formatStatCount(MISSING_VALUE.CALCULATION_FAILED)).toBe('—'); + }); + + it('renders the output-format-error sentinel as text', () => { + expect(formatStatCount(MISSING_VALUE.OUTPUT_FORMAT_ERROR)).toBe('format error'); + }); + + it('renders nullish values as an em dash', () => { + expect(formatStatCount(undefined)).toBe('—'); + expect(formatStatCount(null)).toBe('—'); + }); +}); + +describe('formatStatDecimal', () => { + it('limits to one fractional digit', () => { + expect(formatStatDecimal(12.345)).toBe((12.3).toLocaleString()); + 
}); + + it('passes sentinels through', () => { + expect(formatStatDecimal(MISSING_VALUE.CALCULATION_FAILED)).toBe('—'); + }); +}); + +describe('formatPercent', () => { + it('formats a percentage to one decimal', () => { + expect(formatPercent(42.5)).toBe('42.5%'); + }); + + it('renders undefined as an em dash', () => { + expect(formatPercent(undefined)).toBe('—'); + }); +}); + +describe('getPercentComplete', () => { + it('computes completion percentage', () => { + const results = { num_records: 50, target_num_records: 200 } as DatasetProfilerResults; + expect(getPercentComplete(results)).toBe(25); + }); + + it('returns 0 when the target is non-positive', () => { + const results = { num_records: 10, target_num_records: 0 } as DatasetProfilerResults; + expect(getPercentComplete(results)).toBe(0); + }); +}); + +describe('getPercentNull / getPercentUnique', () => { + it('computes percentages from counts', () => { + const stats = { ...baseStats, column_type: 'general' } as ColumnStatistics; + expect(getPercentNull(stats)).toBe(5); + expect(getPercentUnique(stats)).toBe(80); + }); + + it('returns undefined when a count is a missing-value sentinel', () => { + const stats = { + ...baseStats, + num_null: MISSING_VALUE.CALCULATION_FAILED, + num_unique: MISSING_VALUE.CALCULATION_FAILED, + column_type: 'general', + } as ColumnStatistics; + expect(getPercentNull(stats)).toBeUndefined(); + expect(getPercentUnique(stats)).toBeUndefined(); + }); +}); + +describe('getColumnTypeLabel', () => { + it('returns the column type for non-sampler columns', () => { + const stats = { ...baseStats, column_type: 'llm-text' } as ColumnStatistics; + expect(getColumnTypeLabel(stats)).toBe('llm-text'); + }); + + it('includes the sampler subtype for sampler columns', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'category', + distribution_type: 'categorical', + distribution: null, + } as ColumnStatistics; + expect(getColumnTypeLabel(stats)).toBe('sampler · 
category'); + }); +}); + +describe('describeColumnStats (variant → render mapping)', () => { + it('summarizes LLM columns with token usage', () => { + const stats = { + ...baseStats, + column_type: 'llm-text', + input_tokens_mean: 120.4, + input_tokens_median: 100, + input_tokens_stddev: 10, + output_tokens_mean: 45.9, + output_tokens_median: 40, + output_tokens_stddev: 5, + } as ColumnStatistics; + expect(describeColumnStats(stats)).toBe( + `Tokens in/out (avg): ${formatStatDecimal(120.4)} / ${formatStatDecimal(45.9)}` + ); + }); + + it('summarizes validation columns with valid-record counts', () => { + const stats = { + ...baseStats, + column_type: 'validation', + num_valid_records: 95, + } as ColumnStatistics; + expect(describeColumnStats(stats)).toBe('Valid records: 95'); + }); + + it('summarizes categorical sampler columns with the most common value', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'category', + distribution_type: 'categorical', + distribution: { + most_common_value: 'positive', + least_common_value: 'negative', + histogram: { categories: ['positive', 'negative'], counts: [70, 30] }, + }, + } as ColumnStatistics; + expect(describeColumnStats(stats)).toBe('Most common: positive'); + }); + + it('summarizes numerical sampler columns with min/max/mean', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'gaussian', + distribution_type: 'numerical', + distribution: { min: 1, max: 5, mean: 3.2, stddev: 1.1, median: 3 }, + } as ColumnStatistics; + expect(describeColumnStats(stats)).toBe( + `min ${formatStatDecimal(1)} · max ${formatStatDecimal(5)} · mean ${formatStatDecimal(3.2)}` + ); + }); + + it('falls back to an em dash for plain and unknown column types', () => { + expect(describeColumnStats({ ...baseStats, column_type: 'general' } as ColumnStatistics)).toBe( + '—' + ); + expect( + describeColumnStats({ ...baseStats, column_type: 'some-plugin-type' } as ColumnStatistics) + 
).toBe('—'); + }); + + it('handles a missing sampler distribution without throwing', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'uuid', + distribution_type: 'other', + distribution: null, + } as ColumnStatistics; + expect(describeColumnStats(stats)).toBe('—'); + }); +}); + +describe('getCategoricalHistogram', () => { + const histogram = { categories: ['positive', 'negative'], counts: [70, 30] }; + + it('returns the histogram for a categorical sampler column', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'category', + distribution_type: 'categorical', + distribution: { + most_common_value: 'positive', + least_common_value: 'negative', + histogram, + }, + } as ColumnStatistics; + expect(getCategoricalHistogram(stats)).toEqual(histogram); + }); + + it('returns null for numerical sampler columns', () => { + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'gaussian', + distribution_type: 'numerical', + distribution: { min: 1, max: 5, mean: 3, stddev: 1, median: 3 }, + } as ColumnStatistics; + expect(getCategoricalHistogram(stats)).toBeNull(); + }); + + it('returns null for non-sampler columns and missing distributions', () => { + expect( + getCategoricalHistogram({ ...baseStats, column_type: 'llm-text' } as ColumnStatistics) + ).toBeNull(); + expect( + getCategoricalHistogram({ + ...baseStats, + column_type: 'sampler', + sampler_type: 'uuid', + distribution_type: 'other', + distribution: null, + } as ColumnStatistics) + ).toBeNull(); + expect( + getCategoricalHistogram({ + ...baseStats, + column_type: 'sampler', + sampler_type: 'category', + distribution_type: 'categorical', + distribution: MISSING_VALUE.CALCULATION_FAILED, + } as ColumnStatistics) + ).toBeNull(); + }); +}); + +describe('getNumericalDistribution', () => { + it('returns the distribution for a numerical sampler column', () => { + const distribution = { min: 1, max: 5, mean: 3.2, stddev: 1.1, median: 3 
}; + const stats = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'gaussian', + distribution_type: 'numerical', + distribution, + } as ColumnStatistics; + expect(getNumericalDistribution(stats)).toEqual(distribution); + }); + + it('returns null for categorical and non-sampler columns', () => { + const categorical = { + ...baseStats, + column_type: 'sampler', + sampler_type: 'category', + distribution_type: 'categorical', + distribution: { + most_common_value: 'a', + least_common_value: 'b', + histogram: { categories: ['a', 'b'], counts: [1, 2] }, + }, + } as ColumnStatistics; + expect(getNumericalDistribution(categorical)).toBeNull(); + expect( + getNumericalDistribution({ ...baseStats, column_type: 'general' } as ColumnStatistics) + ).toBeNull(); + }); +}); diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.ts new file mode 100644 index 0000000000..1e7cf9bdb6 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes.ts @@ -0,0 +1,290 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Hand-written TypeScript mirror of the Data Designer `DatasetProfilerResults` + * Pydantic model (data_designer.config.analysis.dataset_profiler). + * + */ + +/** Sentinel emitted by the profiler when a statistic could not be computed. */ +export const MISSING_VALUE = { + CALCULATION_FAILED: '--', + OUTPUT_FORMAT_ERROR: 'output_format_error', +} as const; + +export type MissingValue = (typeof MISSING_VALUE)[keyof typeof MISSING_VALUE]; + +/** A numeric statistic that may instead be a {@link MissingValue} sentinel. 
*/ +export type MaybeMissing = T | MissingValue; + +export const isMissingValue = (value: unknown): value is MissingValue => + value === MISSING_VALUE.CALCULATION_FAILED || value === MISSING_VALUE.OUTPUT_FORMAT_ERROR; + +export interface CategoricalHistogramData { + categories: Array; + counts: number[]; +} + +export interface CategoricalDistribution { + most_common_value: string | number; + least_common_value: string | number; + histogram: CategoricalHistogramData; +} + +export interface NumericalDistribution { + min: number; + max: number; + mean: number; + stddev: number; + median: number; +} + +/** Fields shared by every column-statistics variant (GeneralColumnStatistics). */ +interface BaseColumnStatistics { + column_name: string; + num_records: MaybeMissing; + num_null: MaybeMissing; + num_unique: MaybeMissing; + pyarrow_dtype: string; + simple_dtype: string; +} + +export interface GeneralColumnStatistics extends BaseColumnStatistics { + column_type: 'general'; +} + +/** Token-usage metrics shared by all LLM-backed column types. 
*/ +interface LLMColumnStatisticsBase extends BaseColumnStatistics { + output_tokens_mean: MaybeMissing; + output_tokens_median: MaybeMissing; + output_tokens_stddev: MaybeMissing; + input_tokens_mean: MaybeMissing; + input_tokens_median: MaybeMissing; + input_tokens_stddev: MaybeMissing; +} + +export interface LLMTextColumnStatistics extends LLMColumnStatisticsBase { + column_type: 'llm-text'; +} + +export interface LLMCodeColumnStatistics extends LLMColumnStatisticsBase { + column_type: 'llm-code'; +} + +export interface LLMStructuredColumnStatistics extends LLMColumnStatisticsBase { + column_type: 'llm-structured'; +} + +export interface LLMJudgedColumnStatistics extends LLMColumnStatisticsBase { + column_type: 'llm-judge'; +} + +export interface SamplerColumnStatistics extends BaseColumnStatistics { + column_type: 'sampler'; + sampler_type: string; + distribution_type: 'categorical' | 'numerical' | 'text' | 'other' | 'unknown'; + distribution: CategoricalDistribution | NumericalDistribution | MissingValue | null; +} + +export interface SeedDatasetColumnStatistics extends BaseColumnStatistics { + column_type: 'seed-dataset'; +} + +export interface ValidationColumnStatistics extends BaseColumnStatistics { + column_type: 'validation'; + num_valid_records: MaybeMissing; +} + +export interface ExpressionColumnStatistics extends BaseColumnStatistics { + column_type: 'expression'; +} + +/** + * Plugin-provided column generators dynamically register General-shaped + * statistics classes with their own `column_type`, so an unknown discriminator + * still carries the base fields. 
+ */ +export interface UnknownColumnStatistics extends BaseColumnStatistics { + column_type: string; +} + +export type ColumnStatistics = + | GeneralColumnStatistics + | LLMTextColumnStatistics + | LLMCodeColumnStatistics + | LLMStructuredColumnStatistics + | LLMJudgedColumnStatistics + | SamplerColumnStatistics + | SeedDatasetColumnStatistics + | ValidationColumnStatistics + | ExpressionColumnStatistics + | UnknownColumnStatistics; + +export interface DatasetProfilerResults { + num_records: number; + target_num_records: number; + column_statistics: ColumnStatistics[]; + side_effect_column_names?: string[] | null; + // Advanced profiler results (e.g. JudgeScoreProfilerResults). Rendered as a + // follow-up; typed loosely until the schema stabilizes. + column_profiles?: unknown[] | null; +} + +const LLM_COLUMN_TYPES = new Set(['llm-text', 'llm-code', 'llm-structured', 'llm-judge']); + +export type LLMColumnStatistics = + | LLMTextColumnStatistics + | LLMCodeColumnStatistics + | LLMStructuredColumnStatistics + | LLMJudgedColumnStatistics; + +export const isLLMColumnStatistics = (stats: ColumnStatistics): stats is LLMColumnStatistics => + LLM_COLUMN_TYPES.has(stats.column_type); + +export const isValidationColumnStatistics = ( + stats: ColumnStatistics +): stats is ValidationColumnStatistics => stats.column_type === 'validation'; + +export const isSamplerColumnStatistics = ( + stats: ColumnStatistics +): stats is SamplerColumnStatistics => stats.column_type === 'sampler'; + +/** Completion percentage of the dataset (mirrors `percent_complete`). */ +export const getPercentComplete = (results: DatasetProfilerResults): number => { + if (results.target_num_records <= 0) { + return 0; + } + const percent = (100 * results.num_records) / results.target_num_records; + return Math.max(0, Math.min(100, percent)); +}; + +/** `num_unique / num_records` as a percentage, or undefined when unavailable. 
*/ +export const getPercentUnique = (stats: ColumnStatistics): number | undefined => { + if ( + isMissingValue(stats.num_unique) || + isMissingValue(stats.num_records) || + stats.num_records <= 0 + ) { + return undefined; + } + return (100 * stats.num_unique) / stats.num_records; +}; + +/** `num_null / num_records` as a percentage, or undefined when unavailable. */ +export const getPercentNull = (stats: ColumnStatistics): number | undefined => { + if ( + isMissingValue(stats.num_null) || + isMissingValue(stats.num_records) || + stats.num_records <= 0 + ) { + return undefined; + } + return (100 * stats.num_null) / stats.num_records; +}; + +const EM_DASH = '—'; + +/** Render a sentinel-or-undefined value as display text, or `null` if it's a real value. */ +const formatSentinel = (value: MaybeMissing | null | undefined): string | null => { + if (value == null) { + return EM_DASH; + } + if (value === MISSING_VALUE.OUTPUT_FORMAT_ERROR) { + return 'format error'; + } + if (value === MISSING_VALUE.CALCULATION_FAILED) { + return EM_DASH; + } + return null; +}; + +/** Format a whole-number statistic, surfacing missing-value sentinels as text. */ +export const formatStatCount = (value: MaybeMissing | null | undefined): string => + formatSentinel(value) ?? (value as number).toLocaleString(); + +/** Format a (possibly fractional) statistic, surfacing missing-value sentinels as text. */ +export const formatStatDecimal = (value: MaybeMissing | null | undefined): string => + formatSentinel(value) ?? + (value as number).toLocaleString(undefined, { maximumFractionDigits: 1 }); + +/** Format a percentage produced by the getPercent* helpers. */ +export const formatPercent = (value: number | undefined): string => + value == null ? EM_DASH : `${value.toFixed(1)}%`; + +/** Human-readable label for a column's generator type (e.g. `sampler · category`). */ +export const getColumnTypeLabel = (stats: ColumnStatistics): string => + isSamplerColumnStatistics(stats) ? 
`sampler · ${stats.sampler_type}` : stats.column_type; + +export const isCategoricalDistribution = ( + distribution: CategoricalDistribution | NumericalDistribution +): distribution is CategoricalDistribution => 'histogram' in distribution; + +/** + * Categorical histogram for a sampler column, or `null` when the column isn't a + * sampler, has no distribution, or its distribution isn't categorical. Use this + * to decide whether a column should render a bar chart of its value counts. + */ +export const getCategoricalHistogram = ( + stats: ColumnStatistics +): CategoricalHistogramData | null => { + if (!isSamplerColumnStatistics(stats)) { + return null; + } + const { distribution } = stats; + if (distribution == null || isMissingValue(distribution)) { + return null; + } + return isCategoricalDistribution(distribution) ? distribution.histogram : null; +}; + +/** + * Numerical distribution (min/max/mean/median/stddev) for a sampler column, or + * `null` when the column isn't a sampler with a numerical distribution. + */ +export const getNumericalDistribution = (stats: ColumnStatistics): NumericalDistribution | null => { + if (!isSamplerColumnStatistics(stats)) { + return null; + } + const { distribution } = stats; + if (distribution == null || isMissingValue(distribution)) { + return null; + } + return isCategoricalDistribution(distribution) ? null : distribution; +}; + +const describeSamplerDistribution = (stats: SamplerColumnStatistics): string => { + const { distribution } = stats; + if (distribution == null) { + return EM_DASH; + } + if (isMissingValue(distribution)) { + return formatSentinel(distribution) ?? EM_DASH; + } + if (isCategoricalDistribution(distribution)) { + return `Most common: ${distribution.most_common_value}`; + } + return `min ${formatStatDecimal(distribution.min)} · max ${formatStatDecimal( + distribution.max + )} · mean ${formatStatDecimal(distribution.mean)}`; +}; + +/** + * Variant-specific one-line summary for a column's "Details" cell. 
LLM columns + * surface token usage, validation columns surface valid-record counts, sampler + * columns surface their distribution, and everything else falls back to a dash. + */ +export const describeColumnStats = (stats: ColumnStatistics): string => { + if (isLLMColumnStatistics(stats)) { + return `Tokens in/out (avg): ${formatStatDecimal( + stats.input_tokens_mean + )} / ${formatStatDecimal(stats.output_tokens_mean)}`; + } + if (isValidationColumnStatistics(stats)) { + return `Valid records: ${formatStatCount(stats.num_valid_records)}`; + } + if (isSamplerColumnStatistics(stats)) { + return describeSamplerDistribution(stats); + } + return EM_DASH; +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/index.tsx b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/index.tsx index fbd5ac180c..669feb9b8f 100644 --- a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/index.tsx +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/index.tsx @@ -3,39 +3,39 @@ import { ErrorMessage } from '@nemo/common/src/components/ErrorMessage'; import { StatusBadge } from '@nemo/common/src/components/StatusBadge'; -import { PlatformJobTerminalStatuses } from '@nemo/common/src/constants/query'; -import { useDataDesignerGetCreateJob } from '@nemo/sdk/generated/data-designer/api'; -import { Button, Card, Stack, Text } from '@nvidia/foundations-react-core'; +import { + Button, + Flex, + Stack, + TabsContent, + TabsList, + TabsRoot, + TabsTrigger, + Text, +} from '@nvidia/foundations-react-core'; import { AccessibleTitle } from '@studio/components/AccessibleTitle'; import { Loading } from '@studio/components/Layouts/Loading'; -import { ROUTE_PARAMS } from '@studio/constants/routes'; -import { useWorkspaceFromPath } from '@studio/hooks/useWorkspaceFromPath'; import { useBreadcrumbs } from '@studio/providers/breadcrumbs/useBreadcrumbs'; +import { DataDesignerConfigPanel } from 
'@studio/routes/DataDesignerJobDetailsRoute/DataDesignerConfigPanel'; +import { DatasetProfilerSection } from '@studio/routes/DataDesignerJobDetailsRoute/DatasetProfilerSection'; import { JobOutputFilesetSection } from '@studio/routes/DataDesignerJobDetailsRoute/JobOutputFilesetSection'; +import { useDataDesignerJobFromRoute } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute'; import { getDataDesignerJobListRoute } from '@studio/routes/utils'; -import { useRequiredPathParams } from '@studio/util/hooks/useRequiredPathParams'; -import { ArrowLeft } from 'lucide-react'; -import { FC } from 'react'; +import { ArrowLeft, FileJson } from 'lucide-react'; +import { useState, type FC } from 'react'; import { Link } from 'react-router-dom'; export const DataDesignerJobDetailsRoute: FC = () => { - const workspace = useWorkspaceFromPath(); - const { dataDesignerJobName } = useRequiredPathParams([ROUTE_PARAMS.dataDesignerJobName]); - const { - data: job, + workspace, + jobName: dataDesignerJobName, + job, isLoading, isError, refetch, - } = useDataDesignerGetCreateJob(workspace, dataDesignerJobName, { - query: { - refetchInterval: (query) => { - const status = query.state.data?.status; - const isTerminated = status && PlatformJobTerminalStatuses.includes(status); - return isTerminated ? false : 3000; - }, - }, - }); + } = useDataDesignerJobFromRoute(); + + const [isConfigPanelOpen, setIsConfigPanelOpen] = useState(false); useBreadcrumbs({ items: [ @@ -78,25 +78,23 @@ export const DataDesignerJobDetailsRoute: FC = () => { return ( - - - - - - {job.name} - {job.description && ( - - {job.description} - - )} - - Status: + + + + + {job.name} {job.status ? 
: null} - + + + + {job.description && ( + + {job.description} + + )} + {job.created_at && ( Created: {new Date(job.created_at).toLocaleString()} @@ -107,11 +105,29 @@ export const DataDesignerJobDetailsRoute: FC = () => { Updated: {new Date(job.updated_at).toLocaleString()} )} - - + + + + + + Profile + Output files + - + + + + + + + + + + setIsConfigPanelOpen(false)} + /> ); }; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerArtifactsFileset.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerArtifactsFileset.ts new file mode 100644 index 0000000000..7c5ab297f2 --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerArtifactsFileset.ts @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { parseFilesetLocation } from '@nemo/common/src/components/DatasetFileSelect/parseFilesetLocation'; +import { PlatformJobTerminalStatuses } from '@nemo/common/src/constants/query'; +import { useDataDesignerListCreateJobResults } from '@nemo/sdk/generated/data-designer/api'; +import { useFilesListFilesetFiles } from '@nemo/sdk/generated/platform/api'; +import type { FilesetFileOutput } from '@nemo/sdk/generated/platform/schema'; +import { useDataDesignerJobFromRoute } from '@studio/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute'; +import { useMemo } from 'react'; + +/** Result name under which a job registers its output artifacts fileset. */ +const ARTIFACTS_RESULT_NAME = 'artifacts'; + +/** + * Resolves the output artifacts fileset for the current Data Designer job: + * finds the `artifacts` result, parses its fileset location, and lists the + * files inside it. Shared by the output-fileset section and the config panel + * so both agree on which fileset (and files) belong to the job. 
+ */ +export const useDataDesignerArtifactsFileset = () => { + const { workspace, jobName, job } = useDataDesignerJobFromRoute(); + + const isTerminal = job?.status != null && PlatformJobTerminalStatuses.includes(job.status); + + const { + data: resultsResponse, + isLoading: isResultsLoading, + isError: isResultsError, + error: resultsError, + } = useDataDesignerListCreateJobResults(workspace, jobName, { + query: { refetchInterval: isTerminal ? false : 3000 }, + }); + + const artifactsResult = useMemo(() => { + const data = resultsResponse?.data; + if (!data?.length) { + return undefined; + } + const preferred = data.find((r) => r.name === ARTIFACTS_RESULT_NAME); + if (preferred) { + return preferred; + } + return data.find((r) => r.artifact_url && parseFilesetLocation(r.artifact_url, workspace)); + }, [resultsResponse?.data, workspace]); + + const filesetLoc = useMemo( + () => + artifactsResult?.artifact_url + ? parseFilesetLocation(artifactsResult.artifact_url, workspace) + : null, + [artifactsResult?.artifact_url, workspace] + ); + + const filesetWorkspace = filesetLoc?.workspace ?? ''; + const filesetName = filesetLoc?.name ?? ''; + const listFilesParams = useMemo( + () => (filesetLoc?.filesListPathPrefix ? { path: filesetLoc.filesListPathPrefix } : undefined), + [filesetLoc?.filesListPathPrefix] + ); + + const { + data: listFilesResponse, + isLoading: isFilesLoading, + isError: isFilesError, + error: filesError, + } = useFilesListFilesetFiles(filesetWorkspace, filesetName, listFilesParams, { + query: { + enabled: Boolean(filesetWorkspace && filesetName), + }, + }); + + const files: FilesetFileOutput[] = useMemo( + () => listFilesResponse?.data ?? 
[], + [listFilesResponse?.data] + ); + + return { + workspace, + jobName, + job, + isTerminal, + artifactsResult, + filesetLoc, + filesetWorkspace, + filesetName, + listFilesParams, + files, + isResultsLoading, + isResultsError, + resultsError, + isFilesLoading, + isFilesError, + filesError, + }; +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobAnalysis.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobAnalysis.ts new file mode 100644 index 0000000000..96b8abe47d --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobAnalysis.ts @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { + dataDesignerDownloadCreateJobResult, + getDataDesignerDownloadCreateJobResultQueryKey, + useDataDesignerListCreateJobResults, +} from '@nemo/sdk/generated/data-designer/api'; +import type { DatasetProfilerResults } from '@studio/routes/DataDesignerJobDetailsRoute/datasetProfilerTypes'; +import { useQuery } from '@tanstack/react-query'; + +/** Result name under which the profiler analysis JSON is registered for a job. */ +const ANALYSIS_RESULT_NAME = 'analysis'; + +interface UseDataDesignerJobAnalysisOptions { + /** Gate the download until the job has reached a terminal status. */ + enabled?: boolean; +} + +/** + * Loads and parses the {@link DatasetProfilerResults} for a Data Designer job. + * + * The profiler output is exposed as a downloadable JSON result named + * `analysis`. We first list the job's results to confirm the analysis exists + * (a failed job never produces one) and only then download + parse it, so a + * missing profile surfaces as `hasAnalysis: false` rather than a 404. 
+ */ +export const useDataDesignerJobAnalysis = ( + workspace: string, + jobName: string, + { enabled = true }: UseDataDesignerJobAnalysisOptions = {} +) => { + const { + data: resultsResponse, + isLoading: isResultsLoading, + isError: isResultsError, + error: resultsError, + } = useDataDesignerListCreateJobResults(workspace, jobName, { + query: { enabled: enabled && Boolean(workspace && jobName) }, + }); + + const hasAnalysis = Boolean( + resultsResponse?.data?.some((result) => result.name === ANALYSIS_RESULT_NAME) + ); + + const analysisQuery = useQuery({ + queryKey: getDataDesignerDownloadCreateJobResultQueryKey( + workspace, + jobName, + ANALYSIS_RESULT_NAME + ), + queryFn: async ({ signal }): Promise => { + const blob = await dataDesignerDownloadCreateJobResult( + workspace, + jobName, + ANALYSIS_RESULT_NAME, + signal + ); + const text = await blob.text(); + return JSON.parse(text) as DatasetProfilerResults; + }, + enabled: enabled && hasAnalysis, + }); + + return { + analysis: analysisQuery.data, + hasAnalysis, + isLoading: isResultsLoading || (hasAnalysis && analysisQuery.isLoading), + isError: isResultsError || analysisQuery.isError, + error: resultsError ?? analysisQuery.error, + }; +}; diff --git a/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute.ts b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute.ts new file mode 100644 index 0000000000..d36f87f68d --- /dev/null +++ b/web/packages/studio/src/routes/DataDesignerJobDetailsRoute/useDataDesignerJobFromRoute.ts @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// SPDX-License-Identifier: Apache-2.0

import { PlatformJobTerminalStatuses } from '@nemo/common/src/constants/query';
import { useDataDesignerGetCreateJob } from '@nemo/sdk/generated/data-designer/api';
import { ROUTE_PARAMS } from '@studio/constants/routes';
import { useWorkspaceFromPath } from '@studio/hooks/useWorkspaceFromPath';
import { useRequiredPathParams } from '@studio/util/hooks/useRequiredPathParams';

/**
 * Fetches the Data Designer job addressed by the current route.
 *
 * The workspace is read via `useWorkspaceFromPath()` and the job name from the
 * `ROUTE_PARAMS.dataDesignerJobName` path parameter. The job query is given a
 * `refetchInterval` callback that yields `3000` until the fetched job reports a
 * status contained in `PlatformJobTerminalStatuses`, after which it yields
 * `false`.
 *
 * @returns All fields of the underlying job query result, together with
 *   `workspace`, `jobName`, and `job` (the query's `data`).
 */
export const useDataDesignerJobFromRoute = () => {
  const workspace = useWorkspaceFromPath();
  const { dataDesignerJobName: jobName } = useRequiredPathParams([
    ROUTE_PARAMS.dataDesignerJobName,
  ]);

  const jobQuery = useDataDesignerGetCreateJob(workspace, jobName, {
    query: {
      refetchInterval: (activeQuery) => {
        const currentStatus = activeQuery.state.data?.status;
        // No data yet (or a non-terminal status) keeps the interval at 3000.
        if (currentStatus && PlatformJobTerminalStatuses.includes(currentStatus)) {
          return false;
        }
        return 3000;
      },
    },
  });

  return { ...jobQuery, workspace, jobName, job: jobQuery.data };
};