Skip to content
3 changes: 3 additions & 0 deletions packages/app/cypress/support/mock-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ export function createMockInferenceContext(
activePresetId: null,
setActivePresetId: namedStub('setActivePresetId'),
presetGuardRef: { current: false } as React.RefObject<boolean>,
hwColorOverrides: {},
...overrides,
};
}
Expand Down Expand Up @@ -441,6 +442,8 @@ export function createMockUnofficialRunContext(
): UnofficialRunContextType {
return {
isUnofficialRun: false,
mergeAsIngested: false,
setMergeAsIngested: namedStub('setMergeAsIngested'),
unofficialRunInfo: null,
unofficialRunInfos: [],
runIndexByUrl: {},
Expand Down
22 changes: 20 additions & 2 deletions packages/app/src/app/api/unofficial-run/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,22 @@ async function processSingleRun(
const bmkArtifact = artifacts
.filter((a) => a.name === 'results_bmk')
.toSorted((a, b) => b.id - a.id)[0];
// Fallback: some workflows (e.g. the Mock-ascend uploader) emit one
// `bmk_<config>_conc<N>_<gpu>_<idx>` artifact per concurrency instead of a
// single aggregated `results_bmk`. When the canonical artifact is absent,
// gather everything matching `bmk_*` and concatenate the rows.
const perConfigBmkArtifacts = bmkArtifact
? []
: artifacts.filter((a) => a.name.startsWith('bmk_'));
const evalArtifact = artifacts
.filter((a) => a.name === 'eval_results_all')
.toSorted((a, b) => b.id - a.id)[0];

if (!bmkArtifact && !evalArtifact) {
if (!bmkArtifact && perConfigBmkArtifacts.length === 0 && !evalArtifact) {
return {
errorResponse: NextResponse.json(
{
error: `No results_bmk or eval_results_all artifact found for runId ${runId}`,
error: `No results_bmk, bmk_*, or eval_results_all artifact found for runId ${runId}`,
},
{ status: 404 },
),
Expand All @@ -250,6 +257,17 @@ async function processSingleRun(
);
if (errorResponse) return { errorResponse };
benchmarks = normalizeArtifactRows(rows, date, runUrl || null);
} else if (perConfigBmkArtifacts.length > 0) {
// Download the per-config artifacts one at a time, in listing order, and pool
// their rows. The first failed download aborts the run with that error response.
const combinedRows: Record<string, unknown>[] = [];
for (const { archive_download_url: downloadUrl } of perConfigBmkArtifacts) {
  const download = await downloadArtifactRows(downloadUrl, githubToken);
  if (download.errorResponse) {
    return { errorResponse: download.errorResponse };
  }
  combinedRows.push(...download.rows);
}
// Normalize the pooled rows in a single pass.
benchmarks = normalizeArtifactRows(combinedRows, date, runUrl || null);
}

if (evalArtifact) {
Expand Down
22 changes: 21 additions & 1 deletion packages/app/src/components/GlobalFilterContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ import {
Sequence,
SEQUENCE_OPTIONS,
} from '@/lib/data-mappings';
import { computeAutoSwitchDecision } from '@/lib/unofficial-run-auto-switch';
import {
computeAutoSwitchDecision,
computeUnofficialOverrideDecision,
} from '@/lib/unofficial-run-auto-switch';
import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api';

interface RunInfo {
Expand Down Expand Up @@ -201,6 +204,23 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) {
}
}, [unofficialAvailable, selectedModel]);

// TEMPORARY (this branch only): when an unofficial run loads and the URL does
// not pin `i_seq`, default the sequence to `8K / 256`. Uses the same dedupe
// shape as the model auto-switch above: the ref remembers the key of the last
// decision, and manual sequence picks stick because the URL-sync effect writes
// `i_seq` after the override fires.
const lastUnofficialSeqOverrideRef = useRef<string>('');
useEffect(() => {
  const { nextKey, shouldOverride } = computeUnofficialOverrideDecision(
    unofficialAvailable,
    getUrlParam('i_seq'),
    lastUnofficialSeqOverrideRef.current,
  );
  // Always record the new key, whether or not an override is applied.
  lastUnofficialSeqOverrideRef.current = nextKey;
  if (!shouldOverride) return;
  setSelectedSequence(Sequence.EightK_256);
}, [unofficialAvailable]);

// Sequences available for the selected model (DB ∪ unofficial run for this model)
const availableSequences = useMemo(() => {
const unofficialSeqs = unofficialAvailable
Expand Down
86 changes: 84 additions & 2 deletions packages/app/src/components/inference/InferenceContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@ import {

import { useGlobalFilters } from '@/components/GlobalFilterContext';
import type {
ChartDefinition,
InferenceChartContextType,
InferenceData,
TrackedConfig,
} from '@/components/inference/types';
import { useUnofficialRun } from '@/components/unofficial-run-provider';
import chartDefinitions from '@/components/inference/inference-chart-config.json';
import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge';
import { computeUnofficialOverrideDecision } from '@/lib/unofficial-run-auto-switch';
import { Button } from '@/components/ui/button';
import {
Dialog,
Expand Down Expand Up @@ -170,10 +175,10 @@ export function InferenceProvider({
const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined;

const {
graphs,
graphs: officialGraphs,
loading: chartDataLoading,
error: chartDataError,
hardwareConfig,
hardwareConfig: officialHardwareConfig,
} = useChartData(
selectedModel,
effectiveSequence,
Expand All @@ -191,6 +196,81 @@ export function InferenceProvider({
latestDate,
);

// ── Promote unofficial rows to first-class series when toggled ────────────
// When `mergeAsIngested` is on, overlay points are re-keyed with per-run
// synth hwKeys and merged into `graphs` so they participate in the same
// filter/optimal-only/legend pipeline as official data. The resulting
// `hwColorOverrides` map is consumed by ScatterGraph's color resolver.
const {
mergeAsIngested,
unofficialChartData,
unofficialRunInfos,
runIndexByUrl,
availableModelsAndSequences: unofficialAvailable,
} = useUnofficialRun();

// TEMPORARY (this branch only): when an unofficial run loads and the URL does
// not pin `i_metric`, default the y-axis metric to "Output Token Throughput
// per GPU". Mirrors the sequence override in GlobalFilterContext: the ref
// remembers the key of the last decision, and manual metric picks stick
// because the URL gets `i_metric` written after the override fires.
const lastUnofficialMetricOverrideRef = useRef<string>('');
useEffect(() => {
  const { nextKey, shouldOverride } = computeUnofficialOverrideDecision(
    unofficialAvailable,
    getUrlParam('i_metric'),
    lastUnofficialMetricOverrideRef.current,
  );
  // Always record the new key, whether or not an override is applied.
  lastUnofficialMetricOverrideRef.current = nextKey;
  if (!shouldOverride) return;
  setSelectedYAxisMetric('y_outputTputPerGpu');
}, [unofficialAvailable]);

// Chart inputs exposed to consumers. With `mergeAsIngested` off, the official
// graphs and hardware config pass through untouched and the color-override map
// is empty. With it on, all three come from `mergeUnofficialIntoOfficial`.
const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => {
  if (mergeAsIngested) {
    // Pass only the run fields the merge helper is given: id, branch and url.
    const runSummaries = unofficialRunInfos.map(({ id, branch, url }) => ({ id, branch, url }));
    const {
      graphs: mergedGraphs,
      hardwareConfig: mergedHardwareConfig,
      colorOverrides,
    } = mergeUnofficialIntoOfficial({
      graphs: officialGraphs,
      hardwareConfig: officialHardwareConfig,
      unofficialChartData,
      selectedModel,
      selectedSequence: effectiveSequence,
      selectedYAxisMetric,
      selectedXAxisMetric,
      selectedE2eXAxisMetric,
      runIndexByUrl,
      unofficialRunInfos: runSummaries,
      chartDefinitions: chartDefinitions as ChartDefinition[],
    });
    return {
      graphs: mergedGraphs,
      hardwareConfig: mergedHardwareConfig,
      hwColorOverrides: colorOverrides,
    };
  }
  return {
    graphs: officialGraphs,
    hardwareConfig: officialHardwareConfig,
    hwColorOverrides: {} as Record<string, string>,
  };
}, [
  mergeAsIngested,
  officialGraphs,
  officialHardwareConfig,
  unofficialChartData,
  selectedModel,
  effectiveSequence,
  selectedYAxisMetric,
  selectedXAxisMetric,
  selectedE2eXAxisMetric,
  runIndexByUrl,
  unofficialRunInfos,
]);

// For GPU comparison date picker — use shared availability data from global filters
const dbModelKeys = useMemo<string[]>(
() => DISPLAY_MODEL_TO_DB[selectedModel] ?? [selectedModel],
Expand Down Expand Up @@ -977,6 +1057,7 @@ export function InferenceProvider({
activePresetId,
setActivePresetId,
presetGuardRef,
hwColorOverrides,
}),
[
activeHwTypes,
Expand Down Expand Up @@ -1030,6 +1111,7 @@ export function InferenceProvider({
removeTrackedConfig,
clearTrackedConfigs,
activePresetId,
hwColorOverrides,
],
);

Expand Down
6 changes: 6 additions & 0 deletions packages/app/src/components/inference/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,12 @@ export interface InferenceChartContextType {
activePresetId: string | null;
setActivePresetId: (id: string | null) => void;
presetGuardRef: React.RefObject<boolean>;
/**
* Per-hwKey CSS color overrides. Populated when unofficial-as-ingested
* merging is on so each synthesized run series gets the same per-run color
* the overlay legend would have shown. Empty when nothing is merged.
*/
hwColorOverrides: Record<string, string>;
}
export interface CalculateUserCostsRequest {
model: string;
Expand Down
17 changes: 13 additions & 4 deletions packages/app/src/components/inference/ui/ChartDisplay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,20 @@ export default function ChartDisplay() {
track('inference_view_changed', { view: value, chartIndex: index });
};

const { unofficialRunInfo, unofficialRunInfos, runIndexByUrl, getOverlayData, isUnofficialRun } =
useUnofficialRun();
const {
unofficialRunInfo,
unofficialRunInfos,
runIndexByUrl,
getOverlayData,
isUnofficialRun,
mergeAsIngested,
} = useUnofficialRun();

// Compute overlay data for each chart type — must match useChartData processing
// Compute overlay data for each chart type — must match useChartData processing.
// When `mergeAsIngested` is on, the unofficial rows are already promoted to
// official series via InferenceContext, so suppress the X-shape overlay layer.
const overlayDataByChartType = useMemo(() => {
if (!unofficialRunInfo || !getOverlayData) {
if (mergeAsIngested || !unofficialRunInfo || !getOverlayData) {
return { e2e: null, interactivity: null };
}

Expand Down Expand Up @@ -223,6 +231,7 @@ export default function ChartDisplay() {
interactivity: processData(interactivityRaw, 'interactivity'),
};
}, [
mergeAsIngested,
unofficialRunInfo,
unofficialRunInfos,
runIndexByUrl,
Expand Down
42 changes: 41 additions & 1 deletion packages/app/src/components/inference/ui/ScatterGraph.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,13 @@ const ScatterGraph = React.memo(
trackedConfigs,
addTrackedConfig,
removeTrackedConfig,
hwColorOverrides,
} = useInference();

const {
isUnofficialRun,
mergeAsIngested,
setMergeAsIngested,
activeOverlayHwTypes,
setActiveOverlayHwTypes,
allOverlayHwTypes,
Expand Down Expand Up @@ -218,12 +221,23 @@ const ScatterGraph = React.memo(
() => [...effectiveOfficialHwTypes],
[effectiveOfficialHwTypes],
);
const { resolveColor, getCssColor } = useThemeColors({
const { resolveColor: baseResolveColor, getCssColor } = useThemeColors({
highContrast,
identifiers: activeHwKeys,
activeKeys: activeOfficialKeys,
});

// Wrap resolveColor so synthesized unofficial-as-ingested hwKeys (provided
// by InferenceContext via `hwColorOverrides`) get their per-run palette
// color even when the vendor system would otherwise pick a GPU-derived hue.
//
// @param identifier  Series key to resolve; checked against the overrides first.
// @param hardwareKey Optional hardware key, forwarded unchanged to the base resolver.
// @returns The override color when `identifier` is an own key of
//          `hwColorOverrides`, otherwise the base resolver's result.
const resolveColor = useCallback(
  (identifier: string, hardwareKey?: string): string => {
    // Own-property check only: `in` also matches inherited members, so an
    // identifier like `constructor` or `toString` would otherwise return a
    // function from Object.prototype instead of a color string.
    const override = Object.prototype.hasOwnProperty.call(hwColorOverrides, identifier)
      ? hwColorOverrides[identifier]
      : undefined;
    return override ?? baseResolveColor(identifier, hardwareKey);
  },
  [baseResolveColor, hwColorOverrides],
);

// --- Changelog ---
const changelog = availableRuns ? availableRuns[selectedRunId]?.changelog || null : null;
const highlightConfigSuffixes = useMemo(() => {
Expand Down Expand Up @@ -1846,6 +1860,19 @@ const ScatterGraph = React.memo(
chartRef.current?.dismissTooltip();
}, [selectedPrecisions, selectedYAxisMetric, hideNonOptimal, overlayData, chartId]);

// Remove leftover overlay DOM nodes once `overlayData` goes away (e.g. when
// unofficial-as-ingested is toggled on). The layer system has no built-in
// teardown for layers that drop out of the array, so the previous render's
// X-shape points and dashed rooflines would otherwise stay on screen.
useEffect(() => {
  if (!overlayData) {
    const svgElement = chartRef.current?.getSvgElement?.();
    if (svgElement) {
      const svgSelection = d3.select(svgElement);
      // Overlay points first, then overlay rooflines.
      for (const staleSelector of ['.unofficial-overlay-pt', '.overlay-roofline-path']) {
        svgSelection.selectAll(staleSelector).remove();
      }
    }
  }
}, [overlayData]);

// Dismiss when pinned point's hardware becomes hidden
useEffect(() => {
const pp = chartRef.current?.getPinnedPoint() as InferenceData | null;
Expand Down Expand Up @@ -2007,6 +2034,19 @@ const ScatterGraph = React.memo(
track('latency_legend_expanded', { expanded });
}}
switches={[
...(isUnofficialRun
? [
{
id: 'scatter-uoff-as-ingested',
label: 'Show as ingested',
checked: mergeAsIngested,
onCheckedChange: (checked: boolean) => {
setMergeAsIngested(checked);
track('latency_unofficial_as_ingested_toggled', { enabled: checked });
},
},
]
: []),
...(selectedYAxisMetric !== 'y_inputTputPerGpu'
? [
{
Expand Down
Loading
Loading