From cc580bf470763ccd6232e6a76ad59011819187e9 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 27 Apr 2026 12:39:32 -0700 Subject: [PATCH 1/6] feat(unofficial): support 950dt + huawei-mindie for unofficial runs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds Huawei Ascend 950DT GPU and Huawei MindIE framework so the Mock-ascend GitHub Actions run (25014782858) can be loaded via the unofficial-run viewer. TDP/power/cost are placeholder 9.99 — Huawei specs are not yet finalized. - HW_REGISTRY: new 950dt entry, vendor=Huawei, arch=Ascend, sort=9 - VENDOR_OKLCH_ZONES / VENDOR_HSL_ZONES: amber/yellow zone for Huawei (steals 30-60° HSL from AMD's lower band; AMD keeps 300-360 + 0-30) - FW_REGISTRY: huawei-mindie → "Huawei MindIE" - dynamic-colors Vendor type extended with 'huawei' - chart-utils BANNED_HUE_TEST/PREFERRED_ZONE entries for huawei (avoid AMD-red and NVIDIA-green clashes; preferred amber/yellow zone) Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/app/src/lib/chart-utils.ts | 6 ++++- packages/app/src/lib/dynamic-colors.ts | 3 ++- packages/constants/src/framework-aliases.ts | 1 + packages/constants/src/gpu-keys.ts | 28 ++++++++++++++++----- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/packages/app/src/lib/chart-utils.ts b/packages/app/src/lib/chart-utils.ts index c7c51a7c..5d522116 100644 --- a/packages/app/src/lib/chart-utils.ts +++ b/packages/app/src/lib/chart-utils.ts @@ -20,16 +20,19 @@ import { getVendor, type Vendor } from '@/lib/dynamic-colors'; * In Lab space: 0° = red, 90° = yellow, 180° = green, 270° = blue. * NVIDIA must not be red/rose/pink (wraps around 0°: 320–40°). * AMD must not be green (roughly 120–195°). + * Huawei must not be red (AMD zone) or green (NVIDIA zone). 
*/ const BANNED_HUE_TEST: Record boolean) | null> = { nvidia: (hue) => hue >= 320 || hue <= 40, // red/rose/pink zone amd: (hue) => hue >= 120 && hue <= 195, // green zone + huawei: (hue) => hue >= 320 || hue <= 40 || (hue >= 120 && hue <= 195), // avoid red + green unknown: null, }; /** * Preferred hue ranges (CIELab) — used when a vendor has few items so they - * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges. + * cluster in the brand-appropriate zone. NVIDIA = greens, AMD = reds/oranges, + * Huawei = amber/yellow. */ const PREFERRED_ZONE: Record< Vendor, @@ -37,6 +40,7 @@ const PREFERRED_ZONE: Record< > = { nvidia: { hmin: 100, hmax: 195 }, // greens/teals amd: { hmin: 20, hmax: 50, cmin: 70, lmin: 50 }, // vivid reds/oranges + huawei: { hmin: 50, hmax: 95, cmin: 60 }, // amber/yellow unknown: null, }; diff --git a/packages/app/src/lib/dynamic-colors.ts b/packages/app/src/lib/dynamic-colors.ts index 38b9e10e..410701c3 100644 --- a/packages/app/src/lib/dynamic-colors.ts +++ b/packages/app/src/lib/dynamic-colors.ts @@ -14,7 +14,7 @@ import { getModelSortIndex } from '@/lib/constants'; // Vendor detection // --------------------------------------------------------------------------- -export type Vendor = 'nvidia' | 'amd' | 'unknown'; +export type Vendor = 'nvidia' | 'amd' | 'huawei' | 'unknown'; /** Determine vendor from a hardware key by looking up GPU_VENDORS. 
*/ export function getVendor(hwKey: string): Vendor { @@ -23,6 +23,7 @@ export function getVendor(hwKey: string): Vendor { const vendor = GPU_VENDORS[base]; if (vendor === 'NVIDIA') return 'nvidia'; if (vendor === 'AMD') return 'amd'; + if (vendor === 'Huawei') return 'huawei'; return 'unknown'; } diff --git a/packages/constants/src/framework-aliases.ts b/packages/constants/src/framework-aliases.ts index cc5eb6b4..6d34e543 100644 --- a/packages/constants/src/framework-aliases.ts +++ b/packages/constants/src/framework-aliases.ts @@ -9,6 +9,7 @@ export const FW_REGISTRY: Record = { 'dynamo-sglang': { label: 'Dynamo SGLang' }, 'dynamo-trt': { label: 'Dynamo TRT' }, 'dynamo-vllm': { label: 'Dynamo vLLM' }, + 'huawei-mindie': { label: 'Huawei MindIE' }, 'mori-sglang': { label: 'MoRI SGLang' }, sglang: { label: 'SGLang' }, trt: { label: 'TRT' }, diff --git a/packages/constants/src/gpu-keys.ts b/packages/constants/src/gpu-keys.ts index ec0ba96e..95590588 100644 --- a/packages/constants/src/gpu-keys.ts +++ b/packages/constants/src/gpu-keys.ts @@ -122,6 +122,17 @@ export const HW_REGISTRY: Record = { costn: 1.9, costr: 2.1, }, + '950dt': { + vendor: 'Huawei', + arch: 'Ascend', + label: 'Ascend 950DT', + sort: 9, + tdp: 9.99, + power: 9.99, + costh: 9.99, + costn: 9.99, + costr: 9.99, + }, }; /** Canonical set of GPU key strings used across all packages. 
*/ @@ -146,7 +157,9 @@ export const GPU_VENDORS: Record = Object.fromEntries( * Layout (approximate): * 0-12 (gap) * 12-42 AMD reds/oranges - * 42-120 (gap) + * 42-60 (gap) + * 60-90 Huawei amber/yellow + * 90-120 (gap) * 120-170 NVIDIA greens * 170-275 (gap) * 275-330 unknown / fallback (purples) @@ -157,6 +170,7 @@ export const VENDOR_OKLCH_ZONES: Record< { start: number; end: number; chroma: { light: number; dark: number } } > = { amd: { start: 12, end: 42, chroma: { light: 0.18, dark: 0.22 } }, + huawei: { start: 60, end: 90, chroma: { light: 0.16, dark: 0.18 } }, nvidia: { start: 120, end: 170, chroma: { light: 0.15, dark: 0.15 } }, unknown: { start: 275, end: 330, chroma: { light: 0.14, dark: 0.16 } }, }; @@ -165,23 +179,25 @@ export const VENDOR_OKLCH_ZONES: Record< * Preferred HSL hue zones for high-contrast mode. * Each vendor gets a non-overlapping slice of the 360° hue wheel so items * from different vendors are visually distinct and vendor-appropriate - * (NVIDIA = greens, AMD = reds/oranges, unknown = blues/purples). + * (NVIDIA = greens, AMD = reds/oranges, Huawei = amber/yellow, unknown = blues/purples). * When a vendor has too many items to fit with sufficient spacing, the zone * expands symmetrically — these are preferred zones, not hard constraints. * * Layout (360° wheel): - * NVIDIA: 60–195 (135°) — greens through cyans - * AMD: 300–360 + 0–60 (120°, wraps) — magentas through oranges + * NVIDIA: 90–195 (105°) — greens through cyans + * Huawei: 30–60 (30°) — amber/yellow + * AMD: 300–360 + 0–30 (90°, wraps) — magentas through reds * unknown: 195–300 (105°) — blues/purples * * Each entry is an array of linear {start, span} segments (wrapping bands * are split into two segments). 
*/ export const VENDOR_HSL_ZONES: Record = { - nvidia: [{ start: 60, span: 135 }], + nvidia: [{ start: 90, span: 105 }], + huawei: [{ start: 30, span: 30 }], amd: [ { start: 300, span: 60 }, - { start: 0, span: 60 }, + { start: 0, span: 30 }, ], unknown: [{ start: 195, span: 105 }], }; From c01af64859608cbba867c01d6d4f89c57d5a4316 Mon Sep 17 00:00:00 2001 From: Oseltamivir Date: Mon, 27 Apr 2026 13:15:35 -0700 Subject: [PATCH 2/6] feat(unofficial-run): fall back to per-config bmk_* artifacts The Mock-ascend workflow uploads one bmk__conc__ artifact per concurrency instead of a single aggregated results_bmk. The unofficial-run API was filtering strictly on name === 'results_bmk', returning 404 even though the per-config artifacts contained valid rows. When results_bmk is absent, gather every artifact whose name starts with 'bmk_', download each, and concat the rows before normalization. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../app/src/app/api/unofficial-run/route.ts | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/packages/app/src/app/api/unofficial-run/route.ts b/packages/app/src/app/api/unofficial-run/route.ts index 4e5b5265..1a84a4ba 100644 --- a/packages/app/src/app/api/unofficial-run/route.ts +++ b/packages/app/src/app/api/unofficial-run/route.ts @@ -217,15 +217,22 @@ async function processSingleRun( const bmkArtifact = artifacts .filter((a) => a.name === 'results_bmk') .toSorted((a, b) => b.id - a.id)[0]; + // Fallback: some workflows (e.g. the Mock-ascend uploader) emit one + // `bmk__conc__` artifact per concurrency instead of a + // single aggregated `results_bmk`. When the canonical artifact is absent, + // gather everything matching `bmk_*` and concatenate the rows. + const perConfigBmkArtifacts = bmkArtifact + ? 
[] + : artifacts.filter((a) => a.name.startsWith('bmk_')); const evalArtifact = artifacts .filter((a) => a.name === 'eval_results_all') .toSorted((a, b) => b.id - a.id)[0]; - if (!bmkArtifact && !evalArtifact) { + if (!bmkArtifact && perConfigBmkArtifacts.length === 0 && !evalArtifact) { return { errorResponse: NextResponse.json( { - error: `No results_bmk or eval_results_all artifact found for runId ${runId}`, + error: `No results_bmk, bmk_*, or eval_results_all artifact found for runId ${runId}`, }, { status: 404 }, ), @@ -246,6 +253,17 @@ async function processSingleRun( ); if (errorResponse) return { errorResponse }; benchmarks = normalizeArtifactRows(rows, date, runUrl || null); + } else if (perConfigBmkArtifacts.length > 0) { + const allRows: Record[] = []; + for (const artifact of perConfigBmkArtifacts) { + const { rows, errorResponse } = await downloadArtifactRows( + artifact.archive_download_url, + githubToken, + ); + if (errorResponse) return { errorResponse }; + allRows.push(...rows); + } + benchmarks = normalizeArtifactRows(allRows, date, runUrl || null); } if (evalArtifact) { From f1549edcb8a88d46f32265956a5fb4204581f1ca Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 6 May 2026 21:01:00 +0000 Subject: [PATCH 3/6] feat(unofficial): support 8K/256 sequence (8192/256) Registers '8k/256' (isl=8192, osl=256) so unofficial-run overlays from sweeps like InferenceX run 25457884421 render instead of being silently dropped by islOslToSequence returning null. - packages/constants/src/models.ts: add 8k/256 to both conversion maps. - packages/app/src/lib/data-mappings.ts: add Sequence.EightK_256 with SEQUENCE_CONFIG entry (label "8K / 256"). Replace the binary 1k-vs-8k ternary in getModelAndSequenceFromArtifact with islOslToSequence so future ISL/OSL pairs registered in constants are picked up automatically rather than mis-mapped. 
- Tests added for round-trip + label coverage in models.test.ts / models-mapping.test.ts and a dsv4 + 8192/256 case in data-mappings.test.ts. Co-authored-by: Bryan Shan --- packages/app/src/lib/data-mappings.test.ts | 10 ++++++++++ packages/app/src/lib/data-mappings.ts | 13 +++++++------ packages/app/src/lib/models-mapping.test.ts | 10 +++++++++- packages/constants/src/models.test.ts | 10 +++++++++- packages/constants/src/models.ts | 2 ++ 5 files changed, 37 insertions(+), 8 deletions(-) diff --git a/packages/app/src/lib/data-mappings.test.ts b/packages/app/src/lib/data-mappings.test.ts index d96d28a6..c79c52f2 100644 --- a/packages/app/src/lib/data-mappings.test.ts +++ b/packages/app/src/lib/data-mappings.test.ts @@ -124,6 +124,15 @@ describe('getModelAndSequenceFromArtifact', () => { expect(result).toEqual({ model: Model.Kimi_K2_5, sequence: Sequence.EightK_OneK }); }); + it('parses structured artifact with dsv4 prefix and 8k/256 ISL/OSL', () => { + const result = getModelAndSequenceFromArtifact({ + infmax_model_prefix: 'dsv4', + isl: 8192, + osl: 256, + }); + expect(result).toEqual({ model: Model.DeepSeek_V4_Pro, sequence: Sequence.EightK_256 }); + }); + it('returns undefined for unknown model prefix', () => { const result = getModelAndSequenceFromArtifact({ infmax_model_prefix: 'unknown', @@ -206,6 +215,7 @@ describe('getSequenceLabel', () => { expect(getSequenceLabel(Sequence.OneK_OneK)).toBe('1K / 1K'); expect(getSequenceLabel(Sequence.OneK_EightK)).toBe('1K / 8K'); expect(getSequenceLabel(Sequence.EightK_OneK)).toBe('8K / 1K'); + expect(getSequenceLabel(Sequence.EightK_256)).toBe('8K / 256'); }); it('falls back to the sequence value for unknown sequence', () => { diff --git a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index ad7d4ad7..2dcfd0f7 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -1,3 +1,5 @@ +import { islOslToSequence } from '@semianalysisai/inferencex-constants'; 
+ export enum Model { Llama3_3_70B = 'Llama-3.3-70B-Instruct-FP8', Llama3_1_70B = 'Llama-3.1-70B-Instruct-FP8-KV', @@ -118,6 +120,7 @@ export enum Sequence { OneK_OneK = '1k/1k', OneK_EightK = '1k/8k', EightK_OneK = '8k/1k', + EightK_256 = '8k/256', } const SEQUENCE_CONFIG: Record = @@ -125,6 +128,7 @@ const SEQUENCE_CONFIG: Record { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('converts 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('4k/4k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -67,13 +71,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(8192, 1024)).toBe('8k/1k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + it('returns null for unknown ISL/OSL combos', () => { expect(islOslToSequence(4096, 4096)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { const islOsl = sequenceToIslOsl(seq)!; expect(islOslToSequence(islOsl.isl, islOsl.osl)).toBe(seq); } diff --git a/packages/constants/src/models.test.ts b/packages/constants/src/models.test.ts index 308c9c2a..515fcf65 100644 --- a/packages/constants/src/models.test.ts +++ b/packages/constants/src/models.test.ts @@ -39,6 +39,10 @@ describe('sequenceToIslOsl', () => { expect(sequenceToIslOsl('8k/1k')).toEqual({ isl: 8192, osl: 1024 }); }); + it('parses 8k/256 to 8192/256', () => { + expect(sequenceToIslOsl('8k/256')).toEqual({ isl: 8192, osl: 256 }); + }); + it('returns null for unknown sequences', () => { expect(sequenceToIslOsl('2k/2k')).toBeNull(); expect(sequenceToIslOsl('')).toBeNull(); @@ -55,13 +59,17 @@ describe('islOslToSequence', () => { expect(islOslToSequence(1024, 
8192)).toBe('1k/8k'); }); + it('converts 8192/256 to 8k/256', () => { + expect(islOslToSequence(8192, 256)).toBe('8k/256'); + }); + it('returns null for unmapped ISL/OSL pairs', () => { expect(islOslToSequence(2048, 2048)).toBeNull(); expect(islOslToSequence(0, 0)).toBeNull(); }); it('round-trips with sequenceToIslOsl for all known sequences', () => { - for (const seq of ['1k/1k', '1k/8k', '8k/1k']) { + for (const seq of ['1k/1k', '1k/8k', '8k/1k', '8k/256']) { const parsed = sequenceToIslOsl(seq)!; expect(islOslToSequence(parsed.isl, parsed.osl)).toBe(seq); } diff --git a/packages/constants/src/models.ts b/packages/constants/src/models.ts index c75034c7..83b6540c 100644 --- a/packages/constants/src/models.ts +++ b/packages/constants/src/models.ts @@ -41,6 +41,7 @@ export function sequenceToIslOsl(seq: string): { isl: number; osl: number } | nu '1k/1k': { isl: 1024, osl: 1024 }, '1k/8k': { isl: 1024, osl: 8192 }, '8k/1k': { isl: 8192, osl: 1024 }, + '8k/256': { isl: 8192, osl: 256 }, }; return map[seq] ?? null; } @@ -51,6 +52,7 @@ export function islOslToSequence(isl: number, osl: number): string | null { '1024_1024': '1k/1k', '1024_8192': '1k/8k', '8192_1024': '8k/1k', + '8192_256': '8k/256', }; return map[`${isl}_${osl}`] ?? null; } From 5e327a74346815689b8710a60b441bd7d8c2af32 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 04:26:56 +0000 Subject: [PATCH 4/6] feat(unofficial): toggle to render unofficial-run rows as ingested series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a "Show as ingested" switch in the inference scatter legend (visible when an unofficial run is loaded) that promotes overlay rows to first-class points so they participate in the same filter pipeline as DB-ingested data: hardware toggles, Optimal-only, precision filtering, and per-(hwKey, run) roofline grouping. 
Each (run, original hwKey) pair is rewritten to a synthesized hwKey of the form `${origHwKey}__uorun${runId}` — preserving the GPU base prefix so `getModelSortIndex` and `isKnownGpu` keep working — with a synthesized HardwareEntry whose label embeds the run branch and a per-run color override drawn from the existing overlay palette. Multiple runs and per-run multi-GPU configs each surface as their own legend entry. URL-synced via `i_uoff_ingested=1` so the state survives reloads and shares. The X-shape overlay layer is suppressed and stale DOM is cleaned up when the toggle flips on (the layer system has no built-in teardown for layers that drop out of the array). Unit tests cover synth-key encoding, no-op behavior when no overlay matches, and multi-run separation. Co-authored-by: Bryan Shan --- packages/app/cypress/support/mock-data.ts | 3 + .../components/inference/InferenceContext.tsx | 62 +++- .../app/src/components/inference/types.ts | 6 + .../components/inference/ui/ChartDisplay.tsx | 17 +- .../components/inference/ui/ScatterGraph.tsx | 42 ++- .../components/unofficial-run-provider.tsx | 37 ++ packages/app/src/lib/unofficial-merge.test.ts | 320 ++++++++++++++++++ packages/app/src/lib/unofficial-merge.ts | 220 ++++++++++++ packages/app/src/lib/url-state.ts | 2 + 9 files changed, 702 insertions(+), 7 deletions(-) create mode 100644 packages/app/src/lib/unofficial-merge.test.ts create mode 100644 packages/app/src/lib/unofficial-merge.ts diff --git a/packages/app/cypress/support/mock-data.ts b/packages/app/cypress/support/mock-data.ts index 467cbd59..0b9769c2 100644 --- a/packages/app/cypress/support/mock-data.ts +++ b/packages/app/cypress/support/mock-data.ts @@ -246,6 +246,7 @@ export function createMockInferenceContext( activePresetId: null, setActivePresetId: namedStub('setActivePresetId'), presetGuardRef: { current: false } as React.RefObject, + hwColorOverrides: {}, ...overrides, }; } @@ -435,6 +436,8 @@ export function createMockUnofficialRunContext( ): 
UnofficialRunContextType { return { isUnofficialRun: false, + mergeAsIngested: false, + setMergeAsIngested: namedStub('setMergeAsIngested'), unofficialRunInfo: null, unofficialRunInfos: [], runIndexByUrl: {}, diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index 633a6269..8f0c23c9 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -17,10 +17,14 @@ import { FAVORITE_PRESETS, type FavoritePreset } from '@/components/favorites/fa import { useGlobalFilters } from '@/components/GlobalFilterContext'; import type { + ChartDefinition, InferenceChartContextType, InferenceData, TrackedConfig, } from '@/components/inference/types'; +import { useUnofficialRun } from '@/components/unofficial-run-provider'; +import chartDefinitions from '@/components/inference/inference-chart-config.json'; +import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -144,10 +148,10 @@ export function InferenceProvider({ const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined; const { - graphs, + graphs: officialGraphs, loading: chartDataLoading, error: chartDataError, - hardwareConfig, + hardwareConfig: officialHardwareConfig, } = useChartData( selectedModel, effectiveSequence, @@ -165,6 +169,58 @@ export function InferenceProvider({ latestDate, ); + // ── Promote unofficial rows to first-class series when toggled ──────────── + // When `mergeAsIngested` is on, overlay points are re-keyed with per-run + // synth hwKeys and merged into `graphs` so they participate in the same + // filter/optimal-only/legend pipeline as official data. The resulting + // `hwColorOverrides` map is consumed by ScatterGraph's color resolver. 
+ const { mergeAsIngested, unofficialChartData, unofficialRunInfos, runIndexByUrl } = + useUnofficialRun(); + + const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => { + if (!mergeAsIngested) { + return { + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + hwColorOverrides: {} as Record, + }; + } + const merged = mergeUnofficialIntoOfficial({ + graphs: officialGraphs, + hardwareConfig: officialHardwareConfig, + unofficialChartData, + selectedModel, + selectedSequence: effectiveSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos: unofficialRunInfos.map((r) => ({ + id: r.id, + branch: r.branch, + url: r.url, + })), + chartDefinitions: chartDefinitions as ChartDefinition[], + }); + return { + graphs: merged.graphs, + hardwareConfig: merged.hardwareConfig, + hwColorOverrides: merged.colorOverrides, + }; + }, [ + mergeAsIngested, + officialGraphs, + officialHardwareConfig, + unofficialChartData, + selectedModel, + effectiveSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos, + ]); + // For GPU comparison date picker — use shared availability data from global filters const dbModelKeys = useMemo( () => DISPLAY_MODEL_TO_DB[selectedModel] ?? 
[selectedModel], @@ -833,6 +889,7 @@ export function InferenceProvider({ activePresetId, setActivePresetId, presetGuardRef, + hwColorOverrides, }), [ activeHwTypes, @@ -884,6 +941,7 @@ export function InferenceProvider({ removeTrackedConfig, clearTrackedConfigs, activePresetId, + hwColorOverrides, ], ); diff --git a/packages/app/src/components/inference/types.ts b/packages/app/src/components/inference/types.ts index 365923da..db67b2e8 100644 --- a/packages/app/src/components/inference/types.ts +++ b/packages/app/src/components/inference/types.ts @@ -527,6 +527,12 @@ export interface InferenceChartContextType { activePresetId: string | null; setActivePresetId: (id: string | null) => void; presetGuardRef: React.RefObject; + /** + * Per-hwKey CSS color overrides. Populated when unofficial-as-ingested + * merging is on so each synthesized run series gets the same per-run color + * the overlay legend would have shown. Empty when nothing is merged. + */ + hwColorOverrides: Record; } export interface CalculateUserCostsRequest { model: string; diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx index 9b563bfc..5f8c8f68 100644 --- a/packages/app/src/components/inference/ui/ChartDisplay.tsx +++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx @@ -165,12 +165,20 @@ export default function ChartDisplay() { track('inference_view_changed', { view: value, chartIndex: index }); }; - const { unofficialRunInfo, unofficialRunInfos, runIndexByUrl, getOverlayData, isUnofficialRun } = - useUnofficialRun(); + const { + unofficialRunInfo, + unofficialRunInfos, + runIndexByUrl, + getOverlayData, + isUnofficialRun, + mergeAsIngested, + } = useUnofficialRun(); - // Compute overlay data for each chart type — must match useChartData processing + // Compute overlay data for each chart type — must match useChartData processing. 
+ // When `mergeAsIngested` is on, the unofficial rows are already promoted to + // official series via InferenceContext, so suppress the X-shape overlay layer. const overlayDataByChartType = useMemo(() => { - if (!unofficialRunInfo || !getOverlayData) { + if (mergeAsIngested || !unofficialRunInfo || !getOverlayData) { return { e2e: null, interactivity: null }; } @@ -224,6 +232,7 @@ export default function ChartDisplay() { interactivity: processData(interactivityRaw, 'interactivity'), }; }, [ + mergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/components/inference/ui/ScatterGraph.tsx b/packages/app/src/components/inference/ui/ScatterGraph.tsx index ba8b6a42..35cf8129 100644 --- a/packages/app/src/components/inference/ui/ScatterGraph.tsx +++ b/packages/app/src/components/inference/ui/ScatterGraph.tsx @@ -144,10 +144,13 @@ const ScatterGraph = React.memo( trackedConfigs, addTrackedConfig, removeTrackedConfig, + hwColorOverrides, } = useInference(); const { isUnofficialRun, + mergeAsIngested, + setMergeAsIngested, activeOverlayHwTypes, setActiveOverlayHwTypes, allOverlayHwTypes, @@ -213,12 +216,23 @@ const ScatterGraph = React.memo( () => [...effectiveOfficialHwTypes], [effectiveOfficialHwTypes], ); - const { resolveColor, getCssColor } = useThemeColors({ + const { resolveColor: baseResolveColor, getCssColor } = useThemeColors({ highContrast, identifiers: activeHwKeys, activeKeys: activeOfficialKeys, }); + // Wrap resolveColor so synthesized unofficial-as-ingested hwKeys (provided + // by InferenceContext via `hwColorOverrides`) get their per-run palette + // color even when the vendor system would otherwise pick a GPU-derived hue. 
+ const resolveColor = useCallback( + (identifier: string, hardwareKey?: string): string => { + if (identifier in hwColorOverrides) return hwColorOverrides[identifier]; + return baseResolveColor(identifier, hardwareKey); + }, + [baseResolveColor, hwColorOverrides], + ); + // --- Changelog --- const changelog = availableRuns ? availableRuns[selectedRunId]?.changelog || null : null; const highlightConfigSuffixes = useMemo(() => { @@ -1610,6 +1624,19 @@ const ScatterGraph = React.memo( chartRef.current?.dismissTooltip(); }, [selectedPrecisions, selectedYAxisMetric, hideNonOptimal, overlayData, chartId]); + // Clean up overlay DOM elements when overlayData is removed (e.g. when + // unofficial-as-ingested is toggled on). The layer system has no built-in + // teardown for layers that drop out of the array, so the previous render's + // X-shape points / dashed rooflines would otherwise stick around. + useEffect(() => { + if (overlayData) return; + const svg = chartRef.current?.getSvgElement?.(); + if (!svg) return; + const root = d3.select(svg); + root.selectAll('.unofficial-overlay-pt').remove(); + root.selectAll('.overlay-roofline-path').remove(); + }, [overlayData]); + // Dismiss when pinned point's hardware becomes hidden useEffect(() => { const pp = chartRef.current?.getPinnedPoint() as InferenceData | null; @@ -1771,6 +1798,19 @@ const ScatterGraph = React.memo( track('latency_legend_expanded', { expanded }); }} switches={[ + ...(isUnofficialRun + ? [ + { + id: 'scatter-uoff-as-ingested', + label: 'Show as ingested', + checked: mergeAsIngested, + onCheckedChange: (checked: boolean) => { + setMergeAsIngested(checked); + track('latency_unofficial_as_ingested_toggled', { enabled: checked }); + }, + }, + ] + : []), ...(selectedYAxisMetric !== 'y_inputTputPerGpu' ? 
[ { diff --git a/packages/app/src/components/unofficial-run-provider.tsx b/packages/app/src/components/unofficial-run-provider.tsx index 9de84519..c56a2ed9 100644 --- a/packages/app/src/components/unofficial-run-provider.tsx +++ b/packages/app/src/components/unofficial-run-provider.tsx @@ -51,6 +51,15 @@ interface AvailableModelSequence { export interface UnofficialRunContextType { isUnofficialRun: boolean; + /** + * When true, unofficial-run rows are promoted to first-class series in the + * inference scatter — each (run, GPU config) pair becomes its own legend + * entry with the run's branch name, and the rows participate in the same + * filter pipeline as ingested data (Optimal-only, hardware toggles, etc.) + * instead of rendering as a separate X-shape overlay. + */ + mergeAsIngested: boolean; + setMergeAsIngested: (v: boolean) => void; /** First run in the loaded set — kept as a convenience alias for overlay labels. */ unofficialRunInfo: UnofficialRunInfo | null; /** All runs loaded from the `unofficialrun(s)` URL param (comma-separated). */ @@ -176,6 +185,32 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { AvailableModelSequence[] >([]); + // Promote unofficial rows to ingested-style series. Initial value seeded + // synchronously when running in the browser so the toggle starts checked + // when the user shares a URL like `?unofficialrun=…&i_uoff_ingested=1`. + // Under SSR the value is false; we sync from the URL again after mount via + // the mount-time effect declared below. + const [mergeAsIngested, setMergeAsIngestedRaw] = useState(() => { + if (typeof window === 'undefined') return false; + const sp = new URLSearchParams(window.location.search); + return sp.get('i_uoff_ingested') === '1'; + }); + // Re-sync after hydration in case the server rendered with the SSR default. 
+ useEffect(() => { + if (typeof window === 'undefined') return; + const sp = new URLSearchParams(window.location.search); + const fromUrl = sp.get('i_uoff_ingested') === '1'; + setMergeAsIngestedRaw((prev) => (prev !== fromUrl ? fromUrl : prev)); + }, []); + const setMergeAsIngested = useCallback((v: boolean) => { + setMergeAsIngestedRaw(v); + if (typeof window === 'undefined') return; + const url = new URL(window.location.href); + if (v) url.searchParams.set('i_uoff_ingested', '1'); + else url.searchParams.delete('i_uoff_ingested'); + window.history.replaceState({}, '', url); + }, []); + // --- Shared overlay toggle state (unified across both charts) --- const [activeOverlayHwTypes, setActiveOverlayHwTypes] = useState>(new Set()); const [localOfficialOverride, setLocalOfficialOverrideRaw] = useState | null>(null); @@ -398,6 +433,8 @@ export function UnofficialRunProvider({ children }: { children: ReactNode }) { 0, + mergeAsIngested, + setMergeAsIngested, unofficialRunInfo, unofficialRunInfos, runIndexByUrl, diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts new file mode 100644 index 00000000..971953b6 --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -0,0 +1,320 @@ +import { describe, expect, it } from 'vitest'; + +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; + +import { + isSynthHwKey, + makeSynthHwKey, + mergeUnofficialIntoOfficial, + parseSynthHwKey, + type UnofficialChartDataMap, +} from './unofficial-merge'; + +const E2E_DEF: ChartDefinition = { + chartType: 'e2e', + x: 'median_e2el', + y: 'tput_per_gpu', + x_label: 'End-to-end Latency (s)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Latency', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const INTERACTIVITY_DEF: ChartDefinition = { + chartType: 'interactivity', + x: 
'median_intvty', + y: 'tput_per_gpu', + x_label: 'Interactivity (tok/s/user)', + y_label: 'Throughput per GPU (tok/s/GPU)', + heading: 'Throughput vs Interactivity', + y_tpPerGpu_label: 'Throughput per GPU (tok/s/GPU)', +} as unknown as ChartDefinition; + +const CHART_DEFS: ChartDefinition[] = [E2E_DEF, INTERACTIVITY_DEF]; + +function makeOverlayPoint(overrides: Partial = {}): InferenceData { + return { + hwKey: 'h100_vllm', + precision: 'fp8', + tp: 8, + conc: 64, + x: 0, + y: 0, + median_e2el: 2.3, + median_intvty: 12.5, + p99_ttft: 0.35, + median_ttft: 0.15, + tpPerGpu: { y: 450.5, roof: false }, + date: '2026-04-01', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + ...overrides, + } as InferenceData; +} + +function makeOverlayChartData(): UnofficialChartDataMap { + const e2eData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const interactivityData = [ + makeOverlayPoint({ conc: 32 }), + makeOverlayPoint({ + hwKey: 'a100_sglang', + conc: 64, + tpPerGpu: { y: 200.1, roof: false }, + }), + ]; + const gpus: HardwareConfig = { + h100_vllm: { name: 'h100_vllm', label: 'H100', suffix: '(VLLM)', gpu: 'NVIDIA H100' }, + a100_sglang: { name: 'a100_sglang', label: 'A100', suffix: '(SGLANG)', gpu: 'NVIDIA A100' }, + }; + return { + 'DeepSeek-R1-0528_1k/1k': { + e2e: { data: e2eData, gpus }, + interactivity: { data: interactivityData, gpus }, + }, + }; +} + +function emptyOfficial(): { graphs: RenderableGraph[]; hardwareConfig: HardwareConfig } { + return { + graphs: [ + { model: 'DeepSeek-R1-0528', sequence: '1k/1k', chartDefinition: E2E_DEF, data: [] }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [], + }, + ], + hardwareConfig: {}, + }; +} + +const RUN_INDEX = { + 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100': 0, + '100': 0, + 
'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200': 1, + '200': 1, +}; + +const RUN_INFOS = [ + { + id: 100, + branch: 'feature-branch-a', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/100', + }, + { + id: 200, + branch: 'feature-branch-b', + url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + }, +]; + +describe('synth hwKey helpers', () => { + it('encodes runId into hwKey while preserving the GPU base prefix', () => { + const synth = makeSynthHwKey('h100_vllm', 100); + expect(synth).toBe('h100_vllm__uorun100'); + // Critical: the base GPU is still recoverable via split('_')[0] so + // getModelSortIndex / isKnownGpu keep working. + expect(synth.split('_')[0]).toBe('h100'); + }); + + it('round-trips through parseSynthHwKey', () => { + const synth = makeSynthHwKey('a100_sglang', 200); + expect(parseSynthHwKey(synth)).toEqual({ origHwKey: 'a100_sglang', runId: 200 }); + }); + + it('parseSynthHwKey returns null for non-synth keys', () => { + expect(parseSynthHwKey('h100_vllm')).toBeNull(); + expect(parseSynthHwKey('mi300x')).toBeNull(); + }); + + it('isSynthHwKey detects synthesized keys', () => { + expect(isSynthHwKey(makeSynthHwKey('h100', 100))).toBe(true); + expect(isSynthHwKey('h100_vllm')).toBe(false); + }); +}); + +describe('mergeUnofficialIntoOfficial', () => { + it('is a no-op when unofficialChartData is null', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: null, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: {}, + unofficialRunInfos: [], + }); + expect(result.graphs).toBe(graphs); + expect(result.hardwareConfig).toBe(hardwareConfig); + expect(result.colorOverrides).toEqual({}); + }); + + it('is a no-op when no overlay group matches the selected (model, sequence)', 
() => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'gpt-oss-120b', // not present in overlay map + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + expect(result.graphs).toBe(graphs); + expect(result.colorOverrides).toEqual({}); + }); + + it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig + colorOverrides', () => { + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Each chart graph received both overlay rows (different GPUs, both run 100). + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data).toHaveLength(2); + const synthKeys = e2eGraph.data.map((d) => d.hwKey); + expect(synthKeys).toContain('h100_vllm__uorun100'); + expect(synthKeys).toContain('a100_sglang__uorun100'); + + // The synth keys are present in hardwareConfig with branch-bearing labels. + const h100Synth = result.hardwareConfig['h100_vllm__uorun100']; + expect(h100Synth.label).toBe('H100 • feature-branch-a'); + expect(h100Synth.gpu).toContain('UNOFFICIAL: feature-branch-a'); + + // Color overrides are populated for each synth key (palette-based, not GPU-vendor). 
+ expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); + expect(result.colorOverrides['a100_sglang__uorun100']).toBe('var(--overlay-run-0)'); + }); + + it('keeps multiple runs separate so each (run, GPU) becomes its own legend entry', () => { + const data = makeOverlayChartData(); + // Inject a second run's row alongside the first. + const secondRunPoint = makeOverlayPoint({ + hwKey: 'h100_vllm', + run_url: 'https://github.com/SemiAnalysisAI/InferenceX/actions/runs/200', + tpPerGpu: { y: 460, roof: false }, + }); + data['DeepSeek-R1-0528_1k/1k'].e2e.data.push(secondRunPoint); + data['DeepSeek-R1-0528_1k/1k'].interactivity.data.push(secondRunPoint); + + const { graphs, hardwareConfig } = emptyOfficial(); + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: data, + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + // Same physical GPU (h100_vllm) appears twice — once per run — with distinct + // synth keys so they form separate roofline groups in the scatter chart. 
+ const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + const h100Keys = e2eGraph.data + .map((d) => d.hwKey) + .filter((k) => String(k).startsWith('h100_vllm__uorun')); + expect(h100Keys).toContain('h100_vllm__uorun100'); + expect(h100Keys).toContain('h100_vllm__uorun200'); + + expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100 • feature-branch-b'); + expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); + expect(result.colorOverrides['h100_vllm__uorun200']).toBe('var(--overlay-run-1)'); + }); + + it('preserves official rows alongside merged overlay rows', () => { + const { hardwareConfig } = emptyOfficial(); + const officialPoint = { + hwKey: 'b200_trt', + precision: 'fp4', + tp: 4, + conc: 8, + x: 1.5, + y: 800, + date: '2026-03-01', + } as InferenceData; + const graphs: RenderableGraph[] = [ + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: E2E_DEF, + data: [officialPoint], + }, + { + model: 'DeepSeek-R1-0528', + sequence: '1k/1k', + chartDefinition: INTERACTIVITY_DEF, + data: [officialPoint], + }, + ]; + + const result = mergeUnofficialIntoOfficial({ + graphs, + hardwareConfig, + unofficialChartData: makeOverlayChartData(), + selectedModel: 'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + }); + + const e2eGraph = result.graphs.find((g) => g.chartDefinition.chartType === 'e2e')!; + expect(e2eGraph.data.some((d) => d.hwKey === 'b200_trt')).toBe(true); + expect(e2eGraph.data.some((d) => String(d.hwKey).startsWith('h100_vllm__uorun'))).toBe(true); + }); + + it('synthesizes stub graphs from chartDefinitions when official graphs is empty', () => { + const result = mergeUnofficialIntoOfficial({ + graphs: [], + hardwareConfig: {}, + unofficialChartData: makeOverlayChartData(), + selectedModel: 
'DeepSeek-R1-0528', + selectedSequence: '1k/1k', + selectedYAxisMetric: 'y_tpPerGpu', + selectedXAxisMetric: null, + selectedE2eXAxisMetric: null, + runIndexByUrl: RUN_INDEX, + unofficialRunInfos: RUN_INFOS, + chartDefinitions: CHART_DEFS, + }); + + // Two stub graphs synthesized (e2e + interactivity), each carrying merged overlay rows. + expect(result.graphs).toHaveLength(2); + expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); + }); +}); diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts new file mode 100644 index 00000000..e29db2e8 --- /dev/null +++ b/packages/app/src/lib/unofficial-merge.ts @@ -0,0 +1,220 @@ +/** + * Helpers for promoting unofficial-run benchmark rows to first-class + * "ingested-style" series so they participate in the regular scatter + * filter pipeline (Optimal-only, hardware toggles, precision filter, etc.) + * instead of being rendered as a separate overlay layer. + * + * Each (run, original hwKey) pair gets a synthesized hardware key of the form + * `${origHwKey}__uorun${runId}` + * — preserving the base GPU as `hwKey.split('_')[0]` so `getModelSortIndex` + * and `isKnownGpu` keep working — while still being unique per run so a single + * job with multiple GPUs surfaces as separate legend entries, and multiple + * runs don't collapse onto each other. 
+ */ +import type { + ChartDefinition, + HardwareConfig, + InferenceData, + RenderableGraph, +} from '@/components/inference/types'; +import { processOverlayChartData } from '@/components/inference/utils'; +import type { HardwareEntry } from '@/lib/constants'; +import { overlayRunColor, overlayRunIndex } from '@/lib/overlay-run-style'; + +const SYNTH_KEY_DELIM = '__uorun'; + +export interface UnofficialRunInfoLite { + id: number; + branch: string; + url: string; +} + +export interface OverlayChartGroup { + e2e: { data: InferenceData[]; gpus: HardwareConfig }; + interactivity: { data: InferenceData[]; gpus: HardwareConfig }; +} + +export type UnofficialChartDataMap = Record<string, OverlayChartGroup>; + +/** Build a unique per-run hwKey while keeping the original GPU base prefix. */ +export function makeSynthHwKey(origHwKey: string, runId: number): string { + return `${origHwKey}${SYNTH_KEY_DELIM}${runId}`; +} + +/** Reverse {@link makeSynthHwKey}: split on the last `__uorun`; returns null unless an all-digit run id follows it. */ +export function parseSynthHwKey(hwKey: string): { origHwKey: string; runId: number } | null { + const idx = hwKey.lastIndexOf(SYNTH_KEY_DELIM); + if (idx === -1) return null; + const origHwKey = hwKey.slice(0, idx); + const suffix = hwKey.slice(idx + SYNTH_KEY_DELIM.length); + if (!/^\d+$/.test(suffix)) return null; + return { origHwKey, runId: Number(suffix) }; +} + +export function isSynthHwKey(hwKey: string): boolean { + return hwKey.includes(SYNTH_KEY_DELIM); +} + +function makeSynthHardwareEntry( + origEntry: HardwareEntry | undefined, + origHwKey: string, + run: UnofficialRunInfoLite, + synthHwKey: string, +): HardwareEntry { + const branch = run.branch || `run ${run.id}`; + const baseLabel = origEntry?.label ?? origHwKey; + return { + name: synthHwKey.replaceAll('_', '-'), + label: `${baseLabel} • ${branch}`, + suffix: origEntry?.suffix ?? '', + gpu: origEntry?.gpu ?
`${origEntry.gpu} (UNOFFICIAL: ${branch})` : `UNOFFICIAL: ${branch}`, + framework: origEntry?.framework, + }; +} + +interface MergeArgs { + graphs: RenderableGraph[]; + hardwareConfig: HardwareConfig; + /** + * Per-(model_sequence) overlay chart data, indexed exactly as produced by + * {@link unofficial-run-provider#buildChartData}. We look up the entry for + * the currently-selected `${model}_${sequence}` key. + */ + unofficialChartData: UnofficialChartDataMap | null; + selectedModel: string; + selectedSequence: string; + selectedYAxisMetric: string; + selectedXAxisMetric: string | null; + selectedE2eXAxisMetric: string | null; + runIndexByUrl: Record; + unofficialRunInfos: UnofficialRunInfoLite[]; + /** + * Chart definitions to fall back on when `graphs` is empty. Lets the merger + * synthesize stub graphs so unofficial-only data (e.g. a model with no DB + * coverage but an unofficial sweep) still renders when the toggle is on. + * Optional — when omitted and `graphs` is empty, the merge is a no-op. + */ + chartDefinitions?: ChartDefinition[]; +} + +export interface MergeResult { + graphs: RenderableGraph[]; + hardwareConfig: HardwareConfig; + /** Map from synth hwKey → CSS color. ScatterGraph consults this before falling back to vendor colors. */ + colorOverrides: Record; +} + +/** + * Inject overlay rows into the official `graphs` as first-class points with + * synthesized per-run hwKeys, returning extended `hardwareConfig` and a + * color-override map for ScatterGraph's `resolveColor`. + * + * If `unofficialChartData` is null or has no rows for the selected + * (model, sequence), the result mirrors the input verbatim — the merge is a + * no-op and downstream behavior is unchanged. 
+ */ +export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { + const { + graphs: inputGraphs, + hardwareConfig, + unofficialChartData, + selectedModel, + selectedSequence, + selectedYAxisMetric, + selectedXAxisMetric, + selectedE2eXAxisMetric, + runIndexByUrl, + unofficialRunInfos, + chartDefinitions, + } = args; + + const dataKey = `${selectedModel}_${selectedSequence}`; + const overlayGroup = unofficialChartData?.[dataKey]; + if (!overlayGroup) { + return { graphs: inputGraphs, hardwareConfig, colorOverrides: {} }; + } + + // When there are no official graphs but caller supplied chartDefinitions, + // synthesize empty stubs so the merge still has a place to inject points. + const graphs: RenderableGraph[] = + inputGraphs.length === 0 && chartDefinitions + ? buildStubGraphsForMerge(selectedModel, selectedSequence, chartDefinitions) + : inputGraphs; + + const mergedHardwareConfig: HardwareConfig = { ...hardwareConfig }; + const colorOverrides: Record = {}; + + /** + * Process overlay rows for one chart type: re-key by (run, origHwKey), + * synthesize HardwareEntry/colorOverride entries on first encounter, and + * apply the same metric/x-axis pipeline that `useChartData` runs on + * official rows so the resulting points sit in the same coordinate space. + */ + const processForChart = ( + chartType: 'e2e' | 'interactivity', + rawRows: InferenceData[], + overlayHwConfig: HardwareConfig, + ): InferenceData[] => { + if (rawRows.length === 0) return []; + const effectiveXMetric = chartType === 'e2e' ? selectedE2eXAxisMetric : selectedXAxisMetric; + const processed = processOverlayChartData( + rawRows, + chartType, + selectedYAxisMetric, + effectiveXMetric, + ); + return processed.map((row) => { + const runIdx = overlayRunIndex(row.run_url ?? null, runIndexByUrl); + const run = unofficialRunInfos[runIdx] ?? unofficialRunInfos[0]; + // No runs known (defensive — provider always populates one when overlay + // data exists). 
Fall back to the original hwKey untouched. + if (!run) return row; + const origHwKey = String(row.hwKey); + const synthHwKey = makeSynthHwKey(origHwKey, run.id); + if (!(synthHwKey in mergedHardwareConfig)) { + const origEntry = hardwareConfig[origHwKey] ?? overlayHwConfig[origHwKey]; + mergedHardwareConfig[synthHwKey] = makeSynthHardwareEntry( + origEntry, + origHwKey, + run, + synthHwKey, + ); + colorOverrides[synthHwKey] = overlayRunColor(runIdx); + } + return { ...row, hwKey: synthHwKey }; + }); + }; + + const mergedGraphs: RenderableGraph[] = graphs.map((g) => { + const ct = g.chartDefinition.chartType as 'e2e' | 'interactivity'; + const overlayRows = ct === 'e2e' ? overlayGroup.e2e.data : overlayGroup.interactivity.data; + const overlayHwCfg = ct === 'e2e' ? overlayGroup.e2e.gpus : overlayGroup.interactivity.gpus; + const merged = processForChart(ct, overlayRows, overlayHwCfg); + if (merged.length === 0) return g; + return { ...g, data: [...g.data, ...merged] }; + }); + + return { + graphs: mergedGraphs, + hardwareConfig: mergedHardwareConfig, + colorOverrides, + }; +} + +/** + * Build empty-data stub graphs from chart definitions, used when the official + * model has no DB data but we still want the unofficial rows to render after + * merge. Mirrors `effectiveGraphs` in ChartDisplay's no-data fallback. 
+ */ +export function buildStubGraphsForMerge( + selectedModel: string, + selectedSequence: string, + chartDefinitions: ChartDefinition[], +): RenderableGraph[] { + return chartDefinitions.map((chartDefinition) => ({ + model: selectedModel, + sequence: selectedSequence, + chartDefinition, + data: [] as InferenceData[], + })); +} diff --git a/packages/app/src/lib/url-state.ts b/packages/app/src/lib/url-state.ts index e770caea..bcd79a01 100644 --- a/packages/app/src/lib/url-state.ts +++ b/packages/app/src/lib/url-state.ts @@ -37,6 +37,7 @@ const URL_STATE_KEYS = [ 'i_advlabel', 'i_gradlabel', 'i_linelabel', + 'i_uoff_ingested', // Evaluation 'e_rundate', 'e_bench', @@ -76,6 +77,7 @@ export const PARAM_DEFAULTS: Record = { i_advlabel: '', i_gradlabel: '', i_linelabel: '', + i_uoff_ingested: '', e_rundate: '', e_bench: '', e_hc: '', From 323d1b4e1f0a677ffab2f442e97a424c171fe577 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 06:25:04 +0000 Subject: [PATCH 5/6] feat(unofficial): use vendor-zone colors for ingested unofficial rows Drops the branch suffix from synth-key labels and the per-run color override so unofficial rows promoted via "Show as ingested" share the same vendor-aware palette as official rows. Two NVIDIA GPUs from one unofficial run now get distinct shades of green instead of one shared overlay-palette red. The branch + run URL still surface in `gpu` so the row tooltip retains provenance. 
Co-authored-by: Bryan Shan --- packages/app/src/lib/unofficial-merge.test.ts | 80 ++++++++++++++++--- packages/app/src/lib/unofficial-merge.ts | 29 +++++-- 2 files changed, 93 insertions(+), 16 deletions(-) diff --git a/packages/app/src/lib/unofficial-merge.test.ts b/packages/app/src/lib/unofficial-merge.test.ts index 971953b6..713dd2f7 100644 --- a/packages/app/src/lib/unofficial-merge.test.ts +++ b/packages/app/src/lib/unofficial-merge.test.ts @@ -6,6 +6,7 @@ import type { InferenceData, RenderableGraph, } from '@/components/inference/types'; +import { generateVendorColors, getVendor } from '@/lib/dynamic-colors'; import { isSynthHwKey, @@ -183,7 +184,7 @@ describe('mergeUnofficialIntoOfficial', () => { expect(result.colorOverrides).toEqual({}); }); - it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig + colorOverrides', () => { + it('rewrites overlay rows with synth hwKeys and adds matching hardwareConfig (no color override)', () => { const { graphs, hardwareConfig } = emptyOfficial(); const result = mergeUnofficialIntoOfficial({ graphs, @@ -205,14 +206,19 @@ describe('mergeUnofficialIntoOfficial', () => { expect(synthKeys).toContain('h100_vllm__uorun100'); expect(synthKeys).toContain('a100_sglang__uorun100'); - // The synth keys are present in hardwareConfig with branch-bearing labels. + // The synth keys are present in hardwareConfig with bare GPU labels — the + // branch is intentionally NOT in the legend label (the run is still + // recoverable from `gpu` for the row tooltip). const h100Synth = result.hardwareConfig['h100_vllm__uorun100']; - expect(h100Synth.label).toBe('H100 • feature-branch-a'); + expect(h100Synth.label).toBe('H100'); + expect(h100Synth.label).not.toContain('feature-branch-a'); expect(h100Synth.gpu).toContain('UNOFFICIAL: feature-branch-a'); - // Color overrides are populated for each synth key (palette-based, not GPU-vendor). 
- expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); - expect(result.colorOverrides['a100_sglang__uorun100']).toBe('var(--overlay-run-0)'); + // No color overrides are populated — colors fall through to the + // vendor-aware system in dynamic-colors.ts so two NVIDIA GPUs from a + // single unofficial run get distinct shades of green instead of one + // shared overlay-palette color. + expect(result.colorOverrides).toEqual({}); }); it('keeps multiple runs separate so each (run, GPU) becomes its own legend entry', () => { @@ -249,9 +255,18 @@ describe('mergeUnofficialIntoOfficial', () => { expect(h100Keys).toContain('h100_vllm__uorun100'); expect(h100Keys).toContain('h100_vllm__uorun200'); - expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100 • feature-branch-b'); - expect(result.colorOverrides['h100_vllm__uorun100']).toBe('var(--overlay-run-0)'); - expect(result.colorOverrides['h100_vllm__uorun200']).toBe('var(--overlay-run-1)'); + // Both runs of the same GPU get the bare GPU label — visual disambiguation + // is done by the vendor-zone color system, which assigns distinct hues + // within the same vendor band. Provenance still surfaces via `gpu`. + expect(result.hardwareConfig['h100_vllm__uorun100'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun200'].label).toBe('H100'); + expect(result.hardwareConfig['h100_vllm__uorun100'].gpu).toContain( + 'UNOFFICIAL: feature-branch-a', + ); + expect(result.hardwareConfig['h100_vllm__uorun200'].gpu).toContain( + 'UNOFFICIAL: feature-branch-b', + ); + expect(result.colorOverrides).toEqual({}); }); it('preserves official rows alongside merged overlay rows', () => { @@ -318,3 +333,50 @@ describe('mergeUnofficialIntoOfficial', () => { expect(result.graphs.every((g) => g.data.length > 0)).toBe(true); }); }); + +// Pull a hue out of an `oklch(L C H)` string for assertions below. 
+function hueOf(s: string): number { + const m = s.match(/oklch\([^)]*\s+([\d.]+)\)/); + return m ? Number(m[1]) : NaN; +} + +describe('synth hwKey color integration with generateVendorColors', () => { + // Regression: previously, two NVIDIA GPUs from one unofficial run shared a + // single overlay-palette color (e.g. both rendered red), making B200 and + // B300 visually identical. Now the merge omits color overrides and the + // vendor-zone palette assigns each synth key its own hue within the + // vendor's band. + it('assigns distinct shades within the vendor zone to two NVIDIA GPUs from one unofficial run', () => { + const synthKeys = [makeSynthHwKey('b200_vllm', 100), makeSynthHwKey('b300_vllm', 100)]; + expect(getVendor(synthKeys[0])).toBe('nvidia'); + expect(getVendor(synthKeys[1])).toBe('nvidia'); + const colors = generateVendorColors(synthKeys, 'light'); + expect(colors[synthKeys[0]]).toBeDefined(); + expect(colors[synthKeys[1]]).toBeDefined(); + expect(colors[synthKeys[0]]).not.toBe(colors[synthKeys[1]]); + }); + + it('keeps NVIDIA synth keys inside the NVIDIA hue zone and AMD synth keys inside AMD', () => { + const nvidiaSynth = makeSynthHwKey('b200_vllm', 100); + const amdSynth = makeSynthHwKey('mi300x_sglang', 100); + const colors = generateVendorColors([nvidiaSynth, amdSynth], 'light'); + // VENDOR_OKLCH_ZONES.nvidia is 120–170 (greens/teals). + const nvidiaHue = hueOf(colors[nvidiaSynth]); + expect(nvidiaHue).toBeGreaterThanOrEqual(120); + expect(nvidiaHue).toBeLessThanOrEqual(170); + // VENDOR_OKLCH_ZONES.amd is 12–42 (reds/oranges). + const amdHue = hueOf(colors[amdSynth]); + expect(amdHue).toBeGreaterThanOrEqual(12); + expect(amdHue).toBeLessThanOrEqual(42); + }); + + it('does not pin two unofficial runs of the same GPU to one color', () => { + // Both synth keys share the `b200_vllm` base, so they fall in the same + // sort bucket — but generateVendorColors still spreads them across + // distinct hues within the NVIDIA zone. 
+ const a = makeSynthHwKey('b200_vllm', 100); + const b = makeSynthHwKey('b200_vllm', 200); + const colors = generateVendorColors([a, b], 'light'); + expect(colors[a]).not.toBe(colors[b]); + }); +}); diff --git a/packages/app/src/lib/unofficial-merge.ts b/packages/app/src/lib/unofficial-merge.ts index e29db2e8..248a1e62 100644 --- a/packages/app/src/lib/unofficial-merge.ts +++ b/packages/app/src/lib/unofficial-merge.ts @@ -19,7 +19,7 @@ import type { } from '@/components/inference/types'; import { processOverlayChartData } from '@/components/inference/utils'; import type { HardwareEntry } from '@/lib/constants'; -import { overlayRunColor, overlayRunIndex } from '@/lib/overlay-run-style'; +import { overlayRunIndex } from '@/lib/overlay-run-style'; const SYNTH_KEY_DELIM = '__uorun'; @@ -63,9 +63,12 @@ function makeSynthHardwareEntry( ): HardwareEntry { const branch = run.branch || `run ${run.id}`; const baseLabel = origEntry?.label ?? origHwKey; + // Legend label intentionally drops the branch — the color (assigned by the + // shared vendor-zone palette) is what disambiguates runs/GPUs visually. + // The branch (or `run <id>` when it is empty) stays in `gpu` as provenance; the run URL is not copied there. return { name: synthHwKey.replaceAll('_', '-'), - label: `${baseLabel} • ${branch}`, + label: baseLabel, suffix: origEntry?.suffix ?? '', gpu: origEntry?.gpu ? `${origEntry.gpu} (UNOFFICIAL: ${branch})` : `UNOFFICIAL: ${branch}`, framework: origEntry?.framework, @@ -100,7 +103,14 @@ interface MergeArgs { export interface MergeResult { graphs: RenderableGraph[]; hardwareConfig: HardwareConfig; - /** Map from synth hwKey → CSS color. ScatterGraph consults this before falling back to vendor colors. */ + /** + * Map from synth hwKey → CSS color. ScatterGraph consults this before falling + * back to vendor colors.
Currently empty — synth keys preserve the original + * GPU base prefix (`b200_vllm__uorun123`), so the standard + * `generateVendorColors` pipeline picks a vendor-appropriate hue for each + * synth key automatically. The override map is retained so callers can still + * pin a specific color per synth key if needed. + */ colorOverrides: Record; } @@ -146,9 +156,15 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { /** * Process overlay rows for one chart type: re-key by (run, origHwKey), - * synthesize HardwareEntry/colorOverride entries on first encounter, and - * apply the same metric/x-axis pipeline that `useChartData` runs on - * official rows so the resulting points sit in the same coordinate space. + * synthesize a HardwareEntry on first encounter, and apply the same + * metric/x-axis pipeline that `useChartData` runs on official rows so the + * resulting points sit in the same coordinate space. + * + * No color override is set: the synth hwKey preserves the original GPU base + * prefix, so the standard vendor-zone color generator distributes hues + * across all (official + synth) keys for a vendor automatically — that's + * how two NVIDIA GPUs from one unofficial run end up as different shades + * of green rather than two copies of the same overlay-palette color. 
*/ const processForChart = ( chartType: 'e2e' | 'interactivity', @@ -179,7 +195,6 @@ export function mergeUnofficialIntoOfficial(args: MergeArgs): MergeResult { run, synthHwKey, ); - colorOverrides[synthHwKey] = overlayRunColor(runIdx); } return { ...row, hwKey: synthHwKey }; }); From 462dc0f6222a47775c3efe4cdba3c2298b608af1 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 21:09:38 +0000 Subject: [PATCH 6/6] feat(unofficial): default to 8K/256 + Output Token Throughput per GPU on unofficial-run load Temporary branch-only override: when a `?unofficialrun=` URL loads and the user did not pin `i_seq` / `i_metric` themselves, default the sequence to `8K / 256` and the y-axis metric to `Output Token Throughput per GPU` so InfiniteBench-style sweeps land on a useful default view alongside the model auto-switch from #243. Mirrors the dedupe shape of `computeAutoSwitchDecision` via a new pure helper `computeUnofficialOverrideDecision` so manual user picks stick once URL-synced and a fresh run-set transition can re-arm the override. 
Co-authored-by: Bryan Shan --- .../src/components/GlobalFilterContext.tsx | 22 ++++- .../components/inference/InferenceContext.tsx | 28 ++++++- .../lib/unofficial-run-auto-switch.test.ts | 80 ++++++++++++++++++- .../app/src/lib/unofficial-run-auto-switch.ts | 41 ++++++++++ 4 files changed, 167 insertions(+), 4 deletions(-) diff --git a/packages/app/src/components/GlobalFilterContext.tsx b/packages/app/src/components/GlobalFilterContext.tsx index b7e12e08..089d385f 100644 --- a/packages/app/src/components/GlobalFilterContext.tsx +++ b/packages/app/src/components/GlobalFilterContext.tsx @@ -25,7 +25,10 @@ import { Sequence, SEQUENCE_OPTIONS, } from '@/lib/data-mappings'; -import { computeAutoSwitchDecision } from '@/lib/unofficial-run-auto-switch'; +import { + computeAutoSwitchDecision, + computeUnofficialOverrideDecision, +} from '@/lib/unofficial-run-auto-switch'; import type { AvailabilityRow, WorkflowInfoResponse } from '@/lib/api'; interface RunInfo { @@ -201,6 +204,23 @@ export function GlobalFilterProvider({ children }: { children: ReactNode }) { } }, [unofficialAvailable, selectedModel]); + // TEMPORARY (this branch only): default the sequence to `8K / 256` when an + // unofficial run loads and the URL didn't pin `i_seq`. Same dedupe shape as + // the model auto-switch above — manual sequence picks stick because the URL + // gets `i_seq` written by the URL-sync effect after the override fires. 
+ const lastUnofficialSeqOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_seq'), + lastUnofficialSeqOverrideRef.current, + ); + lastUnofficialSeqOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + setSelectedSequence(Sequence.EightK_256); + } + }, [unofficialAvailable]); + // Sequences available for the selected model (DB ∪ unofficial run for this model) const availableSequences = useMemo(() => { const unofficialSeqs = unofficialAvailable diff --git a/packages/app/src/components/inference/InferenceContext.tsx b/packages/app/src/components/inference/InferenceContext.tsx index bd1c2e21..2eb9da10 100644 --- a/packages/app/src/components/inference/InferenceContext.tsx +++ b/packages/app/src/components/inference/InferenceContext.tsx @@ -29,6 +29,7 @@ import type { import { useUnofficialRun } from '@/components/unofficial-run-provider'; import chartDefinitions from '@/components/inference/inference-chart-config.json'; import { mergeUnofficialIntoOfficial } from '@/lib/unofficial-merge'; +import { computeUnofficialOverrideDecision } from '@/lib/unofficial-run-auto-switch'; import { Button } from '@/components/ui/button'; import { Dialog, @@ -200,8 +201,31 @@ export function InferenceProvider({ // synth hwKeys and merged into `graphs` so they participate in the same // filter/optimal-only/legend pipeline as official data. The resulting // `hwColorOverrides` map is consumed by ScatterGraph's color resolver. - const { mergeAsIngested, unofficialChartData, unofficialRunInfos, runIndexByUrl } = - useUnofficialRun(); + const { + mergeAsIngested, + unofficialChartData, + unofficialRunInfos, + runIndexByUrl, + availableModelsAndSequences: unofficialAvailable, + } = useUnofficialRun(); + + // TEMPORARY (this branch only): default the y-axis metric to "Output Token + // Throughput per GPU" when an unofficial run loads and the URL didn't pin + // `i_metric`. 
Mirrors the sequence override in GlobalFilterContext — manual + // metric picks stick because the URL gets `i_metric` written after the + // override fires. + const lastUnofficialMetricOverrideRef = useRef(''); + useEffect(() => { + const decision = computeUnofficialOverrideDecision( + unofficialAvailable, + getUrlParam('i_metric'), + lastUnofficialMetricOverrideRef.current, + ); + lastUnofficialMetricOverrideRef.current = decision.nextKey; + if (decision.shouldOverride) { + setSelectedYAxisMetric('y_outputTputPerGpu'); + } + }, [unofficialAvailable]); const { graphs, hardwareConfig, hwColorOverrides } = useMemo(() => { if (!mergeAsIngested) { diff --git a/packages/app/src/lib/unofficial-run-auto-switch.test.ts b/packages/app/src/lib/unofficial-run-auto-switch.test.ts index f58776ad..485ca237 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.test.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.test.ts @@ -3,7 +3,10 @@ import { describe, expect, it } from 'vitest'; import type { AvailableModelSequence } from '@/components/unofficial-run-provider'; import { Model, Sequence } from '@/lib/data-mappings'; -import { computeAutoSwitchDecision } from './unofficial-run-auto-switch'; +import { + computeAutoSwitchDecision, + computeUnofficialOverrideDecision, +} from './unofficial-run-auto-switch'; function entry(model: Model, sequence: Sequence): AvailableModelSequence { return { model, sequence, precisions: [] }; @@ -112,3 +115,78 @@ describe('computeAutoSwitchDecision', () => { expect(a.nextKey).toBe(b.nextKey); }); }); + +describe('computeUnofficialOverrideDecision', () => { + it('returns no-op and resets the key when no unofficial run is loaded', () => { + expect(computeUnofficialOverrideDecision([], undefined, 'stale-key')).toEqual({ + nextKey: '', + shouldOverride: false, + }); + }); + + it('fires the override on a fresh run set when the URL does not pin the param', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const 
decision = computeUnofficialOverrideDecision(run, undefined, ''); + expect(decision.shouldOverride).toBe(true); + expect(decision.nextKey).toBe(Model.DeepSeek_V4_Pro); + }); + + it('respects an explicit URL pin even on a fresh run set', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const decision = computeUnofficialOverrideDecision(run, '1k/1k', ''); + expect(decision.shouldOverride).toBe(false); + // Ref must not be advanced — if the URL is later cleared we still want + // a fresh load of the same run to fire the override. + expect(decision.nextKey).toBe(''); + }); + + it('does not re-fire after the override has already been applied for this run set', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const lastKey = Model.DeepSeek_V4_Pro; + const decision = computeUnofficialOverrideDecision(run, undefined, lastKey); + expect(decision.shouldOverride).toBe(false); + expect(decision.nextKey).toBe(lastKey); + }); + + it('re-arms after the overlay set is cleared so a subsequent load can override again', () => { + const run = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const first = computeUnofficialOverrideDecision(run, undefined, ''); + expect(first.shouldOverride).toBe(true); + + const cleared = computeUnofficialOverrideDecision([], undefined, first.nextKey); + expect(cleared).toEqual({ nextKey: '', shouldOverride: false }); + + const run2 = [entry(Model.Kimi_K2_5, Sequence.OneK_OneK)]; + const second = computeUnofficialOverrideDecision(run2, undefined, cleared.nextKey); + expect(second.shouldOverride).toBe(true); + }); + + it('ignores sequence-only deltas in the dedupe key', () => { + const oneK = [entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK)]; + const both = [ + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.EightK_OneK), + ]; + const first = computeUnofficialOverrideDecision(oneK, undefined, ''); + const second = computeUnofficialOverrideDecision(both, 
undefined, first.nextKey); + expect(first.nextKey).toBe(second.nextKey); + expect(second.shouldOverride).toBe(false); + }); + + it('produces a deterministic key across insertion orders', () => { + const orderA = [ + entry(Model.MiniMax_M2_5, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + ]; + const orderB = [ + entry(Model.Kimi_K2_5, Sequence.OneK_OneK), + entry(Model.DeepSeek_V4_Pro, Sequence.OneK_OneK), + entry(Model.MiniMax_M2_5, Sequence.OneK_OneK), + ]; + const a = computeUnofficialOverrideDecision(orderA, undefined, ''); + const b = computeUnofficialOverrideDecision(orderB, undefined, ''); + expect(a.nextKey).toBe(b.nextKey); + expect(a.shouldOverride).toBe(b.shouldOverride); + }); +}); diff --git a/packages/app/src/lib/unofficial-run-auto-switch.ts b/packages/app/src/lib/unofficial-run-auto-switch.ts index a4af4683..1910e95c 100644 --- a/packages/app/src/lib/unofficial-run-auto-switch.ts +++ b/packages/app/src/lib/unofficial-run-auto-switch.ts @@ -46,3 +46,44 @@ export function computeAutoSwitchDecision( } return { nextKey: key, modelToSet: sortedModels[0] }; } + +export interface UnofficialOverrideDecision { + /** New value the caller should write into the dedupe ref. */ + nextKey: string; + /** Whether the caller should apply the temporary override. */ + shouldOverride: boolean; +} + +/** + * TEMPORARY (this branch only): when an unofficial run loads, override the + * default sequence to `8K / 256` and the default y-axis metric to "Output + * Token Throughput per GPU" so the InfiniteBench-style sweeps land on a + * useful default view. Mirrors the dedupe behavior of + * {@link computeAutoSwitchDecision} so manual user changes stick once they + * are URL-synced, and a fresh run-set transition can re-arm the override. + * + * - When the overlay set is empty, the dedupe key is reset. + * - When the URL pinned the corresponding param explicitly, no override + * fires (respect intent). 
+ * - The dedupe key is the sorted, de-duplicated overlay model list joined
+ *   with commas (same shape as the auto-switch key), so a change that only
+ *   touches sequences keeps the key stable and preserves a manual pick.
+ */
+export function computeUnofficialOverrideDecision(
+  unofficialAvailable: AvailableModelSequence[],
+  urlValue: string | undefined,
+  lastKey: string,
+): UnofficialOverrideDecision {
+  // Nothing loaded: clear the dedupe key so a later load can fire again.
+  if (unofficialAvailable.length === 0) return { nextKey: '', shouldOverride: false };
+
+  // An explicit URL value wins; the key is deliberately left untouched.
+  if (urlValue) return { nextKey: lastKey, shouldOverride: false };
+
+  // Order-independent identity of the loaded run set.
+  const models = new Set(unofficialAvailable.map((entry) => entry.model));
+  const runSetKey = [...models].toSorted().join(',');
+
+  // If the key is unchanged, runSetKey === lastKey, so returning it is equivalent.
+  return { nextKey: runSetKey, shouldOverride: runSetKey !== lastKey };
+}