From 511177521f8c31f00be5237eb6563e95c9c5956a Mon Sep 17 00:00:00 2001 From: johnnysoftware7 <6782926+johnnysoftware7@users.noreply.github.com> Date: Sat, 9 May 2026 10:15:21 -0400 Subject: [PATCH] =?UTF-8?q?test:=20bump=20remaining=20claude-opus-4-6=20?= =?UTF-8?q?=E2=86=92=204-7=20references?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test suite migration to claude-opus-4-7 in v1.x left 12 references to claude-opus-4-6 across 6 files under test/: 11 `model:` settings in 5 e2e test files, plus one example comment in test/helpers/eval-store.ts. Bumping for consistency with the rest of the suite (test/helpers/providers/claude.ts, pricing.ts, and the new skill-e2e-opus-47.test.ts already use 4-7). --- test/helpers/eval-store.ts | 2 +- test/skill-e2e-design.test.ts | 4 ++-- test/skill-e2e-plan.test.ts | 12 ++++++------ test/skill-e2e-qa-bugs.test.ts | 2 +- test/skill-e2e-review.test.ts | 2 +- test/skill-e2e-workflow.test.ts | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/helpers/eval-store.ts b/test/helpers/eval-store.ts index 9942f1e372..9a801ae1c2 100644 --- a/test/helpers/eval-store.ts +++ b/test/helpers/eval-store.ts @@ -68,7 +68,7 @@ export interface EvalTestEntry { last_tool_call?: string; // e.g. "Write(review-output.md)" // Model + timing diagnostics (added for Sonnet/Opus split) - model?: string; // e.g. 'claude-sonnet-4-6' or 'claude-opus-4-6' + model?: string; // e.g. 
'claude-sonnet-4-6' or 'claude-opus-4-7' first_response_ms?: number; // time from spawn to first NDJSON line max_inter_turn_ms?: number; // peak latency between consecutive tool calls diff --git a/test/skill-e2e-design.test.ts b/test/skill-e2e-design.test.ts index a207965f54..123d522b5c 100644 --- a/test/skill-e2e-design.test.ts +++ b/test/skill-e2e-design.test.ts @@ -103,7 +103,7 @@ Write DESIGN.md and CLAUDE.md (or update it) in the working directory.`, timeout: 360_000, testName: 'design-consultation-core', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/design-consultation core', result); @@ -227,7 +227,7 @@ Skip research. Skip font preview. Skip any AskUserQuestion calls — this is non timeout: 360_000, testName: 'design-consultation-existing', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/design-consultation existing', result); diff --git a/test/skill-e2e-plan.test.ts b/test/skill-e2e-plan.test.ts index 269c889c39..cb630ca97d 100644 --- a/test/skill-e2e-plan.test.ts +++ b/test/skill-e2e-plan.test.ts @@ -82,7 +82,7 @@ Focus on reviewing the plan content: architecture, error handling, security, and timeout: 360_000, testName: 'plan-ceo-review', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-ceo-review', result); @@ -167,7 +167,7 @@ Focus on reviewing the plan content: architecture, error handling, security, and timeout: 360_000, testName: 'plan-ceo-review-selective', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-ceo-review (SELECTIVE)', result); @@ -233,7 +233,7 @@ Write your expansion proposals to ${planDir}/proposals.md with ONLY the proposal timeout: 360_000, testName: 'plan-ceo-review-expansion-energy', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-ceo-review (EXPANSION ENERGY)', result); @@ -333,7 +333,7 @@ Focus on architecture, code quality, tests, and performance sections.`, timeout: 
360_000, testName: 'plan-eng-review', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-eng-review', result); @@ -459,7 +459,7 @@ Write your review to ${planDir}/review-output.md`, timeout: 360_000, testName: 'plan-eng-review-artifact', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-eng-review artifact', result); @@ -679,7 +679,7 @@ This review report at the bottom of the plan is the MOST IMPORTANT deliverable o timeout: 360_000, testName: 'plan-review-report', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/plan-eng-review report', result); diff --git a/test/skill-e2e-qa-bugs.test.ts b/test/skill-e2e-qa-bugs.test.ts index f9fa8a6793..93514295f2 100644 --- a/test/skill-e2e-qa-bugs.test.ts +++ b/test/skill-e2e-qa-bugs.test.ts @@ -100,7 +100,7 @@ CRITICAL RULES: timeout: 300_000, testName: `qa-${label}`, runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost(`/qa ${label}`, result); diff --git a/test/skill-e2e-review.test.ts b/test/skill-e2e-review.test.ts index 0e0bca0258..1adbe25c71 100644 --- a/test/skill-e2e-review.test.ts +++ b/test/skill-e2e-review.test.ts @@ -514,7 +514,7 @@ Analyze the git history and produce the narrative report as described in the SKI timeout: 300_000, testName: 'retro', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/retro', result); diff --git a/test/skill-e2e-workflow.test.ts b/test/skill-e2e-workflow.test.ts index ee08290e8e..52892a50df 100644 --- a/test/skill-e2e-workflow.test.ts +++ b/test/skill-e2e-workflow.test.ts @@ -503,7 +503,7 @@ Write the full output (including the GATE verdict) to ${codexDir}/codex-output.m timeout: 300_000, testName: 'codex-review', runId, - model: 'claude-opus-4-6', + model: 'claude-opus-4-7', }); logCost('/codex review', result);