From ac228df108a458fc12622cabbb7be36dcb1b89af Mon Sep 17 00:00:00 2001 From: Fernando Date: Thu, 7 May 2026 17:34:37 -0300 Subject: [PATCH] Add governed Claude model profiles --- .claude/skills/opusminimax/SKILL.md | 13 + .claude/skills/opussonnet/SKILL.md | 8 + .claude/skills/opusworkflow/SKILL.md | 30 ++ .../green/valid-sonnet-model-profile-run.json | 67 +++++ AGENTS.md | 1 + CLAUDE.md | 6 +- README.md | 17 +- SPEC.md | 271 +++++++++++------- docs/harness-capability-map.json | 40 +-- schemas/opusminimax-run.schema.json | 8 +- scripts/artifact-lint.sh | 47 ++- scripts/opusminimax-doctor.sh | 30 +- scripts/opusminimax.sh | 140 +++++++-- scripts/opussonnetworkflow.sh | 1 + scripts/opusworkflow-smoke.sh | 42 ++- scripts/opusworkflow.sh | 70 ++++- scripts/test-harness.sh | 4 + 17 files changed, 639 insertions(+), 156 deletions(-) create mode 100644 .taste/fixtures/artifact-lint/green/valid-sonnet-model-profile-run.json diff --git a/.claude/skills/opusminimax/SKILL.md b/.claude/skills/opusminimax/SKILL.md index f630735..dc83e63 100644 --- a/.claude/skills/opusminimax/SKILL.md +++ b/.claude/skills/opusminimax/SKILL.md @@ -41,6 +41,10 @@ Worker summaries are claims until verified by diffs, logs, tests, or artifacts. - If `executor_provider=claude-sonnet` is explicit, treat it as the optional Claude-only `/opussonnet` route: no MiniMax base URL, executor model must be Sonnet, and the run artifact must not imply MiniMax executed anything. +- If `model_profile=sonnet|opus|default|custom` is explicit, treat it as a + governed Anthropic-only route: no MiniMax base URL, no MiniMax executor model, + and no runtime model identity claim without `/status`, sentinel, or artifact + proof. - Run `/introspect` before plan freeze, after executor execution, after failed verification, and before push or ship decisions. - Run `/verify` against `SPEC.md` after executor aggregation. 
@@ -68,6 +72,8 @@ Record: - planner profile path and whether it is provider-neutral - executor profile path and whether it uses `MiniMax-M2.7-highspeed` +- selected `model_profile` and whether it is default cost-optimized or an + explicit user override - requested planner model - requested executor model - local capacity ceiling @@ -207,6 +213,7 @@ When `/opusminimax` executes or prepares a real run, produce: "run_id": "YYYYMMDD-HHMMSS-task", "outer_route": "opusworkflow", "inner_contract": "workflow", + "model_profile": "minimax", "executor_provider": "minimax", "planner_identity_status": "blocked", "executor_identity_status": "configured", @@ -219,6 +226,12 @@ When `/opusminimax` executes or prepares a real run, produce: "planner_requested": "claude-opus-4-7", "executor_requested": "MiniMax-M2.7-highspeed" }, + "model_route": { + "profile": "minimax", + "planner": {"provider": "anthropic", "requested_model": "claude-opus-4-7", "identity_status": "blocked"}, + "executor": {"provider": "minimax", "requested_model": "MiniMax-M2.7-highspeed", "identity_status": "configured"}, + "fallback_policy": "fail-closed-unless-explicit" + }, "capacity": { "local_ceiling": 10, "provider_ceiling": 1, diff --git a/.claude/skills/opussonnet/SKILL.md b/.claude/skills/opussonnet/SKILL.md index 8e9ed35..528d40e 100644 --- a/.claude/skills/opussonnet/SKILL.md +++ b/.claude/skills/opussonnet/SKILL.md @@ -25,6 +25,8 @@ MiniMax is not required for this optional route. - This is a suggested alternative, not the standard default. The default `/opusworkflow` route remains Claude/Opus judgment plus MiniMax-M2.7-highspeed execution. +- It is also available as `/opusworkflow --model-profile opussonnet` or the + backward-compatible `/opusworkflow --executor-provider claude-sonnet`. - Use the same governed `/workflow` lifecycle: research brief, SPEC, bounded implementation, `/introspect`, `/verify`, and command-backed closeout. 
- Use Claude Code's `opusplan` behavior for the interactive session when @@ -75,6 +77,12 @@ claude /opussonnet "build or fix the thing" ``` +Equivalent static artifact preparation: + +```bash +bash scripts/opusworkflow.sh --task "build or fix the thing" --model-profile opussonnet +``` + ## Anti-Patterns - Presenting `opussonnet` as the default MiniMax-backed budget strategy. diff --git a/.claude/skills/opusworkflow/SKILL.md b/.claude/skills/opusworkflow/SKILL.md index 48d83cd..8e2a10a 100644 --- a/.claude/skills/opusworkflow/SKILL.md +++ b/.claude/skills/opusworkflow/SKILL.md @@ -26,6 +26,17 @@ Optional Claude-only sibling: Opus 4.7 planning/judgment and Sonnet 4.6 execution, with no MiniMax token. ``` +Model-profile selector: + +```text +--model-profile minimax # default: Opus judgment + MiniMax execution +--model-profile opussonnet # Opus judgment + Sonnet execution +--model-profile sonnet # Sonnet planning + Sonnet execution +--model-profile opus # Opus planning + Opus execution, explicit high-cost route +--model-profile default # Claude Code account default +--model-profile custom --planner-model MODEL --executor-model MODEL +``` + ## Contract - Treat `/opusworkflow` as `/opusminimax --mode workflow` with stricter budget @@ -33,6 +44,8 @@ Opus 4.7 planning/judgment and Sonnet 4.6 execution, with no MiniMax token. - Keep MiniMax as the standard executor provider. Use `/opussonnet` or `--executor-provider claude-sonnet` only when the operator explicitly wants the optional Claude-only route. +- Allow explicit model freedom through `--model-profile`; treat it as a + governed route request, not runtime identity proof. - Record the specialist being executed as `inner_contract=workflow|agentfactory|hiveworkflow|parallel|defineicp|deepretaste|demo|visualizeworkflow`. 
- If the task asks for Hermes, Hive, ICP/taste mutation, approved @@ -86,6 +99,15 @@ execution: claude-sonnet-4-6 through opusplan/Sonnet profile MiniMax token: not required ``` +Explicit Anthropic-only profiles: + +```text +sonnet: claude-sonnet-4-6 for planning and execution +opus: claude-opus-4-7 for planning and execution; use intentionally +default: Claude Code account default; confirm with /status before claims +custom: explicit planner/executor model IDs; static gates only prove request shape +``` + ## Workflow 1. Run provider/capacity preflight: @@ -117,6 +139,14 @@ For the optional Claude-only executor: bash scripts/opusworkflow.sh --task "$ARGUMENTS" --executor-provider claude-sonnet ``` +For explicit model profiles: + +```bash +bash scripts/opusworkflow.sh --task "$ARGUMENTS" --model-profile sonnet +bash scripts/opusworkflow.sh --task "$ARGUMENTS" --model-profile opus +bash scripts/opusworkflow.sh --task "$ARGUMENTS" --model-profile custom --planner-model claude-sonnet-4-6 --executor-model claude-sonnet-4-6 +``` + 4. MiniMax executes only planner-approved packets with owned paths, forbidden paths, allowed commands, acceptance checks, rollback notes, and stop conditions. 
diff --git a/.taste/fixtures/artifact-lint/green/valid-sonnet-model-profile-run.json b/.taste/fixtures/artifact-lint/green/valid-sonnet-model-profile-run.json new file mode 100644 index 0000000..c45aabc --- /dev/null +++ b/.taste/fixtures/artifact-lint/green/valid-sonnet-model-profile-run.json @@ -0,0 +1,67 @@ +{ + "artifact_type": "opusminimax-run", + "run_id": "20260507-150000-sonnet-profile", + "outer_route": "opusworkflow", + "inner_contract": "workflow", + "model_profile": "sonnet", + "executor_provider": "anthropic", + "planner_identity_status": "blocked", + "executor_identity_status": "configured", + "fallback_status": "explicit_user_override", + "provider_profiles": { + "planner": { + "path": ".claude/settings.opusminimax-planner.example.json", + "anthropic_base_url": "", + "model": "claude-sonnet-4-6" + }, + "executor": { + "path": ".claude/settings.opusminimax-planner.example.json", + "anthropic_base_url": "", + "model": "claude-sonnet-4-6", + "provider": "anthropic" + } + }, + "model_ids": { + "planner_requested": "claude-sonnet-4-6", + "executor_requested": "claude-sonnet-4-6" + }, + "model_route": { + "profile": "sonnet", + "planner": { + "provider": "anthropic", + "requested_model": "claude-sonnet-4-6", + "identity_status": "blocked" + }, + "executor": { + "provider": "anthropic", + "requested_model": "claude-sonnet-4-6", + "identity_status": "configured" + }, + "fallback_policy": "fail-closed-unless-explicit" + }, + "capacity": { + "local_ceiling": 10, + "provider_ceiling": 1, + "task_packet_count": 1, + "safety_cap": 1, + "effective_concurrency": 1 + }, + "packets": [ + "P1" + ], + "verification": { + "status": "runtime-pending", + "commands_run": [ + "bash scripts/opusworkflow.sh --task ... 
--model-profile sonnet" + ], + "closeout_status": "runtime-pending" + }, + "failures": [], + "retries": 0, + "final_confidence": "medium", + "model_identity_confirmed": false, + "claims": { + "opus_planned": false, + "runtime_model_calls": false + } +} diff --git a/AGENTS.md b/AGENTS.md index f4bbadf..79111d6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -12,6 +12,7 @@ This repo is optimized for Claude Code first, but it also ships project-scoped C - For `/deepretaste`, run a governed intent-to-ICP-to-taste bootstrap or retaste workflow, not a generic persona generator. Keep `/deepresearch` available for non-taste research such as architecture, debugging, benchmarks, provider behavior, market work, and product strategy; `/deepretaste` uses it only when findings will shape intent, ICP, or taste-kernel decisions. Detect product scope with evidence, compute an effective parallel or hive budget as a ceiling, define primary/secondary/anti-ICPs, and route fresh missing kernels through `/tastebootstrap`. Existing `taste.md` and `taste.vision` must use `/defineicp` proposal/apply semantics: no mutation by default, exact apply approval, backups, hashes, changed-line trace, protected-kernel preservation, validation, and rollback evidence. - For `/defineicp`, run a governed ICP-to-taste evolution workflow. Define primary, secondary, and anti-ICPs with deepresearch discipline, claim/source ledgers, buyer/user distinction, JTBD, disqualifiers, and missing-evidence labels. Default to proposal mode: draft ICP artifacts and `taste.md` / `taste.vision` patch proposals without mutation. Apply only with explicit approval, backups, hashes, changed-line trace, protected-kernel preservation, validation, and rollback evidence. - For `/opusworkflow`, use the recommended cost-optimized end-to-end route for a Claude subscription plus MiniMax Plus-Highspeed Token Plan. 
It wraps `/opusminimax --mode workflow`: Claude/Opus is used sparingly for plan/spec freeze, adversarial review, and final ship/no-ship judgment when model identity is proven; MiniMax-M2.7-highspeed handles bulk coding, repair loops, and exploration packets. Default executor concurrency to 1 for Plus-Highspeed unless runtime provider evidence proves a higher safe ceiling. Never claim the $20 Claude plan guarantees Opus, never silently enable PAYG, and never treat local 10-lane capacity as MiniMax provider capacity. +- For explicit Claude model choice, `/opusworkflow` may use `--model-profile minimax|opussonnet|sonnet|opus|default|custom`. Treat this as a governed model request, not runtime proof. The default remains `minimax`; Anthropic-only profiles must not inherit MiniMax base URLs or MiniMax model IDs, and Opus/Sonnet identity still needs `/status`, sentinel, or artifact evidence before claims. - For `/opussonnet`, use the optional Claude-only suggested route installed by `setup.sh --mode opussonnet`. It keeps the same governed workflow but requests Claude Code `opusplan`, pins `claude-opus-4-7` for plan/judgment and `claude-sonnet-4-6` for execution, requires no MiniMax token, and must never be described as the default MiniMax-backed budget strategy. Runtime Opus/Sonnet identity remains account-dependent and must be proven before claims. - For ordinary build/plan tasks in this harness, treat `/opusworkflow` as the default outer route unless the user explicitly requests plain `/workflow`, the provider split is unavailable, or the task is analysis-only. Plain `/workflow` remains the underlying governed lifecycle and manual fallback. - For any mutating specialist route, keep `/opusworkflow` as the default outer route and record the specialist as the inner contract. 
Examples: use `/opusworkflow "run this as a governed /agentfactory workflow: ..."` for Hermes agents, `/opusworkflow "run this as a /hiveworkflow with role map, blackboard, dissent log, packet ownership, and verification: ..."` for file-changing hive work, and `/opusworkflow --inner-contract defineicp|deepretaste|demo|visualizeworkflow` semantics for specialist mutation. Direct specialist commands are still valid when explicitly invoked, but they inherit the same Claude/Opus planner-reviewer plus MiniMax executor policy by default. diff --git a/CLAUDE.md b/CLAUDE.md index 5fc7104..f6becf8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,7 +29,8 @@ We prioritize getting it right over getting it done fast. Parallel agents only h 17. **Release Governance**: Public harness changes must pass `scripts/release-check.sh --static-only`; authenticated runtime checks stay explicit and secret-gated 18. **OpusMiniMax Split**: `/opusminimax` uses Claude/Opus for bounded planning, adversarial review, and verification while MiniMax-M2.7-highspeed executes bounded packets. Provider identity lives in ignored local profiles, not shared `.claude/settings.json`. 19. **OpusWorkflow Default**: `/opusworkflow` is the cost-optimized daily route over `/opusminimax --mode workflow` and the default for all mutating work: Opus only at judgment gates when proven available, MiniMax for bulk execution, executor concurrency 1 by default for Plus-Highspeed. Mutating specialist routes keep their own contracts as `inner_contract` values under this outer route. -20. **OpusSonnet Option**: `/opussonnet` is an optional Claude-only suggested route for installs created with `setup.sh --mode opussonnet`. It requests Claude Code `opusplan`, pins Opus 4.7 for planning/judgment and Sonnet 4.6 for execution, and requires no MiniMax token. Do not present it as the default MiniMax-backed budget strategy or claim runtime model identity without proof. +20. 
**Model Profile Freedom**: `/opusworkflow --model-profile minimax|opussonnet|sonnet|opus|default|custom` lets the operator choose Claude model routing without changing the default. Anthropic-only profiles must stay provider-neutral and never claim runtime identity without proof. +21. **OpusSonnet Option**: `/opussonnet` is an optional Claude-only suggested route for installs created with `setup.sh --mode opussonnet`. It requests Claude Code `opusplan`, pins Opus 4.7 for planning/judgment and Sonnet 4.6 for execution, and requires no MiniMax token. Do not present it as the default MiniMax-backed budget strategy or claim runtime model identity without proof. ## Default Behavior @@ -50,6 +51,8 @@ We prioritize getting it right over getting it done fast. Parallel agents only h **When you say `/opusworkflow` or give a normal build/plan task:** run `/opusminimax` in workflow mode with stricter cost policy: use Opus only for plan/spec freeze, adversarial review, and final judgment when identity is proven; use MiniMax-M2.7-highspeed for coding packets and repair loops; keep Plus-Highspeed executor concurrency at 1 unless provider evidence proves more. +**When you specify a model profile:** honor `/opusworkflow --model-profile sonnet|opus|opussonnet|default|custom` as an explicit operator choice while keeping the same SPEC, introspection, verification, no-secret, and runtime-identity-proof rules. + **When you say `/opussonnet`:** run the same governed lifecycle as `/opusworkflow`, but use the optional Claude-only contract: Claude Code `opusplan`, `claude-opus-4-7` for planning/judgment, and `claude-sonnet-4-6` for execution. Do not require a MiniMax token, and do not claim runtime model proof without `/status`, a sentinel, or artifact evidence. **When you request a governed Hermes agent, hive workflow, ICP/taste mutation, visualization continuation, or demo-producing work:** keep `/opusworkflow` as the outer route and apply the specialist as the inner contract. 
Direct `/agentfactory`, `/hiveworkflow`, `/defineicp`, `/deepretaste`, `/visualizeworkflow --continue`, and `/demo` invocations remain allowed, but they must inherit the same Opus planner-reviewer plus MiniMax executor policy before mutating files. @@ -130,6 +133,7 @@ We prioritize getting it right over getting it done fast. Parallel agents only h - **Security Profiles**: Validate profile examples with `bash scripts/security-smoke.sh`; the committed project default is trusted-local `bypassPermissions`, while `team-safe` remains the shared-work fallback and `solo-fast` documents the fast solo posture. - **OpusMiniMax Profiles**: `.claude/settings.json` is provider-neutral. Use ignored planner/executor local profiles copied from `.claude/settings.opusminimax-planner.example.json` and `.claude/settings.minimax-executor.example.json`; never claim Opus involvement unless runtime identity is proven. - **OpusWorkflow Budget**: `/opusworkflow` is the mutating-work default for the $20 Claude + $40 MiniMax strategy. It must not run Opus as a bulk executor, must not silently use PAYG, and must record `outer_route`, `inner_contract`, `planner_identity_status`, `executor_identity_status`, `fallback_status`, and `provider_ceiling=1` until runtime MiniMax tier evidence proves a higher safe executor budget. +- **Governed Model Profiles**: Explicit `--model-profile sonnet|opus|opussonnet|default|custom` overrides are allowed, but the artifact must record `model_profile`, `model_route`, requested planner/executor models, provider boundaries, and identity status. Static profile selection is not runtime proof. - **OpusSonnet Suggested Profile**: `.claude/settings.opussonnet.example.json` and `.claude/settings.sonnet-executor.example.json` are optional Claude-only profiles. They pin `opusplan`, `claude-opus-4-7`, and `claude-sonnet-4-6`, and must not contain MiniMax base URLs or credentials. 
- **Opus Runtime Proof**: A Claude subscription login plus exact `OPUSWORKFLOW_AUTH_OK` sentinel from `claude --model claude-opus-4-7` proves the planner side for the current account state. MiniMax executor runtime is a separate proof and must not be implied by the Opus check. - **Release Gate**: `bash scripts/release-check.sh --static-only` runs the no-secret public harness gate. Runtime checks belong to the manual/scheduled workflow. diff --git a/README.md b/README.md index 1b9bd21..37223ea 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,19 @@ Use this as a suggested alternative, not the default budget strategy. Runtime Opus access still depends on your Claude account state; use `/status` or an explicit runtime check before claiming Opus 4.7 actually planned a run. +You can also choose an explicit governed model profile per workflow without +changing the default: + +```bash +bash scripts/opusworkflow.sh --task "build or fix the thing" --model-profile sonnet +bash scripts/opusworkflow.sh --task "build or fix the thing" --model-profile opus +bash scripts/opusworkflow.sh --task "build or fix the thing" --model-profile custom --planner-model claude-sonnet-4-6 --executor-model claude-sonnet-4-6 +``` + +`minimax` remains the default. `opussonnet`, `sonnet`, `opus`, `default`, and +`custom` are explicit operator choices; static artifacts record the request, but +runtime identity still depends on the current Claude Code account/session. + Claude subscription auth is separate account auth. Run `claude auth login` once if this machine is not already logged in; the setup command does not store or fake your Claude subscription session. @@ -407,6 +420,7 @@ For REVCLI/Revis-style products, `/agentfactory` treats Hermes as the role-scope - **solo-fast option:** tracked example of the same trusted-local fast profile for personal repos where you want fewer prompts. 
- **Team-safe option:** copy [`.claude/settings.team-safe.example.json`](.claude/settings.team-safe.example.json) to your local settings and keep `defaultMode` at `acceptEdits`. - **OpusWorkflow default:** use `/opusworkflow` as the recommended daily mode for all mutating work so Opus is reserved for judgment checkpoints while MiniMax-M2.7-highspeed does bounded execution packets. Specialist mutation still uses its own contract as `inner_contract`. +- **Model profile override:** add `--model-profile sonnet`, `--model-profile opus`, `--model-profile opussonnet`, `--model-profile default`, or `--model-profile custom --planner-model MODEL --executor-model MODEL` when you intentionally want a different Claude model route for that run. - **OpusMiniMax option:** use `/opusminimax` directly when you need benchmark or repair mode, or lower-level packet control. - **OpusSonnet option:** use `/opussonnet` when you want the same governed harness without a MiniMax token, via Claude Code `opusplan` with Opus 4.7 planning and Sonnet 4.6 execution. - If you want even more guardrails, switch your local Claude session to `plan` before high-risk work. 
@@ -667,7 +681,7 @@ Now you can use any workflow pattern: |-------|-------------| | `/tastebootstrap` | **Fresh-repo bootstrap** — asks the 10 kernel questions and writes `taste.md` + `taste.vision` | | `/workflow` | **Underlying lifecycle and explicit fallback** — drives research → code audit → plan → Agent-Native Estimate → `SPEC.md` → implement → verify → closeout (supervises an efficacy-first agent budget) | -| `/opusworkflow` | **Recommended cost-optimized daily mode** — runs `/opusminimax --mode workflow` with Opus reserved for judgment and MiniMax-M2.7-highspeed as the executor | +| `/opusworkflow` | **Recommended cost-optimized daily mode** — runs `/opusminimax --mode workflow` with Opus reserved for judgment and MiniMax-M2.7-highspeed as the executor; supports explicit `--model-profile` overrides | | `/opusminimax` | **Primary split-execution mode** — Claude/Opus plans, adversarially reviews, and verifies while MiniMax-M2.7-highspeed executes bounded coding packets | | `/opussonnet` | **Optional Claude-only mode** — uses Claude Code `opusplan`, pins Opus 4.7 for planning/judgment and Sonnet 4.6 for execution, no MiniMax token required | | `/visualize` | **Taste-to-artifact comprehension check** — creates ignored visual, diagram, prompt, or narrative artifacts without implementation | @@ -734,6 +748,7 @@ Use this rule of thumb: | --- | --- | --- | | `/opusworkflow` | You want the default for a Claude subscription plus MiniMax Plus-Highspeed across ordinary or specialist mutating work: Opus only at plan/review/ship gates and MiniMax for bulk implementation. | One-command split setup, provider doctor, default executor concurrency 1, bounded packets, `outer_route` + `inner_contract` artifacts, parent verification, and no silent PAYG. | | `/opussonnet` | You want the whole governed harness without MiniMax for a repo, and you are okay spending Claude subscription or extra usage on execution. 
| `setup.sh --mode opussonnet`, Claude Code `opusplan`, pinned `claude-opus-4-7` + `claude-sonnet-4-6`, no MiniMax base URL, same hooks and workflow gates. | +| `/opusworkflow --model-profile sonnet\|opus\|default\|custom` | You intentionally want Claude Code model freedom for one governed run. | Same workflow artifacts and gates, no MiniMax leakage for Anthropic-only profiles, requested model IDs recorded, and runtime identity claims blocked until proven. | | local `/workflow` | Explicit user override, provider split unavailable, one tight reasoning loop, one shared file, unclear ownership, or coordination would slow the work down. | One supervisor does the governed lifecycle and records why the hybrid outer route was not used. | | `/deepretaste` | You need to detect product intent, define ICPs, and bootstrap or retaste the project kernel from research-backed customer evidence. | `/deepresearch` remains general-purpose; `/deepretaste` uses it only for taste-driving evidence, then routes fresh kernels through `/tastebootstrap` and existing kernels through `/defineicp` proposal/apply semantics. | | `/defineicp` | You need to define the ICP or ICPs and tailor `taste.md` / `taste.vision` to that customer profile. | Deepresearch plan, primary/secondary/anti-ICPs, source and claim ledgers, taste patch proposal, explicit apply approval, backups, hashes, validation, and rollback evidence. | diff --git a/SPEC.md b/SPEC.md index e4a807c..54ecae7 100644 --- a/SPEC.md +++ b/SPEC.md @@ -1,146 +1,211 @@ -# SPEC: Suggested Opus + Sonnet Install Mode +# SPEC: Governed Claude Model Profile Flexibility ## Problem Statement -The harness default should remain `/opusworkflow`: Claude/Opus judgment plus -MiniMax-M2.7-highspeed execution for the user's preferred cost-optimized split. 
-The operator also wants a clean, suggested Claude-only install path for repos -where MiniMax should not be required: Opus 4.7 plans, adversarially reviews, and -handles judgment, while Sonnet 4.6 performs execution through Claude Code's -native model behavior. +The harness can already represent the cost-optimized `/opusworkflow` default +and the optional `/opussonnet` route, but the current implementation makes model +choice too rigid. Operators should be able to request Claude Code models such as +`claude-opus-4-7`, `claude-sonnet-4-6`, `opus`, `sonnet`, `opusplan`, or a +custom model route without breaking artifacts, smokes, or safety gates. -This must be a first-class suggested install option, not a replacement default -and not a silent model downgrade. +The default must remain conservative and cost-aware. Model freedom must not +weaken provider isolation, secret safety, runtime identity honesty, or +verification requirements. ## Success Criteria -- [x] `setup.sh` supports a non-default `--mode opussonnet` install/update path. -- [x] `opussonnet` installs the same harness files and governed hooks while - preparing ignored local Claude-only profiles. -- [x] The optional profile pins Opus 4.7 and Sonnet 4.6 without MiniMax base URLs - or secrets. -- [x] Existing MiniMax default install commands remain unchanged. -- [x] `/opusworkflow` script/artifacts can represent either the standard MiniMax - executor or the optional Claude/Sonnet executor without confusing the two. -- [x] Static gates prove the default MiniMax route still works and the optional - Sonnet route is lintable. -- [x] README and runtime docs show one clean command for the suggested - Claude-only install path and clearly mark it as optional. -- [x] No `.env`, `.env.*`, `.claude/*.local.json`, key files, or secrets are read - by the assistant or committed. +- [x] `/opusworkflow` accepts a first-class `--model-profile` selector while + preserving the existing default behavior. 
+- [x] Supported profiles include `minimax`, `opussonnet`, `sonnet`, `opus`, + `default`, and `custom`. +- [x] Existing `--executor-provider minimax|claude-sonnet` compatibility keeps + working for current scripts and docs. +- [x] Anthropic-only profiles never inherit MiniMax base URLs, API key fields, + or MiniMax executor model IDs. +- [x] `artifact-lint` validates model route honesty without requiring every + planner to be Opus. +- [x] Static smokes prove the default MiniMax route, optional Opus+Sonnet route, + all-Sonnet route, and all-Opus route. +- [x] Docs explain exact commands for switching models and state that runtime + identity is account/session dependent until proven with `/status`, sentinel, + or run artifact. +- [x] No `.env`, `.env.*`, `.claude/*.local.json`, key files, or secrets are + read or committed. + +## Scope + +In Scope: + +- Add governed model-profile resolution to the existing OpusWorkflow scripts. +- Extend run artifacts with explicit model profile and role route metadata. +- Update lint/doctor/static smokes for flexible Anthropic model profiles. +- Update README, AGENTS, CLAUDE, skills, fixtures, and generated capability map. + +Out of Scope: + +- Running live Claude model calls in this turn. +- Editing ignored local settings profiles or shell startup files. +- Changing the default `/opusworkflow` MiniMax-backed cost strategy. +- Claiming Opus/Sonnet runtime identity without authenticated runtime proof. ## Research Brief ### Local Evidence -- `setup.sh` already installs clean folders, imports into existing projects, and - creates ignored split profiles for Opus planner plus MiniMax executor. -- `.claude/skills/opusworkflow/SKILL.md` defines the standard budget policy: - Opus only at judgment gates, MiniMax-M2.7-highspeed for bounded coding and - repair packets. 
-- `scripts/opusworkflow.sh` and `scripts/opusminimax.sh` currently hard-code the - executor assumption to MiniMax, so optional Sonnet execution needs explicit - provider metadata to avoid misleading artifacts. -- `scripts/artifact-lint.sh` currently rejects every `opusminimax-run` whose - executor is not MiniMax, which is correct for the standard path but too narrow - for an explicit Claude-only optional route. +- `scripts/opusworkflow.sh` currently accepts only + `--executor-provider minimax|claude-sonnet`. +- `scripts/opusminimax.sh` defaults the planner to `claude-opus-4-7` and rejects + executor providers outside `minimax|claude-sonnet`. +- `scripts/artifact-lint.sh` rejects any `opusminimax-run` whose planner model + does not contain `opus`. +- `scripts/opusworkflow-smoke.sh` proves the current MiniMax default and + optional `claude-sonnet` route but has no all-Sonnet or all-Opus route. ### Current Product Evidence -- Claude Code model configuration docs say the `opusplan` alias uses Opus during - plan mode and Sonnet during execution mode. -- The same docs say Anthropic API aliases currently resolve `opus` to Opus 4.7 - and `sonnet` to Sonnet 4.6, and model environment variables can pin alias - resolution. -- The Claude Help Center lists `claude-opus-4-7` and `claude-sonnet-4-6` as +- Claude Code model configuration docs say users can switch models with + `/model`, `claude --model`, `ANTHROPIC_MODEL`, or the `model` settings field. +- Claude Code docs define `opusplan` as Opus in plan mode and Sonnet in + execution mode. +- Claude Code docs say `ANTHROPIC_DEFAULT_OPUS_MODEL`, + `ANTHROPIC_DEFAULT_SONNET_MODEL`, and `CLAUDE_CODE_SUBAGENT_MODEL` control + alias and subagent model routing. +- Claude Code docs say command-line settings override local/project/user + settings, while managed settings remain highest priority. +- Claude Help Center lists `claude-opus-4-7` and `claude-sonnet-4-6` as supported Claude Code model identifiers. 
### Source Ledger -- Claude Code model configuration: +- Claude Code model configuration, accessed 2026-05-07: https://code.claude.com/docs/en/model-config -- Claude Help Center model configuration: +- Claude Code CLI reference, accessed 2026-05-07: + https://code.claude.com/docs/en/cli-reference +- Claude Code settings, accessed 2026-05-07: + https://code.claude.com/docs/en/settings +- Claude Help Center model configuration, accessed 2026-05-07: https://support.claude.com/en/articles/11940350-claude-code-model-configuration -## Plan - -1. Add committed `opussonnet`/Sonnet example profiles with no MiniMax endpoint, - no credentials, explicit secret denies, governance hooks, and pinned model - env vars. -2. Extend `setup.sh` and `setup.ps1` with `--mode opussonnet`, keeping default - `opusworkflow` untouched. -3. Extend `scripts/opusworkflow.sh`, `scripts/opusminimax.sh`, and - `scripts/artifact-lint.sh` with explicit `executor_provider` support for - `minimax` and `claude-sonnet`. -4. Extend static doctor/security/smoke tests to validate the optional profile - while preserving MiniMax as the standard route. -5. Update README, AGENTS/CLAUDE guidance, runtime quickstart, and regenerate the - harness capability map. -6. Run static acceptance gates and archive this SPEC after verified closeout. - ## Agent-Native Estimate - Estimate type: agent-native. - Capacity evidence: `bash scripts/parallel-capacity.sh --json` reported - `recommended_ceiling=10`, `codex_max_threads=10`, `hardware_class=workstation` - on 2026-05-07. -- Effective parallel budget: 1 implementation lane. The change is coupled across - installer, profile examples, artifact validation, docs, and static gates. + `codex_max_threads=10`, `recommended_ceiling=10`, `hardware_class=workstation`, + `cores=16`, `ram_gb=32`, and `agent_teams_available=false` on 2026-05-07. +- Effective parallel budget: 1 implementation lane. 
The work touches coupled + shell scripts, lint rules, fixtures, and docs, so parallel editing would + create merge friction. - Agent wall-clock: 60-120 minutes. -- Agent-hours: 1.5-3. -- Human touch time: none for static implementation; runtime account access - remains operator-dependent. +- Agent-hours: 1.5-3.0. +- Human touch time: none for static implementation. Runtime model proof remains + account/session dependent. - Calendar blockers: none for static release. -- Confidence: medium. Claude Code model availability is account-dependent, so - static checks can prove configuration but not live Opus/Sonnet access. +- Confidence: medium. Static gates can prove routing and safety invariants, not + live model availability for the operator's Claude account. + +## Implementation Plan + +### Task 1: Add model profile resolution + +Definition of Done: + +- [x] `scripts/opusworkflow.sh` exposes `--model-profile`. +- [x] `scripts/opusminimax.sh` resolves model profiles to planner/executor + provider and model IDs. +- [x] Backward-compatible `--executor-provider claude-sonnet` still maps to the + Opus+Sonnet route. + +### Task 2: Relax lint while preserving honesty + +Definition of Done: + +- [x] `scripts/artifact-lint.sh` accepts Anthropic model routes without MiniMax + base URLs. +- [x] Lint still rejects MiniMax leakage into Anthropic profiles. +- [x] Lint still rejects Opus runtime claims without model identity proof. + +### Task 3: Update smokes and docs + +Definition of Done: + +- [x] `scripts/opusworkflow-smoke.sh` validates default, Opus+Sonnet, all-Sonnet, + and all-Opus static artifacts. +- [x] Fixture coverage includes at least one Anthropic flexible model route. +- [x] README, AGENTS, CLAUDE, and route skills document the selector. +- [x] Capability map is regenerated. 
+ +## Verification + +- `bash -n scripts/opusworkflow.sh scripts/opusminimax.sh scripts/opusminimax-doctor.sh scripts/artifact-lint.sh scripts/opusworkflow-smoke.sh` +- `python3 -m json.tool` on changed JSON fixtures and settings examples. +- `bash scripts/opusworkflow-smoke.sh` +- `bash scripts/artifact-lint.sh --fixtures` +- `bash scripts/security-smoke.sh` +- `bash scripts/harness-capability-map.sh --write` +- `bash scripts/harness-capability-map.sh --check --json` +- `bash scripts/harness-eval.sh --json` +- `env HARNESS_STATIC_CI=1 bash scripts/test-harness.sh` +- `bash scripts/release-check.sh --static-only` +- `git diff --check` + +## Rollback Plan + +1. Revert this commit. +2. Regenerate the capability map if needed. +3. Verify rollback with `bash scripts/opusworkflow-smoke.sh` and + `bash scripts/release-check.sh --static-only`. ## Introspection: Pre-Implementation -- Likely mistake: making `opussonnet` look like the new default. Mitigation: - docs and setup output must say it is suggested/optional; default commands stay - MiniMax-backed `/opusworkflow`. -- Likely mistake: using Sonnet artifacts that still say MiniMax executed. - Mitigation: add explicit `executor_provider` and provider-specific validation. -- Likely mistake: writing secrets or reading local ignored profiles during this - implementation. Mitigation: create committed examples only; tests must not - inspect local credential files. -- Likely mistake: overclaiming runtime proof. Mitigation: all static docs say - model identity still requires `claude auth login`, `/status`, or explicit - runtime checks. +- Likely mistake: weakening the MiniMax/Anthropic boundary. Mitigation: lint + must still reject MiniMax URLs or MiniMax model IDs in Anthropic profiles. +- Likely mistake: making all-Opus look like the new default. Mitigation: + default remains `minimax`; docs label Opus-only as explicit and expensive. +- Likely mistake: treating static profile selection as runtime proof. 
+ Mitigation: artifacts keep identity status `blocked` or `runtime-pending` + until `/status`, sentinel, or equivalent run evidence proves the model. +- Likely mistake: breaking existing `--executor-provider claude-sonnet` users. + Mitigation: keep it as a backward-compatible alias for `opussonnet`. ## Verified 2026-05-07 -- `bash -n setup.sh scripts/opusminimax.sh scripts/opusworkflow.sh scripts/opussonnetworkflow.sh scripts/opusminimax-doctor.sh scripts/opusworkflow-smoke.sh scripts/security-smoke.sh scripts/test-harness.sh scripts/artifact-lint.sh scripts/harness-capability-map.sh`: pass. -- `python3 -m json.tool` on the new OpusSonnet profiles, updated schema, and - green artifact fixture: pass. -- `env -u MINIMAX_TOKEN_KEY -u TOKEN_KEY bash setup.sh --help`: pass; help shows - the optional `--mode opussonnet` command without executing setup. -- `bash scripts/opusminimax-doctor.sh --static --executor-provider claude-sonnet`: - exits 0 with only existing tracked fixture/test placeholder warnings. -- `bash scripts/opusworkflow-smoke.sh`: pass; validates default MiniMax and - optional `claude-sonnet` artifacts. -- `bash scripts/artifact-lint.sh --fixtures`: pass (`8 green`, `22 red`). +- `bash -n scripts/opusworkflow.sh scripts/opusminimax.sh scripts/opusminimax-doctor.sh scripts/artifact-lint.sh scripts/opusworkflow-smoke.sh scripts/opussonnetworkflow.sh scripts/test-harness.sh`: pass. +- `python3 -m json.tool schemas/opusminimax-run.schema.json` and + `.taste/fixtures/artifact-lint/green/valid-sonnet-model-profile-run.json`: + pass. +- `bash scripts/opusworkflow-smoke.sh`: pass; validates default MiniMax, + backward-compatible Opus+Sonnet, all-Sonnet, and all-Opus static artifacts. +- `bash scripts/artifact-lint.sh --fixtures`: pass (`9 green`, `22 red`). +- `bash scripts/opusminimax-doctor.sh --static --model-profile minimax --executor-provider minimax --json`: pass with existing tracked test/fixture secret-string warning. 
+- `bash scripts/opusminimax-doctor.sh --static --model-profile opussonnet --executor-provider claude-sonnet --json`: pass with existing tracked test/fixture secret-string warning. +- `bash scripts/opusminimax-doctor.sh --static --model-profile sonnet --executor-provider anthropic --json`: pass with existing tracked test/fixture secret-string warning. +- `bash scripts/opusminimax-doctor.sh --static --model-profile opus --executor-provider anthropic --json`: pass with existing tracked test/fixture secret-string warning. +- `bash scripts/opusworkflow.sh --task "default profile smoke" --model-profile default --run-id manual-profile-default` plus artifact lint: pass; runtime not executed. +- `bash scripts/opusworkflow.sh --task "custom profile smoke" --model-profile custom --planner-model claude-sonnet-4-6 --executor-model claude-sonnet-4-6 --run-id manual-profile-custom` plus artifact lint: pass; runtime not executed. +- `bash scripts/opusworkflow.sh --task "bad profile smoke" --model-profile sonnet --executor-provider minimax --run-id manual-profile-bad`: correctly exits 2. - `bash scripts/security-smoke.sh`: pass. -- `bash scripts/opussonnetworkflow.sh --task "manual optional route check" --run-id manual-opussonnet-check` plus artifact lint: pass; runtime not executed. -- `bash scripts/harness-capability-map.sh --write` and `--check`: pass. +- `bash scripts/harness-capability-map.sh --write` and + `bash scripts/harness-capability-map.sh --check --json`: pass. - `bash scripts/harness-eval.sh --json`: pass (`22 tasks`, `19 gates`, `0 mismatches`). -- `bash scripts/metacognition-scorecard.sh --fixtures --json`: pass (`7 green`, - `11 red`). - `env HARNESS_STATIC_CI=1 bash scripts/test-harness.sh`: pass (`141 passed`, - `0 failed`). + `0 failed`; workflow runtime smoke intentionally skipped). - `bash scripts/release-check.sh --static-only`: pass. - `git diff --check`: pass. 
## Introspection: Pre-Closeout -- Likely mistake checked: `opussonnet` could appear to replace the MiniMax - default. The README, AGENTS, CLAUDE, skill text, and setup output all label it - as optional/suggested, while default commands still use MiniMax-backed - `/opusworkflow`. -- Likely mistake checked: Sonnet artifacts could imply MiniMax execution. The - run artifact now carries `executor_provider`, and artifact lint validates - MiniMax and Claude/Sonnet providers differently. -- Remaining risk: static gates prove profile shape and artifact honesty, not - live account model access. Runtime identity still requires authenticated - Claude Code checks. +- Likely mistake checked: this could make all-Opus look like the new default. + The default remains `model_profile=minimax`; `opus` is explicit and documented + as a high-cost route. +- Likely mistake checked: Anthropic profiles could leak MiniMax provider state. + `artifact-lint` rejects MiniMax base URLs and MiniMax executor model IDs in + Anthropic routes, and the smoke covers Sonnet/Opus profiles. +- Likely mistake checked: static model selection could be overclaimed as runtime + model proof. Artifacts keep `model_identity_confirmed=false`, + `planner_identity_status=blocked`, and `verification.status=runtime-pending` + until `/status`, sentinel, or equivalent runtime evidence proves identity. +- Remaining risk: live account availability and usage thresholds can still make + Claude Code fall back or block at runtime. This implementation proves the + harness no longer breaks statically when the operator requests a different + governed model route. 
diff --git a/docs/harness-capability-map.json b/docs/harness-capability-map.json index 4534a36..eb790dd 100644 --- a/docs/harness-capability-map.json +++ b/docs/harness-capability-map.json @@ -379,12 +379,12 @@ "sha256": "4dcbd22f5493e69884e2918455ca1aa0964bd31490681ef49d80e9c5a4069742" }, { - "line_count": 662, + "line_count": 697, "name": "artifact-lint", "path": "scripts/artifact-lint.sh", "purpose": "machine sidecar validator", "required_gate": true, - "sha256": "958bef1364bfe3e83ecd0d10387952e883a28ca2e539d939a65c3e20be73ea20" + "sha256": "a40ca55cf76a2ed9fdb5b94ce4e2d389012d4e303eac16a3abb1488a02d0a056" }, { "line_count": 189, @@ -603,44 +603,44 @@ "sha256": "814fdc31e20687d24b8efbb2b44d67a3ec932aaef5be9a730d791c66bfc2ff03" }, { - "line_count": 501, + "line_count": 525, "name": "opusminimax-doctor", "path": "scripts/opusminimax-doctor.sh", "purpose": "OpusMiniMax provider split doctor", "required_gate": true, - "sha256": "87ce355b7ecd84ddb8b1719da40f5540509d8f045149182700f52e72b5a9432a" + "sha256": "1601190d4e749ffb37ddf657c62e4cdb4f6c7ac384e4ff26095bc45d3af3ba9d" }, { - "line_count": 292, + "line_count": 400, "name": "opusminimax", "path": "scripts/opusminimax.sh", "purpose": "Prepare or explicitly launch the /opusminimax planner workflow.", "required_gate": false, - "sha256": "1e358ad090f810a2eed1d0167c7274a0406fc07eeffbf059167cd81161d3b1de" + "sha256": "6e26c3071ea92f85e7b1eb55fa3608531d7967cb5fd065e40242a0274211b3be" }, { - "line_count": 29, + "line_count": 30, "name": "opussonnetworkflow", "path": "scripts/opussonnetworkflow.sh", "purpose": "optional Claude-only Opus plus Sonnet workflow wrapper", "required_gate": true, - "sha256": "70be9fb377e88766cff9656f348815f31461ad054de52682582d94c161731109" + "sha256": "199a1e495da27067db293bda488b10b8938968565b2d96646380d88c187728c4" }, { - "line_count": 170, + "line_count": 208, "name": "opusworkflow-smoke", "path": "scripts/opusworkflow-smoke.sh", "purpose": "OpusWorkflow cost-optimized route gate", 
"required_gate": true, - "sha256": "a0c7e5fa48146cded6be1a20a0b7475f7a16f6bb2905f7a2796a2b8ce5786b0a" + "sha256": "dd1c96b147fd8fe84e0cbedc13720a77f4f90d237d5d35d37c9ba9237462b4ab" }, { - "line_count": 75, + "line_count": 139, "name": "opusworkflow", "path": "scripts/opusworkflow.sh", "purpose": "Prepare or explicitly launch the cost-optimized /opusworkflow route.", "required_gate": false, - "sha256": "8ed5b4829d3a45290e9a47f84e225066d264f42485c5bea85f665ab5df23251f" + "sha256": "543e1a86fb6452ab740d5074cfbeaef5bbad4d039b3cd30b2154c3a2f3a86c4d" }, { "line_count": 113, @@ -771,12 +771,12 @@ "sha256": "18c827b8746916ccd198045f0e8c6e4cd30ca714ca2b1c0dc05f6f47b6f3d263" }, { - "line_count": 1908, + "line_count": 1912, "name": "test-harness", "path": "scripts/test-harness.sh", "purpose": "full local harness regression suite", "required_gate": true, - "sha256": "623a3789d0381824ae49d0ecc4797f1dceef665083e98f9b3402da880bdf980b" + "sha256": "d3868be503ca8e971431a7ab51755dd0fdb2f360df1add6192f284b471a11261" }, { "line_count": 127, @@ -1389,7 +1389,7 @@ "core_route": true, "description": "Run the Opus planner plus MiniMax-M2.7-highspeed executor workflow. Use when the user invokes /opusminimax or wants Claude/Opus to plan, adversarially review, and verify while MiniMax executes bounded coding packets.", "group": "execution", - "line_count": 252, + "line_count": 265, "model_invocation": false, "name": "opusminimax", "path": ".claude/skills/opusminimax/SKILL.md", @@ -1403,7 +1403,7 @@ "scripts/opusminimax-benchmark-smoke.sh", "scripts/opusminimax-doctor.sh" ], - "sha256": "be79426f8bc821bc0bd178df1ed49c217da0707a1b438657680dd375ab3b2018", + "sha256": "57caa96ea0e7720f4793e1f8e5b29b51cc38e0eb8d8a97c175c6211019a3c43b", "slash": "/opusminimax", "user_invocable": true }, @@ -1412,7 +1412,7 @@ "core_route": false, "description": "Run the optional Claude-only Opus 4.7 planner plus Sonnet 4.6 executor workflow. 
Use when the user invokes /opussonnet or installed with --mode opussonnet and wants the harness without MiniMax.", "group": "support", - "line_count": 84, + "line_count": 92, "model_invocation": false, "name": "opussonnet", "path": ".claude/skills/opussonnet/SKILL.md", @@ -1421,7 +1421,7 @@ "related_scripts": [ "scripts/opussonnetworkflow.sh" ], - "sha256": "59c7bcb4e81e4e3e47dfe650c8ced30bb94e23eed2da72a81b7d94f596cfc4c1", + "sha256": "9b37dd45d00296ef878453c122e15ff6c0da5c632932e712e75c6d00944a31a6", "slash": "/opussonnet", "user_invocable": true }, @@ -1430,7 +1430,7 @@ "core_route": true, "description": "Run the cost-optimized Opus planner plus MiniMax-M2.7-highspeed executor workflow end to end. Use when the user invokes /opusworkflow or wants the recommended daily mode for a Claude subscription plus MiniMax Plus-Highspeed Token Plan.", "group": "execution", - "line_count": 164, + "line_count": 194, "model_invocation": false, "name": "opusworkflow", "path": ".claude/skills/opusworkflow/SKILL.md", @@ -1445,7 +1445,7 @@ "scripts/opusworkflow-smoke.sh", "scripts/opussonnetworkflow.sh" ], - "sha256": "c0fb5110fe22f126df178fbd3ff44ba6f7539e649ef90061342c40cdb086c765", + "sha256": "48dc32a99e617bc9f61911fe61b9ec8c63c2361d9c64ef0f3a48413ae4a00602", "slash": "/opusworkflow", "user_invocable": true }, diff --git a/schemas/opusminimax-run.schema.json b/schemas/opusminimax-run.schema.json index 750f265..3acdd24 100644 --- a/schemas/opusminimax-run.schema.json +++ b/schemas/opusminimax-run.schema.json @@ -24,7 +24,10 @@ "minLength": 1 }, "executor_provider": { - "enum": ["minimax", "claude-sonnet"] + "enum": ["minimax", "claude-sonnet", "anthropic"] + }, + "model_profile": { + "enum": ["minimax", "opussonnet", "sonnet", "opus", "default", "custom"] }, "provider_profiles": { "type": "object", @@ -34,6 +37,9 @@ "type": "object", "required": ["planner_requested", "executor_requested"] }, + "model_route": { + "type": "object" + }, "capacity": { "type": "object", "required": 
["local_ceiling", "provider_ceiling", "task_packet_count", "safety_cap", "effective_concurrency"] diff --git a/scripts/artifact-lint.sh b/scripts/artifact-lint.sh index 6d8e575..bcc50d2 100755 --- a/scripts/artifact-lint.sh +++ b/scripts/artifact-lint.sh @@ -360,7 +360,9 @@ def validate_opusminimax_run(data: dict[str, Any], errors: list[str]) -> None: executor = profiles.get("executor") if isinstance(profiles, dict) else {} planner_blob = profile_blob(planner).lower() executor_blob = profile_blob(executor) + executor_blob_lower = executor_blob.lower() executor_provider = str(data.get("executor_provider") or "minimax").strip() + model_profile = str(data.get("model_profile") or "").strip() if not planner: error(errors, "opusminimax-run missing planner profile") @@ -368,7 +370,9 @@ def validate_opusminimax_run(data: dict[str, Any], errors: list[str]) -> None: error(errors, "opusminimax-run missing executor profile") if "api.minimax.io/anthropic" in planner_blob or "minimax-m2.7-highspeed" in planner_blob: error(errors, "opusminimax-run planner profile must not route through MiniMax") - if executor_provider not in {"minimax", "claude-sonnet"}: + if model_profile and model_profile not in {"minimax", "opussonnet", "sonnet", "opus", "default", "custom"}: + error(errors, "opusminimax-run model_profile is unsupported") + if executor_provider not in {"minimax", "claude-sonnet", "anthropic"}: error(errors, "opusminimax-run executor_provider is unsupported") if executor_provider == "minimax": if "https://api.minimax.io/anthropic" not in executor_blob: @@ -376,20 +380,51 @@ def validate_opusminimax_run(data: dict[str, Any], errors: list[str]) -> None: if "MiniMax-M2.7-highspeed" not in executor_blob: error(errors, "opusminimax-run executor profile must request MiniMax-M2.7-highspeed") elif executor_provider == "claude-sonnet": - if "api.minimax.io/anthropic" in executor_blob.lower() or "minimax-m2.7-highspeed" in executor_blob.lower(): + if "api.minimax.io/anthropic" in 
executor_blob_lower or "minimax-m2.7-highspeed" in executor_blob_lower: error(errors, "opusminimax-run Claude Sonnet executor profile must not route through MiniMax") - if "sonnet" not in executor_blob.lower(): + if "sonnet" not in executor_blob_lower: error(errors, "opusminimax-run Claude Sonnet executor profile must request Sonnet") + elif executor_provider == "anthropic": + if "api.minimax.io/anthropic" in executor_blob_lower or "minimax-m2.7-highspeed" in executor_blob_lower: + error(errors, "opusminimax-run Anthropic executor profile must not route through MiniMax") model_ids = data.get("model_ids") if isinstance(data.get("model_ids"), dict) else {} planner_model = str(model_ids.get("planner_requested", "")).lower() executor_model = str(model_ids.get("executor_requested", "")) - if planner_model and "opus" not in planner_model: - error(errors, "opusminimax-run planner_requested must be an Opus model or alias") + executor_model_lower = executor_model.lower() + if not model_profile: + if planner_model and "opus" not in planner_model: + error(errors, "opusminimax-run planner_requested must be an Opus model or alias") + elif model_profile == "minimax": + if "opus" not in planner_model: + error(errors, "opusminimax-run minimax profile planner_requested must be Opus") + elif model_profile == "opussonnet": + if "opus" not in planner_model: + error(errors, "opusminimax-run opussonnet profile planner_requested must be Opus") + if "sonnet" not in executor_model_lower: + error(errors, "opusminimax-run opussonnet profile executor_requested must be Sonnet") + elif model_profile == "sonnet": + if "sonnet" not in planner_model: + error(errors, "opusminimax-run sonnet profile planner_requested must be Sonnet") + if "sonnet" not in executor_model_lower: + error(errors, "opusminimax-run sonnet profile executor_requested must be Sonnet") + elif model_profile == "opus": + if "opus" not in planner_model: + error(errors, "opusminimax-run opus profile planner_requested must be 
Opus") + if "opus" not in executor_model_lower: + error(errors, "opusminimax-run opus profile executor_requested must be Opus") + elif model_profile == "default": + if planner_model != "default" or executor_model_lower != "default": + error(errors, "opusminimax-run default profile must request default models") + elif model_profile == "custom": + if not planner_model or not executor_model: + error(errors, "opusminimax-run custom profile requires planner and executor models") if executor_provider == "minimax" and executor_model != "MiniMax-M2.7-highspeed": error(errors, "opusminimax-run executor_requested must be MiniMax-M2.7-highspeed") - if executor_provider == "claude-sonnet" and "sonnet" not in executor_model.lower(): + if executor_provider == "claude-sonnet" and "sonnet" not in executor_model_lower: error(errors, "opusminimax-run executor_requested must be a Sonnet model for claude-sonnet provider") + if executor_provider == "anthropic" and "minimax" in executor_model_lower: + error(errors, "opusminimax-run Anthropic executor_requested must not be MiniMax") capacity = data.get("capacity") if isinstance(data.get("capacity"), dict) else {} effective = capacity.get("effective_concurrency") diff --git a/scripts/opusminimax-doctor.sh b/scripts/opusminimax-doctor.sh index 2681b5e..ff7b649 100755 --- a/scripts/opusminimax-doctor.sh +++ b/scripts/opusminimax-doctor.sh @@ -8,6 +8,7 @@ MODE="static" JSON_ONLY=0 FIX_LOCAL_PROFILES=0 EXECUTOR_PROVIDER="minimax" +MODEL_PROFILE="minimax" usage() { cat >&2 <<'EOF' @@ -16,7 +17,8 @@ Usage: bash scripts/opusminimax-doctor.sh --runtime [--fix-local-profiles] [--json] --static is no-secret and does not run provider model calls. ---executor-provider minimax|claude-sonnet selects the executor profile contract. +--model-profile minimax|opussonnet|sonnet|opus|default|custom selects the governed route. +--executor-provider minimax|claude-sonnet|anthropic selects the executor profile contract. 
--runtime may inspect local Claude auth/version state, but still never prints secrets. --fix-local-profiles repairs ignored planner/executor local profile structure without printing credentials or reading .env files. @@ -45,6 +47,10 @@ while [ "$#" -gt 0 ]; do EXECUTOR_PROVIDER="${2:-}" shift 2 ;; + "--model-profile") + MODEL_PROFILE="${2:-}" + shift 2 + ;; "-h"|"--help") usage exit 0 @@ -56,12 +62,22 @@ while [ "$#" -gt 0 ]; do esac done +case "$MODEL_PROFILE" in + minimax|opussonnet|sonnet|opus|default|custom) ;; + *) usage; exit 2 ;; +esac + case "$EXECUTOR_PROVIDER" in - minimax|claude-sonnet) ;; + minimax|claude-sonnet|anthropic) ;; *) usage; exit 2 ;; esac -python3 - "$ROOT_DIR" "$MODE" "$JSON_ONLY" "$FIX_LOCAL_PROFILES" "$EXECUTOR_PROVIDER" <<'PY' +case "$MODEL_PROFILE:$EXECUTOR_PROVIDER" in + minimax:minimax|opussonnet:claude-sonnet|sonnet:anthropic|opus:anthropic|default:anthropic|custom:anthropic) ;; + *) usage; exit 2 ;; +esac + +python3 - "$ROOT_DIR" "$MODE" "$JSON_ONLY" "$FIX_LOCAL_PROFILES" "$EXECUTOR_PROVIDER" "$MODEL_PROFILE" <<'PY' import json import os import pathlib @@ -76,6 +92,7 @@ MODE = sys.argv[2] JSON_ONLY = sys.argv[3] == "1" FIX_LOCAL_PROFILES = sys.argv[4] == "1" EXECUTOR_PROVIDER = sys.argv[5] +MODEL_PROFILE = sys.argv[6] PROJECT = ROOT / ".claude" / "settings.json" PLANNER = ROOT / ".claude" / "settings.opusminimax-planner.example.json" @@ -274,6 +291,10 @@ def repair_local_profiles( add(checks, "opussonnet local profile repaired", True, "updated ignored local profile" if rel(OPUSSONNET_LOCAL) in changed else "already safe") return + if EXECUTOR_PROVIDER == "anthropic": + add(checks, "anthropic executor local profile repair", True, "no MiniMax executor profile needed") + return + executor_local, executor_state = read_json_quiet(EXECUTOR_LOCAL) if executor_state == "invalid": add(checks, "executor local profile repair", False, "invalid JSON; fix or remove .claude/settings.minimax-executor.local.json") @@ -431,6 +452,8 @@ if MODE == 
"runtime": add(checks, "sonnet executor local profile exists", local_sonnet_state == "ok", "run --runtime --fix-local-profiles --executor-provider claude-sonnet" if local_sonnet_state != "ok" else "") add(checks, "sonnet executor local profile has no MiniMax base URL", "ANTHROPIC_BASE_URL" not in local_sonnet_env and "minimax" not in json.dumps(local_sonnet_env, sort_keys=True).lower()) add(checks, "sonnet executor local profile requests Sonnet 4.6", "claude-sonnet-4-6" in json.dumps(local_sonnet_env, sort_keys=True)) + elif EXECUTOR_PROVIDER == "anthropic": + add(checks, "anthropic executor profile uses Claude account route", True, f"model_profile={MODEL_PROFILE}") else: local_executor, local_executor_state = read_json_quiet(EXECUTOR_LOCAL) local_executor_env = env(local_executor) @@ -476,6 +499,7 @@ payload = { "artifact_type": "opusminimax-doctor-result", "mode": MODE, "executor_provider": EXECUTOR_PROVIDER, + "model_profile": MODEL_PROFILE, "status": status, "checks": checks, "runtime_model_calls": False, diff --git a/scripts/opusminimax.sh b/scripts/opusminimax.sh index 28ef633..353f769 100755 --- a/scripts/opusminimax.sh +++ b/scripts/opusminimax.sh @@ -11,18 +11,30 @@ INNER_CONTRACT="workflow" RUN_ID="" EXECUTE_PLANNER=0 PLANNER_SETTINGS="${CLAUDE_PLANNER_SETTINGS_PATH:-$ROOT_DIR/.claude/settings.opusminimax-planner.local.json}" -PLANNER_MODEL="${OPUSMINIMAX_PLANNER_MODEL:-claude-opus-4-7}" +PLANNER_MODEL="${OPUSMINIMAX_PLANNER_MODEL:-}" EXECUTOR_PROVIDER="${OPUSMINIMAX_EXECUTOR_PROVIDER:-minimax}" EXECUTOR_MODEL="${OPUSMINIMAX_EXECUTOR_MODEL:-}" +MODEL_PROFILE="${OPUSMINIMAX_MODEL_PROFILE:-}" +PLANNER_MODEL_SET=0 +EXECUTOR_MODEL_SET=0 +EXECUTOR_PROVIDER_SET=0 usage() { cat >&2 <<'EOF' Usage: - bash scripts/opusminimax.sh --task "..." [--mode workflow|benchmark|repair] [--outer-route ROUTE] [--inner-contract CONTRACT] [--executor-provider minimax|claude-sonnet] [--execute-planner] [--planner-settings PATH] + bash scripts/opusminimax.sh --task "..." 
[--mode workflow|benchmark|repair] [--outer-route ROUTE] [--inner-contract CONTRACT] [--model-profile minimax|opussonnet|sonnet|opus|default|custom] [--executor-provider minimax|claude-sonnet|anthropic] [--execute-planner] [--planner-settings PATH] Default behavior prepares no-secret run artifacts and prints the next command. --execute-planner is the explicit Claude runtime opt-in. +Model profiles are governed routing presets, not runtime identity proof: + minimax Opus judgment + MiniMax execution (default) + opussonnet Opus judgment + Sonnet execution, no MiniMax token + sonnet Sonnet planning + Sonnet execution + opus Opus planning + Opus execution + default Claude Code account default planning + execution + custom Explicit --planner-model and --executor-model values + CONTRACT may be workflow, agentfactory, hiveworkflow, parallel, defineicp, deepretaste, demo, or visualizeworkflow. EOF @@ -56,14 +68,21 @@ while [ "$#" -gt 0 ]; do ;; "--planner-model") PLANNER_MODEL="${2:-}" + PLANNER_MODEL_SET=1 shift 2 ;; "--executor-model") EXECUTOR_MODEL="${2:-}" + EXECUTOR_MODEL_SET=1 shift 2 ;; "--executor-provider") EXECUTOR_PROVIDER="${2:-}" + EXECUTOR_PROVIDER_SET=1 + shift 2 + ;; + "--model-profile") + MODEL_PROFILE="${2:-}" shift 2 ;; "--execute-planner") @@ -94,16 +113,78 @@ case "$INNER_CONTRACT" in workflow|agentfactory|hiveworkflow|parallel|defineicp|deepretaste|demo|visualizeworkflow) ;; *) echo "[opusminimax] invalid inner contract: $INNER_CONTRACT" >&2; exit 2 ;; esac + +if [ -z "$MODEL_PROFILE" ]; then + case "$EXECUTOR_PROVIDER" in + minimax) MODEL_PROFILE="minimax" ;; + claude-sonnet) MODEL_PROFILE="opussonnet" ;; + anthropic) MODEL_PROFILE="custom" ;; + *) MODEL_PROFILE="minimax" ;; + esac +fi + +case "$MODEL_PROFILE" in + minimax|opussonnet|sonnet|opus|default|custom) ;; + *) echo "[opusminimax] invalid model profile: $MODEL_PROFILE" >&2; exit 2 ;; +esac + +if [ "$EXECUTOR_PROVIDER_SET" -eq 0 ]; then + case "$MODEL_PROFILE" in + minimax) 
EXECUTOR_PROVIDER="minimax" ;; + opussonnet) EXECUTOR_PROVIDER="claude-sonnet" ;; + sonnet|opus|default|custom) EXECUTOR_PROVIDER="anthropic" ;; + esac +fi + case "$EXECUTOR_PROVIDER" in - minimax|claude-sonnet) ;; + minimax|claude-sonnet|anthropic) ;; *) echo "[opusminimax] invalid executor provider: $EXECUTOR_PROVIDER" >&2; exit 2 ;; esac -if [ -z "$EXECUTOR_MODEL" ]; then - if [ "$EXECUTOR_PROVIDER" = "claude-sonnet" ]; then - EXECUTOR_MODEL="claude-sonnet-4-6" - else - EXECUTOR_MODEL="MiniMax-M2.7-highspeed" - fi + +case "$MODEL_PROFILE:$EXECUTOR_PROVIDER" in + minimax:minimax|opussonnet:claude-sonnet|sonnet:anthropic|opus:anthropic|default:anthropic|custom:anthropic) ;; + *) + echo "[opusminimax] model profile '$MODEL_PROFILE' conflicts with executor provider '$EXECUTOR_PROVIDER'" >&2 + exit 2 + ;; +esac + +case "$MODEL_PROFILE" in + minimax) + [ -n "$PLANNER_MODEL" ] || PLANNER_MODEL="claude-opus-4-7" + [ -n "$EXECUTOR_MODEL" ] || EXECUTOR_MODEL="MiniMax-M2.7-highspeed" + ;; + opussonnet) + [ -n "$PLANNER_MODEL" ] || PLANNER_MODEL="claude-opus-4-7" + [ -n "$EXECUTOR_MODEL" ] || EXECUTOR_MODEL="claude-sonnet-4-6" + ;; + sonnet) + [ -n "$PLANNER_MODEL" ] || PLANNER_MODEL="claude-sonnet-4-6" + [ -n "$EXECUTOR_MODEL" ] || EXECUTOR_MODEL="claude-sonnet-4-6" + ;; + opus) + [ -n "$PLANNER_MODEL" ] || PLANNER_MODEL="claude-opus-4-7" + [ -n "$EXECUTOR_MODEL" ] || EXECUTOR_MODEL="claude-opus-4-7" + ;; + default) + [ -n "$PLANNER_MODEL" ] || PLANNER_MODEL="default" + [ -n "$EXECUTOR_MODEL" ] || EXECUTOR_MODEL="default" + ;; + custom) + if [ -z "$PLANNER_MODEL" ] || [ -z "$EXECUTOR_MODEL" ]; then + echo "[opusminimax] --model-profile custom requires --planner-model and --executor-model or OPUSMINIMAX_* model env vars" >&2 + exit 2 + fi + ;; +esac + +if [ "$EXECUTOR_PROVIDER" = "minimax" ] && [ "$EXECUTOR_MODEL" != "MiniMax-M2.7-highspeed" ]; then + echo "[opusminimax] minimax profile requires executor model MiniMax-M2.7-highspeed" >&2 + exit 2 +fi +if [ 
"$EXECUTOR_PROVIDER" != "minimax" ] && [[ "${EXECUTOR_MODEL,,}" == *"minimax"* ]]; then + echo "[opusminimax] Anthropic model profiles must not request MiniMax executor models" >&2 + exit 2 fi if [ -z "$RUN_ID" ]; then @@ -119,12 +200,12 @@ mkdir -p "$PACKET_DIR" PACKET="$PACKET_DIR/P1.json" RUN_ARTIFACT="$RUN_DIR/opusminimax-run.json" -python3 - "$TASK" "$MODE" "$OUTER_ROUTE" "$INNER_CONTRACT" "$RUN_ID" "$PACKET" "$RUN_ARTIFACT" "$PLANNER_MODEL" "$EXECUTOR_MODEL" "$EXECUTOR_PROVIDER" <<'PY' +python3 - "$TASK" "$MODE" "$OUTER_ROUTE" "$INNER_CONTRACT" "$RUN_ID" "$PACKET" "$RUN_ARTIFACT" "$PLANNER_MODEL" "$EXECUTOR_MODEL" "$EXECUTOR_PROVIDER" "$MODEL_PROFILE" <<'PY' import json import pathlib import sys -task, mode, outer_route, inner_contract, run_id, packet_path, run_artifact, planner_model, executor_model, executor_provider = sys.argv[1:11] +task, mode, outer_route, inner_contract, run_id, packet_path, run_artifact, planner_model, executor_model, executor_provider, model_profile = sys.argv[1:12] packet_path = pathlib.Path(packet_path) run_artifact = pathlib.Path(run_artifact) if executor_provider == "claude-sonnet": @@ -135,6 +216,14 @@ if executor_provider == "claude-sonnet": "provider": "claude-sonnet", } executor_label = "Claude Sonnet executor" +elif executor_provider == "anthropic": + executor_profile = { + "path": ".claude/settings.opusminimax-planner.example.json", + "anthropic_base_url": "", + "model": executor_model, + "provider": "anthropic", + } + executor_label = "Claude/Anthropic executor" else: executor_profile = { "path": ".claude/settings.minimax-executor.example.json", @@ -164,10 +253,11 @@ run = { "run_id": run_id, "outer_route": outer_route, "inner_contract": inner_contract, + "model_profile": model_profile, "executor_provider": executor_provider, "planner_identity_status": "blocked", "executor_identity_status": "configured", - "fallback_status": "none" if outer_route == "opusworkflow" else "explicit_user_override", + "fallback_status": "none" 
if outer_route == "opusworkflow" and model_profile == "minimax" else "explicit_user_override", "provider_profiles": { "planner": { "path": ".claude/settings.opusminimax-planner.example.json", @@ -180,6 +270,20 @@ run = { "planner_requested": planner_model, "executor_requested": executor_model, }, + "model_route": { + "profile": model_profile, + "planner": { + "provider": "anthropic", + "requested_model": planner_model, + "identity_status": "blocked", + }, + "executor": { + "provider": executor_profile["provider"], + "requested_model": executor_model, + "identity_status": "configured", + }, + "fallback_policy": "fail-closed-unless-explicit", + }, "capacity": { "local_ceiling": 10, "provider_ceiling": 1, @@ -215,7 +319,7 @@ echo "[opusminimax] run artifact: $RUN_ARTIFACT" if [ "$EXECUTE_PLANNER" -eq 0 ]; then echo "[opusminimax] runtime not executed. To launch planner explicitly:" - echo " bash scripts/opusminimax.sh --task \"$TASK\" --mode $MODE --outer-route $OUTER_ROUTE --inner-contract $INNER_CONTRACT --executor-provider $EXECUTOR_PROVIDER --executor-model $EXECUTOR_MODEL --execute-planner" + echo " bash scripts/opusminimax.sh --task \"$TASK\" --mode $MODE --outer-route $OUTER_ROUTE --inner-contract $INNER_CONTRACT --model-profile $MODEL_PROFILE --executor-provider $EXECUTOR_PROVIDER --planner-model $PLANNER_MODEL --executor-model $EXECUTOR_MODEL --execute-planner" exit 0 fi @@ -224,7 +328,7 @@ if [ ! -f "$PLANNER_SETTINGS" ]; then fi DOCTOR_JSON="$(mktemp)" -if ! bash "$ROOT_DIR/scripts/opusminimax-doctor.sh" --runtime --fix-local-profiles --executor-provider "$EXECUTOR_PROVIDER" --json >"$DOCTOR_JSON"; then +if ! bash "$ROOT_DIR/scripts/opusminimax-doctor.sh" --runtime --fix-local-profiles --model-profile "$MODEL_PROFILE" --executor-provider "$EXECUTOR_PROVIDER" --json >"$DOCTOR_JSON"; then echo "[opusminimax] planner identity blocked: runtime doctor failed." 
>&2 echo "[opusminimax] repair steps: run claude auth login, ensure Opus is available on the account, unset ANTHROPIC_API_KEY for subscription billing, then retry." >&2 rm -f "$DOCTOR_JSON" @@ -288,5 +392,9 @@ if [ ! -f "$PLANNER_SETTINGS" ]; then exit 1 fi -PROMPT="/opusminimax outer_route=$OUTER_ROUTE inner_contract=$INNER_CONTRACT mode=$MODE task=$TASK run_dir=$RUN_DIR planner_model=$PLANNER_MODEL executor_provider=$EXECUTOR_PROVIDER executor_model=$EXECUTOR_MODEL" -claude --model "$PLANNER_MODEL" --effort xhigh --settings "$PLANNER_SETTINGS" -p "$PROMPT" +PROMPT="/opusminimax outer_route=$OUTER_ROUTE inner_contract=$INNER_CONTRACT mode=$MODE task=$TASK run_dir=$RUN_DIR model_profile=$MODEL_PROFILE planner_model=$PLANNER_MODEL executor_provider=$EXECUTOR_PROVIDER executor_model=$EXECUTOR_MODEL" +CLAUDE_ARGS=() +if [ "$PLANNER_MODEL" != "default" ]; then + CLAUDE_ARGS+=(--model "$PLANNER_MODEL") +fi +claude "${CLAUDE_ARGS[@]}" --effort xhigh --settings "$PLANNER_SETTINGS" -p "$PROMPT" diff --git a/scripts/opussonnetworkflow.sh b/scripts/opussonnetworkflow.sh index 000442b..bc2b102 100755 --- a/scripts/opussonnetworkflow.sh +++ b/scripts/opussonnetworkflow.sh @@ -23,6 +23,7 @@ fi echo "[opussonnetworkflow] optional Claude-only mode: Opus 4.7 planning, Sonnet 4.6 execution" exec bash "$ROOT_DIR/scripts/opusworkflow.sh" \ + --model-profile opussonnet \ --executor-provider claude-sonnet \ --planner-model claude-opus-4-7 \ --executor-model claude-sonnet-4-6 \ diff --git a/scripts/opusworkflow-smoke.sh b/scripts/opusworkflow-smoke.sh index eb54376..d73412d 100755 --- a/scripts/opusworkflow-smoke.sh +++ b/scripts/opusworkflow-smoke.sh @@ -64,6 +64,9 @@ require_text "planner_identity_status" scripts/opusminimax.sh require_text "executor_identity_status" scripts/opusminimax.sh require_text "fallback_status" scripts/opusminimax.sh require_text "executor_provider" scripts/opusminimax.sh +require_text "model_profile" scripts/opusminimax.sh +require_text "--model-profile" 
scripts/opusworkflow.sh +require_text "anthropic" scripts/opusworkflow.sh require_text "claude-sonnet" scripts/opusworkflow.sh require_text "claude-sonnet-4-6" .claude/settings.opussonnet.example.json require_text "claude-sonnet-4-6" .claude/settings.sonnet-executor.example.json @@ -97,28 +100,36 @@ RUN_ID="opusworkflow-smoke" AGENTFACTORY_RUN_ID="opusworkflow-agentfactory-smoke" HIVE_RUN_ID="opusworkflow-hiveworkflow-smoke" SONNET_RUN_ID="opusworkflow-sonnet-smoke" +ALL_SONNET_RUN_ID="opusworkflow-all-sonnet-smoke" +ALL_OPUS_RUN_ID="opusworkflow-all-opus-smoke" RUN_DIR=".taste/opusminimax/$RUN_ID" AGENTFACTORY_RUN_DIR=".taste/opusminimax/$AGENTFACTORY_RUN_ID" HIVE_RUN_DIR=".taste/opusminimax/$HIVE_RUN_ID" SONNET_RUN_DIR=".taste/opusminimax/$SONNET_RUN_ID" +ALL_SONNET_RUN_DIR=".taste/opusminimax/$ALL_SONNET_RUN_ID" +ALL_OPUS_RUN_DIR=".taste/opusminimax/$ALL_OPUS_RUN_ID" OUT_FILE="$(mktemp)" cleanup() { rm -f "$OUT_FILE" - rm -rf "$RUN_DIR" "$AGENTFACTORY_RUN_DIR" "$HIVE_RUN_DIR" "$SONNET_RUN_DIR" + rm -rf "$RUN_DIR" "$AGENTFACTORY_RUN_DIR" "$HIVE_RUN_DIR" "$SONNET_RUN_DIR" "$ALL_SONNET_RUN_DIR" "$ALL_OPUS_RUN_DIR" } trap cleanup EXIT -rm -rf "$RUN_DIR" "$AGENTFACTORY_RUN_DIR" "$HIVE_RUN_DIR" "$SONNET_RUN_DIR" +rm -rf "$RUN_DIR" "$AGENTFACTORY_RUN_DIR" "$HIVE_RUN_DIR" "$SONNET_RUN_DIR" "$ALL_SONNET_RUN_DIR" "$ALL_OPUS_RUN_DIR" bash scripts/opusworkflow.sh --task "cost optimized smoke" --run-id "$RUN_ID" >"$OUT_FILE" bash scripts/opusworkflow.sh --task "governed Hermes smoke" --inner-contract agentfactory --run-id "$AGENTFACTORY_RUN_ID" >>"$OUT_FILE" bash scripts/opusworkflow.sh --task "governed hive smoke" --inner-contract hiveworkflow --run-id "$HIVE_RUN_ID" >>"$OUT_FILE" bash scripts/opusworkflow.sh --task "optional Sonnet smoke" --executor-provider claude-sonnet --run-id "$SONNET_RUN_ID" >>"$OUT_FILE" +bash scripts/opusworkflow.sh --task "all Sonnet smoke" --model-profile sonnet --run-id "$ALL_SONNET_RUN_ID" >>"$OUT_FILE" +bash scripts/opusworkflow.sh --task "all 
Opus smoke" --model-profile opus --run-id "$ALL_OPUS_RUN_ID" >>"$OUT_FILE" [ -f "$RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create run artifact" [ -f "$RUN_DIR/packets/P1.json" ] || fail "opusworkflow did not create packet" [ -f "$AGENTFACTORY_RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create agentfactory run artifact" [ -f "$HIVE_RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create hiveworkflow run artifact" [ -f "$SONNET_RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create Sonnet run artifact" +[ -f "$ALL_SONNET_RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create all-Sonnet run artifact" +[ -f "$ALL_OPUS_RUN_DIR/opusminimax-run.json" ] || fail "opusworkflow did not create all-Opus run artifact" python3 - "$RUN_DIR/opusminimax-run.json" "$AGENTFACTORY_RUN_DIR/opusminimax-run.json" "$HIVE_RUN_DIR/opusminimax-run.json" <<'PY' import json @@ -137,6 +148,7 @@ for raw_path, contract in zip(sys.argv[1:], expected): assert data.get("planner_identity_status") == "blocked" assert data.get("executor_identity_status") == "configured" assert data.get("fallback_status") == "none" + assert data.get("model_profile") == "minimax" assert models.get("executor_requested") == "MiniMax-M2.7-highspeed" assert capacity.get("provider_ceiling") == 1 assert capacity.get("effective_concurrency") == 1 @@ -154,6 +166,7 @@ profiles = data.get("provider_profiles", {}) executor = profiles.get("executor", {}) assert data.get("artifact_type") == "opusminimax-run" assert data.get("outer_route") == "opusworkflow" +assert data.get("model_profile") == "opussonnet" assert data.get("executor_provider") == "claude-sonnet" assert models.get("executor_requested") == "claude-sonnet-4-6" assert executor.get("anthropic_base_url", "") == "" @@ -161,10 +174,35 @@ assert "sonnet" in executor.get("model", "").lower() assert data.get("claims", {}).get("opus_planned") is False PY +python3 - 
"$ALL_SONNET_RUN_DIR/opusminimax-run.json" "$ALL_OPUS_RUN_DIR/opusminimax-run.json" <<'PY' +import json +import pathlib +import sys + +sonnet = json.loads(pathlib.Path(sys.argv[1]).read_text(encoding="utf-8")) +opus = json.loads(pathlib.Path(sys.argv[2]).read_text(encoding="utf-8")) +for data, profile, needle in [(sonnet, "sonnet", "sonnet"), (opus, "opus", "opus")]: + models = data.get("model_ids", {}) + executor = data.get("provider_profiles", {}).get("executor", {}) + assert data.get("artifact_type") == "opusminimax-run" + assert data.get("outer_route") == "opusworkflow" + assert data.get("model_profile") == profile + assert data.get("executor_provider") == "anthropic" + assert needle in models.get("planner_requested", "").lower() + assert needle in models.get("executor_requested", "").lower() + assert executor.get("anthropic_base_url", "") == "" + executor_blob = json.dumps(executor).lower() + assert "api.minimax.io/anthropic" not in executor_blob + assert "minimax-m2.7-highspeed" not in executor_blob + assert data.get("claims", {}).get("opus_planned") is False +PY + bash scripts/artifact-lint.sh "$RUN_DIR/opusminimax-run.json" >/dev/null bash scripts/artifact-lint.sh "$RUN_DIR/packets/P1.json" >/dev/null bash scripts/artifact-lint.sh "$AGENTFACTORY_RUN_DIR/opusminimax-run.json" >/dev/null bash scripts/artifact-lint.sh "$HIVE_RUN_DIR/opusminimax-run.json" >/dev/null bash scripts/artifact-lint.sh "$SONNET_RUN_DIR/opusminimax-run.json" >/dev/null +bash scripts/artifact-lint.sh "$ALL_SONNET_RUN_DIR/opusminimax-run.json" >/dev/null +bash scripts/artifact-lint.sh "$ALL_OPUS_RUN_DIR/opusminimax-run.json" >/dev/null echo "[PASS] /opusworkflow cost-optimized smoke passed" diff --git a/scripts/opusworkflow.sh b/scripts/opusworkflow.sh index 15a87c0..8acef2e 100755 --- a/scripts/opusworkflow.sh +++ b/scripts/opusworkflow.sh @@ -8,7 +8,7 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" usage() { cat >&2 <<'EOF' Usage: - bash scripts/opusworkflow.sh --task "..." [--inner-contract CONTRACT] [--run-id ID] [--executor-provider minimax|claude-sonnet] [--execute-planner] [--planner-settings PATH] [--planner-model MODEL] [--executor-model MODEL] + bash scripts/opusworkflow.sh --task "..." [--inner-contract CONTRACT] [--run-id ID] [--model-profile minimax|opussonnet|sonnet|opus|default|custom] [--executor-provider minimax|claude-sonnet|anthropic] [--execute-planner] [--planner-settings PATH] [--planner-model MODEL] [--executor-model MODEL] /opusworkflow is the cost-optimized workflow entrypoint. It reuses scripts/opusminimax.sh in workflow mode, keeping Claude/Opus for judgment and @@ -17,6 +17,14 @@ MiniMax-M2.7-highspeed for bounded execution packets. The optional claude-sonnet executor provider keeps the same workflow governance but uses Claude Code opusplan/Sonnet 4.6 instead of MiniMax. +Model profiles: + minimax Opus judgment + MiniMax execution (default) + opussonnet Opus judgment + Sonnet execution, no MiniMax token + sonnet Sonnet for planning and execution + opus Opus for planning and execution + default Claude Code account default for planning and execution + custom Explicit --planner-model and --executor-model values + CONTRACT may be workflow, agentfactory, hiveworkflow, parallel, defineicp, deepretaste, demo, or visualizeworkflow. 
EOF @@ -24,8 +32,10 @@ EOF ARGS=() EXECUTOR_PROVIDER="${OPUSWORKFLOW_EXECUTOR_PROVIDER:-minimax}" +MODEL_PROFILE="${OPUSWORKFLOW_MODEL_PROFILE:-}" EXECUTOR_PROVIDER_SET=0 EXECUTOR_MODEL_SET=0 +MODEL_PROFILE_SET=0 while [ "$#" -gt 0 ]; do case "$1" in "--mode") @@ -42,6 +52,12 @@ while [ "$#" -gt 0 ]; do ARGS+=("$1" "$2") shift 2 ;; + "--model-profile") + MODEL_PROFILE="${2:-}" + MODEL_PROFILE_SET=1 + ARGS+=("$1" "$2") + shift 2 + ;; "--executor-model") EXECUTOR_MODEL_SET=1 ARGS+=("$1" "$2") @@ -54,22 +70,70 @@ while [ "$#" -gt 0 ]; do esac done +if [ -z "$MODEL_PROFILE" ]; then + case "$EXECUTOR_PROVIDER" in + minimax) MODEL_PROFILE="minimax" ;; + claude-sonnet) MODEL_PROFILE="opussonnet" ;; + anthropic) MODEL_PROFILE="custom" ;; + esac +fi + +if [ "$EXECUTOR_PROVIDER_SET" -eq 0 ]; then + case "$MODEL_PROFILE" in + minimax) EXECUTOR_PROVIDER="minimax" ;; + opussonnet) EXECUTOR_PROVIDER="claude-sonnet" ;; + sonnet|opus|default|custom) EXECUTOR_PROVIDER="anthropic" ;; + esac +fi + +case "$MODEL_PROFILE" in + minimax) + echo "[opusworkflow] model profile: minimax (Opus judgment, MiniMax-M2.7-highspeed execution)" + ;; + opussonnet) + echo "[opusworkflow] model profile: opussonnet (Opus 4.7 planning, Sonnet 4.6 execution)" + ;; + sonnet) + echo "[opusworkflow] model profile: sonnet (Sonnet 4.6 planning and execution)" + ;; + opus) + echo "[opusworkflow] model profile: opus (Opus 4.7 planning and execution; explicit high-cost route)" + ;; + default) + echo "[opusworkflow] model profile: default (Claude Code account default; runtime identity remains unproven)" + ;; + custom) + echo "[opusworkflow] model profile: custom (explicit planner/executor model request)" + ;; + *) + echo "[opusworkflow] invalid model profile: $MODEL_PROFILE" >&2 + exit 2 + ;; +esac + case "$EXECUTOR_PROVIDER" in minimax) - echo "[opusworkflow] cost-optimized mode: Claude/Opus judgment, MiniMax-M2.7-highspeed execution, default executor concurrency=1" + echo "[opusworkflow] executor provider: 
minimax" ;; claude-sonnet) if [ "$EXECUTOR_MODEL_SET" -eq 0 ]; then ARGS+=("--executor-model" "claude-sonnet-4-6") fi - echo "[opusworkflow] suggested Claude-only mode: Opus 4.7 planning, Sonnet 4.6 execution, default executor concurrency=1" + echo "[opusworkflow] executor provider: claude-sonnet" + ;; + anthropic) + echo "[opusworkflow] executor provider: anthropic" ;; *) echo "[opusworkflow] invalid executor provider: $EXECUTOR_PROVIDER" >&2 exit 2 ;; esac + if [ "$EXECUTOR_PROVIDER_SET" -eq 0 ]; then ARGS+=("--executor-provider" "$EXECUTOR_PROVIDER") fi +if [ "$MODEL_PROFILE_SET" -eq 0 ]; then + ARGS+=("--model-profile" "$MODEL_PROFILE") +fi exec bash "$ROOT_DIR/scripts/opusminimax.sh" --mode workflow --outer-route opusworkflow "${ARGS[@]}" diff --git a/scripts/test-harness.sh b/scripts/test-harness.sh index e3e3db3..bffd7d1 100755 --- a/scripts/test-harness.sh +++ b/scripts/test-harness.sh @@ -266,6 +266,8 @@ for pattern in \ "planner_identity_status" \ "executor_identity_status" \ "fallback_status" \ + "model_profile" \ + "--model-profile" \ "opusworkflow-smoke"; do if ! grep -Fq -- "$pattern" setup.sh scripts/opusminimax.sh scripts/opusminimax-doctor.sh scripts/harness-eval.sh scripts/release-check.sh scripts/harness-capability-map.sh 2>/dev/null; then OPUSWORKFLOW_OK=false @@ -299,6 +301,7 @@ for pattern in \ "claude-sonnet-4-6" \ "opusplan" \ "executor_provider" \ + "model_profile" \ "claude-sonnet"; do if ! grep -Fq -- "$pattern" setup.sh scripts/opusminimax.sh scripts/opusworkflow.sh scripts/artifact-lint.sh .claude/skills/opussonnet/SKILL.md 2>/dev/null; then OPUSSONNET_OK=false @@ -1096,6 +1099,7 @@ for pattern in \ "unverified-worker-claim" \ "opusminimax-fake-opus-claim" \ "opusminimax-planner-minimax-base-url" \ + "valid-sonnet-model-profile-run" \ "opusminimax-benchmark-aggregate-without-per-task"; do if ! find .taste/fixtures/artifact-lint -type f -name "*$pattern*.json" 2>/dev/null | grep -q .; then ARTIFACT_LINT_OK=false