From 6edda26362ae49c50d90885dd95dd5bb7c80e807 Mon Sep 17 00:00:00 2001 From: Thibault Date: Sat, 20 Jun 2026 10:34:38 +0200 Subject: [PATCH 1/6] Simplify LiteLLM cost routing tiers --- scripts/python/litellm-cost-routing.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/python/litellm-cost-routing.yaml b/scripts/python/litellm-cost-routing.yaml index 0677bdf..ca61f5b 100644 --- a/scripts/python/litellm-cost-routing.yaml +++ b/scripts/python/litellm-cost-routing.yaml @@ -1,5 +1,7 @@ # LiteLLM OSS self-hosted proxy example. -# Replace the deployment model values with the providers you want to use. +# Two explicit tiers keep the local setup simple: +# - codex-cheap for low-cost/simple work +# - codex-strong for default, medium, and complex work # API keys are read from environment variables and must never be committed. model_list: @@ -8,11 +10,6 @@ model_list: model: openai/gpt-5-mini api_key: os.environ/OPENAI_API_KEY - - model_name: codex-auto - litellm_params: - model: openai/gpt-5 - api_key: os.environ/OPENAI_API_KEY - - model_name: codex-strong litellm_params: model: openai/gpt-5 @@ -21,8 +18,6 @@ model_list: router_settings: fallbacks: - codex-cheap: - - codex-auto - - codex-auto: - codex-strong litellm_settings: From 0d5936cdab464a2ab109c32b9dfbaedb78b985c3 Mon Sep 17 00:00:00 2001 From: Thibault Date: Sat, 20 Jun 2026 10:38:30 +0200 Subject: [PATCH 2/6] Simplify Codex cost router aliases --- scripts/python/codex_cost_router.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/scripts/python/codex_cost_router.py b/scripts/python/codex_cost_router.py index c390144..aab452a 100644 --- a/scripts/python/codex_cost_router.py +++ b/scripts/python/codex_cost_router.py @@ -23,9 +23,10 @@ CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup" BEGIN_MARKER = "# BEGIN CODEX COST ROUTER" END_MARKER = "# END CODEX COST ROUTER" +DEFAULT_MODEL = "codex-strong" DEFAULT_MAX_INPUT_TOKENS = 12_000 DEFAULT_MAX_OUTPUT_TOKENS = 2_000 -MODELS = ("codex-cheap", "codex-auto", "codex-strong") +MODELS = ("codex-cheap", "codex-strong") LITELLM_HOST = "localhost" LITELLM_PORT = 4000 WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe") @@ -34,7 +35,6 @@ # match the deployments configured in your local LiteLLM OSS proxy. ESTIMATED_RATES = { "codex-cheap": {"input": 0.15, "output": 0.60}, - "codex-auto": {"input": 0.50, "output": 2.00}, "codex-strong": {"input": 2.00, "output": 8.00}, } @@ -43,6 +43,7 @@ "résumé", "resume", "documentation", + "document", "petite modification", "typo", "readme", @@ -71,7 +72,7 @@ "critical bug", ) -PROFILE_BLOCK = """\ +PROFILE_BLOCK = f"""\ # BEGIN CODEX COST ROUTER [model_providers.litellm] name = "LiteLLM OSS Cost Router" @@ -80,7 +81,7 @@ wire_api = "responses" [profiles.cost-routing] -model = "codex-auto" +model = "{DEFAULT_MODEL}" model_provider = "litellm" model_reasoning_effort = "low" # END CODEX COST ROUTER @@ -238,11 +239,7 @@ def route_model(prompt: str, force_model: str | None = None) -> tuple[str, str]: if force_model: return force_model, "model forced by CLI option" complexity, reason = classify_complexity(prompt) - model = { - "simple": "codex-cheap", - "medium": "codex-auto", - "complex": "codex-strong", - }[complexity] + model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL return model, reason @@ -270,7 +267,7 @@ def enable_router() -> int: config = remove_profile_block(config) updated = config.rstrip() + ("\n\n" if config.strip() else "") + PROFILE_BLOCK write_text(CODEX_CONFIG, updated) - save_state(enabled=True, enabled_at=utc_now(), current_model="codex-auto") + save_state(enabled=True, enabled_at=utc_now(), current_model=DEFAULT_MODEL) print("Cost routing enabled.") print("LiteLLM OSS profile installed: cost-routing") print("Start the profile with:") @@ -307,7 +304,7 @@ def print_status() -> int: print("Codex Cost Router") print("-----------------") print(f"Profile active : {'yes' if router_enabled() else 'no'}") - print(f"Current model : {latest.get('model', state.get('current_model', 'codex-auto'))}") + print(f"Current model : {latest.get('model', state.get('current_model', DEFAULT_MODEL))}") print(f"Last estimated cost: ${latest.get('estimated_cost_usd', 0):.8f}") print(f"Last routing : {latest.get('routing_reason', 'none')}") print(f"Execution mode : {latest.get('execution_mode', 'none')}") @@ -412,7 +409,7 @@ def run_router(args: argparse.Namespace) -> int: output_tokens = args.max_output_tokens compression_ratio = round(input_tokens / max(1, original_tokens), 4) cost = estimate_cost(model, input_tokens, output_tokens) - strong_cost = estimate_cost("codex-strong", input_tokens, output_tokens) + strong_cost = estimate_cost(DEFAULT_MODEL, input_tokens, output_tokens) execution_mode = "dry-run" if args.dry_run else "codex-exec" record = { From 03eb7e6fdb955e788e92e9c22f89d493f2b6feea Mon Sep 17 00:00:00 2001 From: Thibault Date: Sat, 20 Jun 2026 10:39:21 +0200 Subject: [PATCH 3/6] Document simplified LiteLLM routing tiers --- scripts/python/README_Codex_Cost_Routing.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/python/README_Codex_Cost_Routing.md b/scripts/python/README_Codex_Cost_Routing.md index 3aeb132..c0ef584 100644 --- a/scripts/python/README_Codex_Cost_Routing.md +++ b/scripts/python/README_Codex_Cost_Routing.md @@ -6,9 +6,12 @@ Optional cost routing for Codex CLI on Windows using the official open-source The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens, applies budgets, and selects one of these LiteLLM aliases: -- `codex-cheap` -- `codex-auto` -- `codex-strong` +- `codex-cheap` for simple, low-cost tasks +- `codex-strong` for default, medium, and complex tasks + +The previous `codex-auto` middle tier was removed because it pointed to the same +provider model as `codex-strong`, which made the fallback chain redundant. Add a +third alias again only when it maps to a genuinely different model or provider. API keys are never committed or written to a configuration file. @@ -94,7 +97,7 @@ Prompts and API keys are not logged. - `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow. - `codex-cost-routing.cmd`: simple Windows launcher. - `codex_cost_router.py`: prompt optimization and one-shot routing. -- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallbacks. +- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallback. ## Notes From a8d241a2d254e465ee595f07c885df7de766b4ed Mon Sep 17 00:00:00 2001 From: Thibault Date: Sat, 20 Jun 2026 11:05:01 +0200 Subject: [PATCH 4/6] Update cost router tests for two-tier routing --- scripts/python/tests/test_codex_cost_router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/python/tests/test_codex_cost_router.py b/scripts/python/tests/test_codex_cost_router.py index cc01b0e..7a4cffd 100644 --- a/scripts/python/tests/test_codex_cost_router.py +++ b/scripts/python/tests/test_codex_cost_router.py @@ -26,7 +26,7 @@ def test_compress_logs_removes_low_value_debug_lines(self) -> None: def test_route_model_uses_expected_aliases(self) -> None: self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-cheap") - self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-auto") + self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-strong") self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-strong") def test_build_optimized_prompt_respects_budget(self) -> None: From 003950d6095044e153eb0b8727e5dc985ddc45e6 Mon Sep 17 00:00:00 2001 From: Thibault Date: Sat, 20 Jun 2026 11:05:28 +0200 Subject: [PATCH 5/6] Update Python README for two-tier routing --- scripts/python/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/python/README.md b/scripts/python/README.md index 771a0c1..dcf6f2a 100644 --- a/scripts/python/README.md +++ b/scripts/python/README.md @@ -52,7 +52,7 @@ Connect the inspector to `http://localhost:8000/mcp`. `codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens, -apply budgets, and route one-shot Codex tasks to `codex-cheap`, `codex-auto`, or +apply budgets, and route one-shot Codex tasks to `codex-cheap` or `codex-strong`. See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup, From e312226603c70f13b4e83fc275e479cc8114b73d Mon Sep 17 00:00:00 2001 From: Tibo2403 Date: Sat, 20 Jun 2026 15:50:35 +0200 Subject: [PATCH 6/6] fix: capture native validation diagnostics --- scripts/powershell/Optimize-CodexWorkspace.ps1 | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/scripts/powershell/Optimize-CodexWorkspace.ps1 b/scripts/powershell/Optimize-CodexWorkspace.ps1 index 08d2ca7..0a5133b 100644 --- a/scripts/powershell/Optimize-CodexWorkspace.ps1 +++ b/scripts/powershell/Optimize-CodexWorkspace.ps1 @@ -447,18 +447,8 @@ function Invoke-NativeValidation { $startInfo.UseShellExecute = $false $startInfo.WorkingDirectory = $resolvedProject.Path $nativeArguments = @($Arguments | ForEach-Object { ConvertTo-NativeArgument $_ }) - if ($resolvedExecutable.Source -match '\.(cmd|bat)$') { - $startInfo.FileName = $env:ComSpec - $invocation = @( - ConvertTo-NativeArgument $resolvedExecutable.Source - $nativeArguments - ) -join ' ' - $startInfo.Arguments = '/d /s /c "' + $invocation + '"' - } - else { - $startInfo.FileName = $resolvedExecutable.Source - $startInfo.Arguments = $nativeArguments -join ' ' - } + $startInfo.FileName = $resolvedExecutable.Source + $startInfo.Arguments = $nativeArguments -join ' ' $process = [System.Diagnostics.Process]::new() $process.StartInfo = $startInfo