Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 2 additions & 12 deletions scripts/powershell/Optimize-CodexWorkspace.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -447,18 +447,8 @@ function Invoke-NativeValidation {
$startInfo.UseShellExecute = $false
$startInfo.WorkingDirectory = $resolvedProject.Path
$nativeArguments = @($Arguments | ForEach-Object { ConvertTo-NativeArgument $_ })
if ($resolvedExecutable.Source -match '\.(cmd|bat)$') {
$startInfo.FileName = $env:ComSpec
$invocation = @(
ConvertTo-NativeArgument $resolvedExecutable.Source
$nativeArguments
) -join ' '
$startInfo.Arguments = '/d /s /c "' + $invocation + '"'
}
else {
$startInfo.FileName = $resolvedExecutable.Source
$startInfo.Arguments = $nativeArguments -join ' '
}
$startInfo.FileName = $resolvedExecutable.Source
$startInfo.Arguments = $nativeArguments -join ' '

$process = [System.Diagnostics.Process]::new()
$process.StartInfo = $startInfo
Expand Down
2 changes: 1 addition & 1 deletion scripts/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Connect the inspector to `http://localhost:8000/mcp`.

`codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and
a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens,
apply budgets, and route one-shot Codex tasks to `codex-cheap`, `codex-auto`, or
apply budgets, and route one-shot Codex tasks to `codex-cheap` or
`codex-strong`.

See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup,
Expand Down
11 changes: 7 additions & 4 deletions scripts/python/README_Codex_Cost_Routing.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@ Optional cost routing for Codex CLI on Windows using the official open-source
The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens,
applies budgets, and selects one of these LiteLLM aliases:

- `codex-cheap`
- `codex-auto`
- `codex-strong`
- `codex-cheap` for simple, low-cost tasks
- `codex-strong` for default, medium, and complex tasks

The previous `codex-auto` middle tier was removed because it pointed to the same
provider model as `codex-strong`, which made the fallback chain redundant. Add a
third alias again only when it maps to a genuinely different model or provider.

API keys are never committed or written to a configuration file.

Expand Down Expand Up @@ -94,7 +97,7 @@ Prompts and API keys are not logged.
- `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow.
- `codex-cost-routing.cmd`: simple Windows launcher.
- `codex_cost_router.py`: prompt optimization and one-shot routing.
- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallbacks.
- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and the `codex-cheap` to `codex-strong` fallback.

## Notes

Expand Down
21 changes: 9 additions & 12 deletions scripts/python/codex_cost_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup"
BEGIN_MARKER = "# BEGIN CODEX COST ROUTER"
END_MARKER = "# END CODEX COST ROUTER"
DEFAULT_MODEL = "codex-strong"
DEFAULT_MAX_INPUT_TOKENS = 12_000
DEFAULT_MAX_OUTPUT_TOKENS = 2_000
MODELS = ("codex-cheap", "codex-auto", "codex-strong")
MODELS = ("codex-cheap", "codex-strong")
LITELLM_HOST = "localhost"
LITELLM_PORT = 4000
WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe")
Expand All @@ -34,7 +35,6 @@
# match the deployments configured in your local LiteLLM OSS proxy.
ESTIMATED_RATES = {
"codex-cheap": {"input": 0.15, "output": 0.60},
"codex-auto": {"input": 0.50, "output": 2.00},
"codex-strong": {"input": 2.00, "output": 8.00},
}

Expand All @@ -43,6 +43,7 @@
"résumé",
"resume",
"documentation",
"document",
"petite modification",
"typo",
"readme",
Expand Down Expand Up @@ -71,7 +72,7 @@
"critical bug",
)

PROFILE_BLOCK = """\
PROFILE_BLOCK = f"""\
# BEGIN CODEX COST ROUTER
[model_providers.litellm]
name = "LiteLLM OSS Cost Router"
Expand All @@ -80,7 +81,7 @@
wire_api = "responses"

[profiles.cost-routing]
model = "codex-auto"
model = "{DEFAULT_MODEL}"
model_provider = "litellm"
model_reasoning_effort = "low"
# END CODEX COST ROUTER
Expand Down Expand Up @@ -238,11 +239,7 @@ def route_model(prompt: str, force_model: str | None = None) -> tuple[str, str]:
if force_model:
return force_model, "model forced by CLI option"
complexity, reason = classify_complexity(prompt)
model = {
"simple": "codex-cheap",
"medium": "codex-auto",
"complex": "codex-strong",
}[complexity]
model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL
return model, reason


Expand Down Expand Up @@ -270,7 +267,7 @@ def enable_router() -> int:
config = remove_profile_block(config)
updated = config.rstrip() + ("\n\n" if config.strip() else "") + PROFILE_BLOCK
write_text(CODEX_CONFIG, updated)
save_state(enabled=True, enabled_at=utc_now(), current_model="codex-auto")
save_state(enabled=True, enabled_at=utc_now(), current_model=DEFAULT_MODEL)
print("Cost routing enabled.")
print("LiteLLM OSS profile installed: cost-routing")
print("Start the profile with:")
Expand Down Expand Up @@ -307,7 +304,7 @@ def print_status() -> int:
print("Codex Cost Router")
print("-----------------")
print(f"Profile active : {'yes' if router_enabled() else 'no'}")
print(f"Current model : {latest.get('model', state.get('current_model', 'codex-auto'))}")
print(f"Current model : {latest.get('model', state.get('current_model', DEFAULT_MODEL))}")
print(f"Last estimated cost: ${latest.get('estimated_cost_usd', 0):.8f}")
print(f"Last routing : {latest.get('routing_reason', 'none')}")
print(f"Execution mode : {latest.get('execution_mode', 'none')}")
Expand Down Expand Up @@ -412,7 +409,7 @@ def run_router(args: argparse.Namespace) -> int:
output_tokens = args.max_output_tokens
compression_ratio = round(input_tokens / max(1, original_tokens), 4)
cost = estimate_cost(model, input_tokens, output_tokens)
strong_cost = estimate_cost("codex-strong", input_tokens, output_tokens)
strong_cost = estimate_cost(DEFAULT_MODEL, input_tokens, output_tokens)
execution_mode = "dry-run" if args.dry_run else "codex-exec"

record = {
Expand Down
11 changes: 3 additions & 8 deletions scripts/python/litellm-cost-routing.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# LiteLLM OSS self-hosted proxy example.
# Replace the deployment model values with the providers you want to use.
# Two explicit tiers keep the local setup simple:
# - codex-cheap for low-cost/simple work
# - codex-strong for default, medium, and complex work
# API keys are read from environment variables and must never be committed.

model_list:
Expand All @@ -8,11 +10,6 @@ model_list:
model: openai/gpt-5-mini
api_key: os.environ/OPENAI_API_KEY

- model_name: codex-auto
litellm_params:
model: openai/gpt-5
api_key: os.environ/OPENAI_API_KEY

- model_name: codex-strong
litellm_params:
model: openai/gpt-5
Expand All @@ -21,8 +18,6 @@ model_list:
router_settings:
fallbacks:
- codex-cheap:
- codex-auto
- codex-auto:
- codex-strong

litellm_settings:
Expand Down
2 changes: 1 addition & 1 deletion scripts/python/tests/test_codex_cost_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_compress_logs_removes_low_value_debug_lines(self) -> None:

def test_route_model_uses_expected_aliases(self) -> None:
self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-cheap")
self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-auto")
self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-strong")
self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-strong")

def test_build_optimized_prompt_respects_budget(self) -> None:
Expand Down