diff --git a/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 new file mode 100644 index 0000000..bc87468 --- /dev/null +++ b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 @@ -0,0 +1,24 @@ +[CmdletBinding()] +param() + +$ErrorActionPreference = 'Stop' +$target = Join-Path $env:USERPROFILE '.codex\litellm-proxy' +New-Item -ItemType Directory -Force -Path $target | Out-Null + +$files = @( + 'litellm-cost-routing.yaml', + 'codex_key_session_web.py', + 'Start-CodexKeySessionWeb.ps1' +) + +foreach ($file in $files) { + Copy-Item -LiteralPath (Join-Path $PSScriptRoot $file) -Destination (Join-Path $target $file) -Force +} + +$configSource = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml' +$configTarget = Join-Path $target 'config.yaml' +$text = Get-Content -LiteralPath $configSource -Raw +$text = $text -replace '(?m)^\s*master_key:\s*os\.environ/LITELLM_API_KEY\s*\r?\n','' +Set-Content -LiteralPath $configTarget -Value $text -Encoding UTF8 + +Write-Output "Installed local LiteLLM assets in $target" diff --git a/scripts/python/Manage-CodexCostRouting.ps1 b/scripts/python/Manage-CodexCostRouting.ps1 index b5aefb1..a892878 100644 --- a/scripts/python/Manage-CodexCostRouting.ps1 +++ b/scripts/python/Manage-CodexCostRouting.ps1 @@ -64,6 +64,7 @@ function Get-ProxyProcess { function Remove-SessionSecrets { Remove-Item Env:OPENAI_API_KEY -ErrorAction SilentlyContinue + Remove-Item Env:GEMINI_API_KEY -ErrorAction SilentlyContinue Remove-Item Env:HF_TOKEN -ErrorAction SilentlyContinue Remove-Item Env:LITELLM_API_KEY -ErrorAction SilentlyContinue Remove-Item Env:PYTHONUTF8 -ErrorAction SilentlyContinue @@ -104,6 +105,19 @@ function Set-SessionSecrets { throw 'OPENAI_API_KEY est obligatoire.' 
} + if (-not $env:GEMINI_API_KEY) { + $secureKey = Read-Host 'GEMINI_API_KEY (optionnel, entree pour activer le dispatching Gemini)' -AsSecureString + if ($secureKey.Length -gt 0) { + $pointer = [Runtime.InteropServices.Marshal]::SecureStringToBSTR($secureKey) + try { + $env:GEMINI_API_KEY = [Runtime.InteropServices.Marshal]::PtrToStringBSTR($pointer) + } + finally { + [Runtime.InteropServices.Marshal]::ZeroFreeBSTR($pointer) + } + } + } + if (-not $env:LITELLM_API_KEY) { $env:LITELLM_API_KEY = 'sk-local-' + [Guid]::NewGuid().ToString('N') } diff --git a/scripts/python/README.md b/scripts/python/README.md index aa34bfe..02ebe3e 100644 --- a/scripts/python/README.md +++ b/scripts/python/README.md @@ -52,16 +52,24 @@ Connect the inspector to `http://localhost:8000/mcp`. `codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens, -apply budgets, and route one-shot Codex tasks to `codex-cheap` or -`codex-strong`. When `HF_TOKEN` is available, it can also route Hugging Face and -multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` LiteLLM -aliases, or launch an optional `cost-routing-hf` Codex profile that points -directly at the Hugging Face router. `codex-routing-policy.yaml` keeps the -default provider rules and fallback order editable without changing Python code. +apply budgets, and route one-shot Codex tasks to `codex-light`, +`codex-default`, `codex-long`, or `codex-deep`. The local LiteLLM config +dispatches those aliases across OpenAI and Gemini while keeping API keys in +environment variables. When `HF_TOKEN` is available, it can also route Hugging +Face and multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` +LiteLLM aliases, or launch an optional `cost-routing-hf` Codex profile that +points directly at the Hugging Face router. 
`codex-routing-policy.yaml` keeps +the default provider rules and fallback order editable without changing Python +code. See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup, activation, LiteLLM configuration, and usage instructions. +To enter OpenAI, Gemini, or Hugging Face keys through a local page for one +session, run `Start-CodexKeySessionWeb.ps1` and open +`http://127.0.0.1:8787/`. Keys are kept in memory for the LiteLLM subprocess +and are not written to disk. + ## LLM Review Tools `finance_bias_evaluator.py` is a deterministic first-pass checker for finance diff --git a/scripts/python/README_Codex_Cost_Routing.md b/scripts/python/README_Codex_Cost_Routing.md index 20ec04c..2eb5ed9 100644 --- a/scripts/python/README_Codex_Cost_Routing.md +++ b/scripts/python/README_Codex_Cost_Routing.md @@ -6,25 +6,31 @@ Optional cost routing for Codex CLI on Windows using the official open-source The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens, applies budgets, and selects one of these LiteLLM aliases: -- `codex-cheap` for simple, low-cost tasks -- `codex-strong` for default, medium, and complex tasks +- `codex-light` for simple, low-cost, and frequent tasks +- `codex-default` for normal coding work +- `codex-long` for long-context reads, log review, and synthesis +- `codex-deep` for difficult debugging, security, and architecture decisions +- `codex-cheap` and `codex-strong` as backward-compatible aliases - `codex-hf-cheap` for simple Hugging Face / open-model tasks when `HF_TOKEN` is set - `codex-hf-fast` for larger Hugging Face / multi-provider tasks when `HF_TOKEN` is set -The previous `codex-auto` middle tier was removed because it pointed to the same -provider model as `codex-strong`, which made the fallback chain redundant. Add a -third alias again only when it maps to a genuinely different model or provider. +OpenAI and Gemini are both configured through LiteLLM model groups. 
The normal +default keeps most code-generation traffic on OpenAI while letting Gemini absorb +long-context and lower-risk work. This reduces token saturation without sending +high-stakes changes blindly to the cheapest model. -API keys are never committed or written to a configuration file. +API keys are never committed or written to a configuration file. `OPENAI_API_KEY` +is required for the default profile; `GEMINI_API_KEY` is optional but recommended +to activate the OpenAI/Gemini dispatching path. ## Hugging Face Integration Hugging Face can be used in two optional places. First, Hugging Face Inference Providers can sit behind LiteLLM as another -provider pool. The local config includes two optional aliases: +provider pool. The local config still includes two optional aliases: ```yaml codex-hf-cheap -> huggingface/groq/openai/gpt-oss-120b @@ -97,7 +103,7 @@ open_models_only: false max_cost_usd: 0.0 task_provider_rules: - simple: huggingface + simple: auto medium: auto complex: openai @@ -130,14 +136,30 @@ for this command only. The script: 1. installs the official LiteLLM OSS proxy in `C:\tmp\litellm-oss` when needed; 2. asks for the OpenAI key with masked input when it is missing; -3. creates a random local `LITELLM_API_KEY` in memory; -4. starts the LiteLLM proxy in the background; -5. enables the optional Codex `cost-routing` profile. -6. opens Codex with that profile; -7. stops LiteLLM and restores the previous configuration when Codex closes. +3. asks for the Gemini key with masked input when it is missing; this is optional + but enables the Gemini model groups; +4. creates a random local `LITELLM_API_KEY` in memory; +5. starts the LiteLLM proxy in the background; +6. enables the optional Codex `cost-routing` profile; +7. opens Codex with that profile; +8. stops LiteLLM and restores the previous configuration when Codex closes. There is no key to copy and no second terminal is required. 
+### Optional local web key session + +If you prefer entering keys in a local page for one work session, start: + +```powershell +.\scripts\python\Start-CodexKeySessionWeb.ps1 +``` + +Then open `http://127.0.0.1:8787/`, paste `OPENAI_API_KEY`, +`GEMINI_API_KEY`, or `HF_TOKEN`, and submit the form. The page starts the +LiteLLM proxy on `http://127.0.0.1:4000/v1` with those keys only in the proxy +process environment. The keys are not written to disk and the web server +suppresses request logging. + To launch the optional Hugging Face-facing profile instead of the local LiteLLM proxy: @@ -174,7 +196,7 @@ Optional budgets and forced routing: ```powershell python .\scripts\python\codex_cost_router.py run ` - --force-model codex-strong ` + --force-model codex-deep ` --provider openai ` --max-input-tokens 8000 ` --max-output-tokens 3000 ` @@ -201,8 +223,11 @@ Prompts and API keys are not logged. - `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow. - `codex-cost-routing.cmd`: simple Windows launcher. - `codex_cost_router.py`: prompt optimization and one-shot routing. +- `codex_key_session_web.py`: local-only web form for session keys. +- `Start-CodexKeySessionWeb.ps1`: PowerShell launcher for the local key page. - `codex-routing-policy.yaml`: editable routing policy and fallback order. -- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallback. +- `litellm-cost-routing.yaml`: local LiteLLM OSS OpenAI/Gemini model groups, + context-window fallbacks, cooldowns, and compatibility aliases. 
## Notes diff --git a/scripts/python/Start-CodexKeySessionWeb.ps1 b/scripts/python/Start-CodexKeySessionWeb.ps1 new file mode 100644 index 0000000..07f7f15 --- /dev/null +++ b/scripts/python/Start-CodexKeySessionWeb.ps1 @@ -0,0 +1,23 @@ +[CmdletBinding()] +param( + [int]$UiPort = 8787, + [int]$ProxyPort = 4000 +) + +$ErrorActionPreference = 'Stop' +$pythonPath = Join-Path $env:USERPROFILE '.cache\codex-runtimes\codex-primary-runtime\dependencies\python\python.exe' +if (-not (Test-Path -LiteralPath $pythonPath)) { + $python = Get-Command python -ErrorAction SilentlyContinue + if (-not $python) { + throw 'Python 3.10+ est introuvable.' + } + $pythonPath = $python.Source +} + +$scriptPath = Join-Path $PSScriptRoot 'codex_key_session_web.py' +$configPath = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml' +if (Test-Path -LiteralPath (Join-Path $PSScriptRoot 'config.yaml')) { + $configPath = Join-Path $PSScriptRoot 'config.yaml' +} + +& $pythonPath $scriptPath --ui-port $UiPort --proxy-port $ProxyPort --config $configPath diff --git a/scripts/python/codex-routing-policy.yaml b/scripts/python/codex-routing-policy.yaml index a1f4700..a1200f9 100644 --- a/scripts/python/codex-routing-policy.yaml +++ b/scripts/python/codex-routing-policy.yaml @@ -7,7 +7,7 @@ open_models_only: false max_cost_usd: 0.0 task_provider_rules: - simple: huggingface + simple: auto medium: auto complex: openai diff --git a/scripts/python/codex_cost_router.py b/scripts/python/codex_cost_router.py index f84a0f2..a7b6af1 100644 --- a/scripts/python/codex_cost_router.py +++ b/scripts/python/codex_cost_router.py @@ -24,15 +24,29 @@ CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup" BEGIN_MARKER = "# BEGIN CODEX COST ROUTER" END_MARKER = "# END CODEX COST ROUTER" -DEFAULT_MODEL = "codex-strong" +LIGHT_MODEL = "codex-light" +DEFAULT_MODEL = "codex-default" +LONG_MODEL = "codex-long" +DEEP_MODEL = "codex-deep" +LEGACY_CHEAP_MODEL = "codex-cheap" +LEGACY_STRONG_MODEL = "codex-strong" HF_FAST_MODEL 
= "codex-hf-fast" HF_CHEAP_MODEL = "codex-hf-cheap" HF_DIRECT_MODEL = "openai/gpt-oss-120b:fastest" DEFAULT_MAX_INPUT_TOKENS = 12_000 DEFAULT_MAX_OUTPUT_TOKENS = 2_000 -PROVIDERS = ("auto", "openai", "huggingface") +PROVIDERS = ("auto", "openai", "gemini", "huggingface") CODEX_PROVIDERS = ("litellm", "huggingface") -MODELS = ("codex-cheap", DEFAULT_MODEL, HF_FAST_MODEL, HF_CHEAP_MODEL) +MODELS = ( + LIGHT_MODEL, + DEFAULT_MODEL, + LONG_MODEL, + DEEP_MODEL, + LEGACY_CHEAP_MODEL, + LEGACY_STRONG_MODEL, + HF_FAST_MODEL, + HF_CHEAP_MODEL, +) LITELLM_HOST = "localhost" LITELLM_PORT = 4000 WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe") @@ -43,7 +57,7 @@ "open_models_only": False, "max_cost_usd": 0.0, "task_provider_rules": { - "simple": "huggingface", + "simple": "auto", "medium": "auto", "complex": "openai", }, @@ -53,8 +67,12 @@ # Approximate placeholders in USD per million tokens. Adjust these estimates to # match the deployments configured in your local LiteLLM OSS proxy. 
ESTIMATED_RATES = { - "codex-cheap": {"input": 0.15, "output": 0.60}, + LIGHT_MODEL: {"input": 0.20, "output": 0.80}, DEFAULT_MODEL: {"input": 2.00, "output": 8.00}, + LONG_MODEL: {"input": 0.80, "output": 3.00}, + DEEP_MODEL: {"input": 2.50, "output": 10.00}, + LEGACY_CHEAP_MODEL: {"input": 0.20, "output": 0.80}, + LEGACY_STRONG_MODEL: {"input": 2.00, "output": 8.00}, HF_CHEAP_MODEL: {"input": 0.10, "output": 0.30}, HF_FAST_MODEL: {"input": 0.25, "output": 0.75}, } @@ -104,6 +122,19 @@ "provider benchmark", "benchmark providers", ) +LONG_CONTEXT_TERMS = ( + "gros contexte", + "long contexte", + "long context", + "large context", + "logs", + "fichier volumineux", + "large file", + "synthese", + "synthèse", + "summarize", + "compare documents", +) PROFILE_BLOCK = f"""\ # BEGIN CODEX COST ROUTER @@ -111,7 +142,6 @@ name = "LiteLLM OSS Cost Router" base_url = "http://localhost:4000/v1" env_key = "LITELLM_API_KEY" -wire_api = "responses" [model_providers.huggingface] name = "Hugging Face Inference Providers" @@ -122,7 +152,10 @@ [profiles.cost-routing] model = "{DEFAULT_MODEL}" model_provider = "litellm" -model_reasoning_effort = "low" +model_reasoning_effort = "medium" +model_verbosity = "low" +model_auto_compact_token_limit = 64000 +tool_output_token_limit = 8000 [profiles.cost-routing-hf] model = "{HF_DIRECT_MODEL}" @@ -446,22 +479,35 @@ def route_model( complexity, reason = classify_complexity(prompt) normalized = normalize_for_matching(prompt) wants_hf = any(term in normalized for term in HF_TERMS) + wants_long_context = any(term in normalized for term in LONG_CONTEXT_TERMS) if provider == "huggingface": if hf_available(): model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL return model, f"huggingface provider requested; {reason}" - return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using OpenAI tier" + return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using default OpenAI/Gemini tier" if provider == "openai": 
- model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL + model = LIGHT_MODEL if complexity == "simple" else DEEP_MODEL return model, f"openai provider requested; {reason}" + if provider == "gemini": + model = LIGHT_MODEL if complexity == "simple" and not wants_long_context else LONG_MODEL + return model, f"gemini provider requested; {reason}" + if wants_hf and hf_available(): model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL return model, f"huggingface-related task; {reason}" - model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL + if wants_long_context: + return LONG_MODEL, f"long-context task; {reason}" + + if complexity == "simple": + model = LIGHT_MODEL + elif complexity == "complex": + model = DEEP_MODEL + else: + model = DEFAULT_MODEL return model, reason diff --git a/scripts/python/codex_key_session_web.py b/scripts/python/codex_key_session_web.py new file mode 100644 index 0000000..f957582 --- /dev/null +++ b/scripts/python/codex_key_session_web.py @@ -0,0 +1,309 @@ +"""Local-only web form for starting a LiteLLM Codex session with in-memory keys.""" + +from __future__ import annotations + +import argparse +import html +import os +import secrets +import subprocess +import sys +import time +import urllib.parse +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from typing import ClassVar + + +DEFAULT_HOST = "127.0.0.1" +DEFAULT_UI_PORT = 8787 +DEFAULT_PROXY_PORT = 4000 + + +PAGE = """ + + + + + Codex LiteLLM Session Keys + + + +
+

Codex LiteLLM session keys

+

Keys are kept only in this local process environment and passed to the LiteLLM subprocess. They are not written to disk.

+ {status} +
+ + + + + + + +
+
+ +
+

Proxy URL: http://127.0.0.1:{proxy_port}/v1

+
+ + +""" + + +def find_litellm(root: Path) -> Path: + """Find a local LiteLLM executable.""" + candidates = [ + root / "venv" / "Scripts" / "litellm.exe", + Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe"), + Path.home() / ".codex" / "litellm-proxy" / "venv" / "Scripts" / "litellm.exe", + ] + for candidate in candidates: + if candidate.exists(): + return candidate + raise FileNotFoundError("LiteLLM executable not found. Install LiteLLM first.") + + +def wait_for_port(host: str, port: int, timeout: float = 20.0) -> bool: + """Wait for a TCP port to accept connections.""" + import socket + + deadline = time.time() + timeout + while time.time() < deadline: + try: + with socket.create_connection((host, port), timeout=0.5): + return True + except OSError: + time.sleep(0.25) + return False + + +class SessionState: + """Mutable server state.""" + + process: subprocess.Popen[str] | None = None + message = "No session proxy started from this page yet." + + +class KeySessionHandler(BaseHTTPRequestHandler): + """Serve the local key form and manage the LiteLLM subprocess.""" + + state: ClassVar[SessionState] + config_path: ClassVar[Path] + litellm_path: ClassVar[Path] + proxy_host: ClassVar[str] + proxy_port: ClassVar[int] + + def log_message(self, format: str, *args: object) -> None: + """Suppress default request logs so keys never appear in terminal logs.""" + return + + def do_GET(self) -> None: # noqa: N802 + if self.path not in {"/", "/status"}: + self.send_error(404) + return + self._send_page() + + def do_POST(self) -> None: # noqa: N802 + if self.path == "/start": + self._start_proxy() + return + if self.path == "/stop": + self._stop_proxy("Session proxy stopped.") + self._send_page() + return + self.send_error(404) + + def _send_page(self) -> None: + safe_message = html.escape(self.state.message) + status = f'
{safe_message}
' + body = PAGE.format(status=status, proxy_port=self.proxy_port).encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Cache-Control", "no-store") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def _read_form(self) -> dict[str, str]: + length = int(self.headers.get("Content-Length", "0")) + raw = self.rfile.read(length).decode("utf-8", errors="replace") + parsed = urllib.parse.parse_qs(raw, keep_blank_values=True) + return {key: values[-1].strip() for key, values in parsed.items()} + + def _start_proxy(self) -> None: + form = self._read_form() + openai_key = form.get("OPENAI_API_KEY", "") + gemini_key = form.get("GEMINI_API_KEY", "") + hf_token = form.get("HF_TOKEN", "") + if not any((openai_key, gemini_key, hf_token)): + self.state.message = "Provide at least one provider key." + self._send_page() + return + + self._stop_proxy("Replacing previous session proxy.") + env = os.environ.copy() + env["PYTHONUTF8"] = "1" + env["PYTHONIOENCODING"] = "utf-8" + env["LITELLM_API_KEY"] = "sk-local-" + secrets.token_hex(16) + if openai_key: + env["OPENAI_API_KEY"] = openai_key + if gemini_key: + env["GEMINI_API_KEY"] = gemini_key + if hf_token: + env["HF_TOKEN"] = hf_token + + try: + self.state.process = subprocess.Popen( + [ + str(self.litellm_path), + "--config", + str(self.config_path), + "--host", + self.proxy_host, + "--port", + str(self.proxy_port), + ], + cwd=str(self.config_path.parent), + env=env, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + text=True, + ) + except OSError as exc: + self.state.message = f"Unable to start LiteLLM: {exc}" + self._send_page() + return + + if wait_for_port(self.proxy_host, self.proxy_port): + providers = [] + if openai_key: + providers.append("OpenAI") + if gemini_key: + providers.append("Gemini") + if hf_token: + providers.append("Hugging Face") + self.state.message = "Session proxy started with: 
" + ", ".join(providers) + else: + self.state.message = "LiteLLM process started, but the proxy port did not become ready yet." + self._send_page() + + def _stop_proxy(self, message: str) -> None: + process = self.state.process + if process and process.poll() is None: + process.terminate() + try: + process.wait(timeout=8) + except subprocess.TimeoutExpired: + process.kill() + process.wait(timeout=8) + self.state.process = None + self.state.message = message + + +def run_server(args: argparse.Namespace) -> int: + """Run the local key session web server.""" + config_path = Path(args.config).resolve() + root = config_path.parent + handler = KeySessionHandler + handler.state = SessionState() + handler.config_path = config_path + handler.litellm_path = find_litellm(root) + handler.proxy_host = args.proxy_host + handler.proxy_port = args.proxy_port + + server = ThreadingHTTPServer((args.host, args.ui_port), handler) + print(f"Open http://{args.host}:{args.ui_port}/ to enter session keys.") + print(f"LiteLLM proxy will run at http://{args.proxy_host}:{args.proxy_port}/v1.") + try: + server.serve_forever() + except KeyboardInterrupt: + handler.state.message = "Stopping web key session." 
+ if handler.state.process and handler.state.process.poll() is None: + handler.state.process.terminate() + return 0 + finally: + server.server_close() + return 0 + + +def parse_args(argv: list[str]) -> argparse.Namespace: + """Parse CLI arguments.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--host", default=DEFAULT_HOST) + parser.add_argument("--ui-port", type=int, default=DEFAULT_UI_PORT) + parser.add_argument("--proxy-host", default=DEFAULT_HOST) + parser.add_argument("--proxy-port", type=int, default=DEFAULT_PROXY_PORT) + parser.add_argument( + "--config", + default=str(Path(__file__).with_name("litellm-cost-routing.yaml")), + help="LiteLLM YAML config path.", + ) + return parser.parse_args(argv) + + +if __name__ == "__main__": + raise SystemExit(run_server(parse_args(sys.argv[1:]))) diff --git a/scripts/python/litellm-cost-routing.yaml b/scripts/python/litellm-cost-routing.yaml index 914ff7b..d7027c4 100644 --- a/scripts/python/litellm-cost-routing.yaml +++ b/scripts/python/litellm-cost-routing.yaml @@ -1,21 +1,92 @@ -# LiteLLM OSS self-hosted proxy example. -# Two explicit tiers keep the local setup simple: -# - codex-cheap for low-cost/simple work -# - codex-strong for default, medium, and complex work -# Optional Hugging Face tiers use HF_TOKEN and let Hugging Face route across -# inference providers behind a single token. +# LiteLLM OSS self-hosted proxy example for Codex. +# Task-oriented aliases let Codex route by workload instead of hard-coding a +# single provider: +# - codex-light: cheap/frequent work, Gemini Flash biased +# - codex-default: normal coding work, OpenAI biased with Gemini relief +# - codex-long: long-context reads and synthesis, Gemini Pro biased +# - codex-deep: high-stakes debugging/security/architecture, OpenAI biased # API keys are read from environment variables and must never be committed. model_list: + # Backward-compatible aliases used by older wrapper calls. 
- model_name: codex-cheap litellm_params: - model: openai/gpt-5-mini + model: gemini/gemini-3.5-flash + api_key: os.environ/GEMINI_API_KEY + weight: 6 + + - model_name: codex-cheap + litellm_params: + model: openai/gpt-5.4-mini + api_key: os.environ/OPENAI_API_KEY + weight: 4 + + - model_name: codex-strong + litellm_params: + model: openai/gpt-5.5 api_key: os.environ/OPENAI_API_KEY + weight: 8 - model_name: codex-strong litellm_params: - model: openai/gpt-5 + model: gemini/gemini-3.5-pro + api_key: os.environ/GEMINI_API_KEY + weight: 2 + + # Preferred Codex-facing aliases. + - model_name: codex-light + litellm_params: + model: gemini/gemini-3.5-flash + api_key: os.environ/GEMINI_API_KEY + weight: 7 + + - model_name: codex-light + litellm_params: + model: openai/gpt-5.4-mini + api_key: os.environ/OPENAI_API_KEY + weight: 3 + + - model_name: codex-default + litellm_params: + model: openai/gpt-5.5 + api_key: os.environ/OPENAI_API_KEY + weight: 8 + + - model_name: codex-default + litellm_params: + model: gemini/gemini-3.5-pro + api_key: os.environ/GEMINI_API_KEY + weight: 2 + + - model_name: codex-long + litellm_params: + model: gemini/gemini-3.5-pro + api_key: os.environ/GEMINI_API_KEY + weight: 7 + + - model_name: codex-long + litellm_params: + model: gemini/gemini-2.5-pro + api_key: os.environ/GEMINI_API_KEY + weight: 2 + + - model_name: codex-long + litellm_params: + model: openai/gpt-5.5 api_key: os.environ/OPENAI_API_KEY + weight: 1 + + - model_name: codex-deep + litellm_params: + model: openai/gpt-5.5 + api_key: os.environ/OPENAI_API_KEY + weight: 10 + + - model_name: codex-deep + litellm_params: + model: gemini/gemini-3.5-pro + api_key: os.environ/GEMINI_API_KEY + weight: 1 - model_name: codex-hf-cheap litellm_params: @@ -28,17 +99,61 @@ model_list: api_key: os.environ/HF_TOKEN router_settings: + routing_strategy: simple-shuffle + num_retries: 2 + timeout: 180 + cooldown_time: 90 + enable_pre_call_checks: true + model_group_alias: + gpt-5.5: codex-default + 
gpt-5.4-mini: codex-light + gemini-3.5-pro: codex-long + gemini-3.5-flash: codex-light fallbacks: + - codex-light: + - codex-default + - codex-default: + - codex-long + - codex-light + - codex-long: + - codex-default + - codex-deep: + - codex-default + - codex-long - codex-cheap: - codex-strong + - codex-default + - codex-strong: + - codex-default + - codex-long - codex-hf-cheap: + - codex-light - codex-cheap - - codex-strong - codex-hf-fast: - - codex-strong + - codex-default + - codex-deep + context_window_fallbacks: + - codex-light: + - codex-long + - codex-default: + - codex-long + - codex-deep: + - codex-long + allowed_fails_policy: + AuthenticationErrorAllowedFails: 0 + TimeoutErrorAllowedFails: 2 + RateLimitErrorAllowedFails: 4 litellm_settings: + drop_params: true set_verbose: false + request_timeout: 180 + num_retries: 2 general_settings: master_key: os.environ/LITELLM_API_KEY + disable_spend_logs: true + background_health_checks: true + health_check_interval: 60 + enable_health_check_routing: true + health_check_ignore_transient_errors: true diff --git a/scripts/python/tests/test_codex_cost_router.py b/scripts/python/tests/test_codex_cost_router.py index f838cb7..cf270bb 100644 --- a/scripts/python/tests/test_codex_cost_router.py +++ b/scripts/python/tests/test_codex_cost_router.py @@ -1,4 +1,4 @@ -"""Tests for the optional Codex cost-routing wrapper.""" +"""Tests for the optional Codex cost-routing wrapper.""" import importlib.util import tempfile @@ -25,13 +25,23 @@ def test_compress_logs_removes_low_value_debug_lines(self) -> None: self.assertEqual(ROUTER.compress_logs(text), "ERROR request failed") def test_route_model_uses_expected_aliases(self) -> None: - self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-cheap") - self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-strong") - self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-strong") + 
self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-light") + self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-default") + self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-deep") def test_route_model_matches_accented_french_keywords(self) -> None: - self.assertEqual(ROUTER.route_model("Prépare un résumé du README")[0], "codex-cheap") - self.assertEqual(ROUTER.route_model("Question de fiscalité pour Odoo")[0], "codex-strong") + self.assertEqual(ROUTER.route_model("Prépare un résumé du README")[0], "codex-light") + self.assertEqual(ROUTER.route_model("Question de fiscalité pour Odoo")[0], "codex-deep") + + def test_route_model_sends_long_context_to_gemini_biased_alias(self) -> None: + self.assertEqual( + ROUTER.route_model("Analyse ces logs et fais une synthese long context")[0], + "codex-long", + ) + self.assertEqual( + ROUTER.route_model("Summarize this large file", provider="gemini")[0], + "codex-long", + ) def test_route_model_can_prefer_hugging_face_when_token_exists(self) -> None: with patch.dict(ROUTER.os.environ, {"HF_TOKEN": "hf_test"}): @@ -47,7 +57,7 @@ def test_route_model_can_prefer_hugging_face_when_token_exists(self) -> None: def test_route_model_falls_back_when_hugging_face_token_is_missing(self) -> None: with patch.dict(ROUTER.os.environ, {}, clear=True): model, reason = ROUTER.route_model("Use Hugging Face providers", provider="huggingface") - self.assertEqual(model, "codex-strong") + self.assertEqual(model, "codex-default") self.assertIn("HF_TOKEN is missing", reason) def test_codex_provider_helpers_select_expected_profiles(self) -> None: @@ -158,3 +168,4 @@ def test_enable_disable_restores_original_config_bytes(self) -> None: if __name__ == "__main__": unittest.main() + diff --git a/scripts/python/tests/test_codex_key_session_web.py b/scripts/python/tests/test_codex_key_session_web.py new file mode 100644 index 0000000..afce691 --- /dev/null +++ 
b/scripts/python/tests/test_codex_key_session_web.py @@ -0,0 +1,45 @@ +"""Tests for the optional local web key session launcher.""" + +import importlib.util +import unittest +from pathlib import Path +from unittest.mock import MagicMock + + +MODULE_PATH = Path(__file__).resolve().parents[1] / "codex_key_session_web.py" +SPEC = importlib.util.spec_from_file_location("codex_key_session_web", MODULE_PATH) +if SPEC is None or SPEC.loader is None: + raise RuntimeError("Unable to load codex_key_session_web.py") +WEB = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(WEB) + + +class KeySessionWebTests(unittest.TestCase): + def test_parse_args_defaults_to_localhost(self) -> None: + args = WEB.parse_args([]) + self.assertEqual(args.host, "127.0.0.1") + self.assertEqual(args.ui_port, 8787) + self.assertEqual(args.proxy_port, 4000) + + def test_log_message_is_suppressed(self) -> None: + handler = object.__new__(WEB.KeySessionHandler) + self.assertIsNone(handler.log_message("secret %s", "sk-test")) + + def test_stop_proxy_terminates_running_process(self) -> None: + process = MagicMock() + process.poll.return_value = None + state = WEB.SessionState() + state.process = process + + handler = object.__new__(WEB.KeySessionHandler) + handler.state = state + handler._stop_proxy("done") + + process.terminate.assert_called_once() + process.wait.assert_called_once_with(timeout=8) + self.assertIsNone(state.process) + self.assertEqual(state.message, "done") + + +if __name__ == "__main__": + unittest.main()