Codex LiteLLM session keys
+Keys are kept only in this local process environment and passed to the LiteLLM subprocess. They are not written to disk.
+ {status} + + +Proxy URL: http://127.0.0.1:{proxy_port}/v1
diff --git a/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 new file mode 100644 index 0000000..bc87468 --- /dev/null +++ b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 @@ -0,0 +1,24 @@ +[CmdletBinding()] +param() + +$ErrorActionPreference = 'Stop' +$target = Join-Path $env:USERPROFILE '.codex\litellm-proxy' +New-Item -ItemType Directory -Force -Path $target | Out-Null + +$files = @( + 'litellm-cost-routing.yaml', + 'codex_key_session_web.py', + 'Start-CodexKeySessionWeb.ps1' +) + +foreach ($file in $files) { + Copy-Item -LiteralPath (Join-Path $PSScriptRoot $file) -Destination (Join-Path $target $file) -Force +} + +$configSource = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml' +$configTarget = Join-Path $target 'config.yaml' +$text = Get-Content -LiteralPath $configSource -Raw +$text = $text -replace '(?m)^\s*master_key:\s*os\.environ/LITELLM_API_KEY\s*\r?\n','' +Set-Content -LiteralPath $configTarget -Value $text -Encoding UTF8 + +Write-Output "Installed local LiteLLM assets in $target" diff --git a/scripts/python/Manage-CodexCostRouting.ps1 b/scripts/python/Manage-CodexCostRouting.ps1 index b5aefb1..a892878 100644 --- a/scripts/python/Manage-CodexCostRouting.ps1 +++ b/scripts/python/Manage-CodexCostRouting.ps1 @@ -64,6 +64,7 @@ function Get-ProxyProcess { function Remove-SessionSecrets { Remove-Item Env:OPENAI_API_KEY -ErrorAction SilentlyContinue + Remove-Item Env:GEMINI_API_KEY -ErrorAction SilentlyContinue Remove-Item Env:HF_TOKEN -ErrorAction SilentlyContinue Remove-Item Env:LITELLM_API_KEY -ErrorAction SilentlyContinue Remove-Item Env:PYTHONUTF8 -ErrorAction SilentlyContinue @@ -104,6 +105,19 @@ function Set-SessionSecrets { throw 'OPENAI_API_KEY est obligatoire.' 
} + if (-not $env:GEMINI_API_KEY) { + $secureKey = Read-Host 'GEMINI_API_KEY (optionnel, entree pour activer le dispatching Gemini)' -AsSecureString + if ($secureKey.Length -gt 0) { + $pointer = [Runtime.InteropServices.Marshal]::SecureStringToBSTR($secureKey) + try { + $env:GEMINI_API_KEY = [Runtime.InteropServices.Marshal]::PtrToStringBSTR($pointer) + } + finally { + [Runtime.InteropServices.Marshal]::ZeroFreeBSTR($pointer) + } + } + } + if (-not $env:LITELLM_API_KEY) { $env:LITELLM_API_KEY = 'sk-local-' + [Guid]::NewGuid().ToString('N') } diff --git a/scripts/python/README.md b/scripts/python/README.md index aa34bfe..02ebe3e 100644 --- a/scripts/python/README.md +++ b/scripts/python/README.md @@ -52,16 +52,24 @@ Connect the inspector to `http://localhost:8000/mcp`. `codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens, -apply budgets, and route one-shot Codex tasks to `codex-cheap` or -`codex-strong`. When `HF_TOKEN` is available, it can also route Hugging Face and -multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` LiteLLM -aliases, or launch an optional `cost-routing-hf` Codex profile that points -directly at the Hugging Face router. `codex-routing-policy.yaml` keeps the -default provider rules and fallback order editable without changing Python code. +apply budgets, and route one-shot Codex tasks to `codex-light`, +`codex-default`, `codex-long`, or `codex-deep`. The local LiteLLM config +dispatches those aliases across OpenAI and Gemini while keeping API keys in +environment variables. When `HF_TOKEN` is available, it can also route Hugging +Face and multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` +LiteLLM aliases, or launch an optional `cost-routing-hf` Codex profile that +points directly at the Hugging Face router. 
`codex-routing-policy.yaml` keeps +the default provider rules and fallback order editable without changing Python +code. See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup, activation, LiteLLM configuration, and usage instructions. +To enter OpenAI, Gemini, or Hugging Face keys through a local page for one +session, run `Start-CodexKeySessionWeb.ps1` and open +`http://127.0.0.1:8787/`. Keys are kept in memory for the LiteLLM subprocess +and are not written to disk. + ## LLM Review Tools `finance_bias_evaluator.py` is a deterministic first-pass checker for finance diff --git a/scripts/python/README_Codex_Cost_Routing.md b/scripts/python/README_Codex_Cost_Routing.md index 20ec04c..2eb5ed9 100644 --- a/scripts/python/README_Codex_Cost_Routing.md +++ b/scripts/python/README_Codex_Cost_Routing.md @@ -6,25 +6,31 @@ Optional cost routing for Codex CLI on Windows using the official open-source The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens, applies budgets, and selects one of these LiteLLM aliases: -- `codex-cheap` for simple, low-cost tasks -- `codex-strong` for default, medium, and complex tasks +- `codex-light` for simple, low-cost and frequent tasks +- `codex-default` for normal coding work +- `codex-long` for long-context reads, log review, and synthesis +- `codex-deep` for difficult debugging, security, and architecture decisions +- `codex-cheap` and `codex-strong` as backward-compatible aliases - `codex-hf-cheap` for simple Hugging Face / open-model tasks when `HF_TOKEN` is set - `codex-hf-fast` for larger Hugging Face / multi-provider tasks when `HF_TOKEN` is set -The previous `codex-auto` middle tier was removed because it pointed to the same -provider model as `codex-strong`, which made the fallback chain redundant. Add a -third alias again only when it maps to a genuinely different model or provider. +OpenAI and Gemini are both configured through LiteLLM model groups. 
The normal +default keeps most code-generation traffic on OpenAI while letting Gemini absorb +long-context and lower-risk work. This reduces token saturation without sending +high-stakes changes blindly to the cheapest model. -API keys are never committed or written to a configuration file. +API keys are never committed or written to a configuration file. `OPENAI_API_KEY` +is required for the default profile; `GEMINI_API_KEY` is optional but recommended +to activate the OpenAI/Gemini dispatching path. ## Hugging Face Integration Hugging Face can be used in two optional places. First, Hugging Face Inference Providers can sit behind LiteLLM as another -provider pool. The local config includes two optional aliases: +provider pool. The local config still includes two optional aliases: ```yaml codex-hf-cheap -> huggingface/groq/openai/gpt-oss-120b @@ -97,7 +103,7 @@ open_models_only: false max_cost_usd: 0.0 task_provider_rules: - simple: huggingface + simple: auto medium: auto complex: openai @@ -130,14 +136,30 @@ for this command only. The script: 1. installs the official LiteLLM OSS proxy in `C:\tmp\litellm-oss` when needed; 2. asks for the OpenAI key with masked input when it is missing; -3. creates a random local `LITELLM_API_KEY` in memory; -4. starts the LiteLLM proxy in the background; -5. enables the optional Codex `cost-routing` profile. -6. opens Codex with that profile; -7. stops LiteLLM and restores the previous configuration when Codex closes. +3. asks for the Gemini key with masked input when it is missing; this is optional + but enables the Gemini model groups; +4. creates a random local `LITELLM_API_KEY` in memory; +5. starts the LiteLLM proxy in the background; +6. enables the optional Codex `cost-routing` profile; +7. opens Codex with that profile; +8. stops LiteLLM and restores the previous configuration when Codex closes. There is no key to copy and no second terminal is required. 
+### Optional local web key session + +If you prefer entering keys in a local page for one work session, start: + +```powershell +.\scripts\python\Start-CodexKeySessionWeb.ps1 +``` + +Then open `http://127.0.0.1:8787/` (the default `-UiPort`), paste `OPENAI_API_KEY`, +`GEMINI_API_KEY`, or `HF_TOKEN`, and submit the form. The page starts the +LiteLLM proxy on `http://127.0.0.1:4000/v1` (the default `-ProxyPort`) with those keys only in the proxy +process environment. The keys are not written to disk and the web server +suppresses request logging. + To launch the optional Hugging Face-facing profile instead of the local LiteLLM proxy: @@ -174,7 +196,7 @@ Optional budgets and forced routing: ```powershell python .\scripts\python\codex_cost_router.py run ` - --force-model codex-strong ` + --force-model codex-deep ` --provider openai ` --max-input-tokens 8000 ` --max-output-tokens 3000 ` @@ -201,8 +223,11 @@ Prompts and API keys are not logged. - `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow. - `codex-cost-routing.cmd`: simple Windows launcher. - `codex_cost_router.py`: prompt optimization and one-shot routing. +- `codex_key_session_web.py`: local-only web form for session keys. +- `Start-CodexKeySessionWeb.ps1`: PowerShell launcher for the local key page. - `codex-routing-policy.yaml`: editable routing policy and fallback order. -- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallback. +- `litellm-cost-routing.yaml`: local LiteLLM OSS OpenAI/Gemini model groups, + context-window fallbacks, cooldowns, and compatibility aliases. 
## Notes diff --git a/scripts/python/Start-CodexKeySessionWeb.ps1 b/scripts/python/Start-CodexKeySessionWeb.ps1 new file mode 100644 index 0000000..07f7f15 --- /dev/null +++ b/scripts/python/Start-CodexKeySessionWeb.ps1 @@ -0,0 +1,23 @@ +[CmdletBinding()] +param( + [int]$UiPort = 8787, + [int]$ProxyPort = 4000 +) + +$ErrorActionPreference = 'Stop' +$pythonPath = Join-Path $env:USERPROFILE '.cache\codex-runtimes\codex-primary-runtime\dependencies\python\python.exe' +if (-not (Test-Path -LiteralPath $pythonPath)) { + $python = Get-Command python -ErrorAction SilentlyContinue + if (-not $python) { + throw 'Python 3.10+ est introuvable.' + } + $pythonPath = $python.Source +} + +$scriptPath = Join-Path $PSScriptRoot 'codex_key_session_web.py' +$configPath = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml' +if (Test-Path -LiteralPath (Join-Path $PSScriptRoot 'config.yaml')) { + $configPath = Join-Path $PSScriptRoot 'config.yaml' +} + +& $pythonPath $scriptPath --ui-port $UiPort --proxy-port $ProxyPort --config $configPath diff --git a/scripts/python/codex-routing-policy.yaml b/scripts/python/codex-routing-policy.yaml index a1f4700..a1200f9 100644 --- a/scripts/python/codex-routing-policy.yaml +++ b/scripts/python/codex-routing-policy.yaml @@ -7,7 +7,7 @@ open_models_only: false max_cost_usd: 0.0 task_provider_rules: - simple: huggingface + simple: auto medium: auto complex: openai diff --git a/scripts/python/codex_cost_router.py b/scripts/python/codex_cost_router.py index f84a0f2..a7b6af1 100644 --- a/scripts/python/codex_cost_router.py +++ b/scripts/python/codex_cost_router.py @@ -24,15 +24,29 @@ CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup" BEGIN_MARKER = "# BEGIN CODEX COST ROUTER" END_MARKER = "# END CODEX COST ROUTER" -DEFAULT_MODEL = "codex-strong" +LIGHT_MODEL = "codex-light" +DEFAULT_MODEL = "codex-default" +LONG_MODEL = "codex-long" +DEEP_MODEL = "codex-deep" +LEGACY_CHEAP_MODEL = "codex-cheap" +LEGACY_STRONG_MODEL = "codex-strong" HF_FAST_MODEL 
= "codex-hf-fast" HF_CHEAP_MODEL = "codex-hf-cheap" HF_DIRECT_MODEL = "openai/gpt-oss-120b:fastest" DEFAULT_MAX_INPUT_TOKENS = 12_000 DEFAULT_MAX_OUTPUT_TOKENS = 2_000 -PROVIDERS = ("auto", "openai", "huggingface") +PROVIDERS = ("auto", "openai", "gemini", "huggingface") CODEX_PROVIDERS = ("litellm", "huggingface") -MODELS = ("codex-cheap", DEFAULT_MODEL, HF_FAST_MODEL, HF_CHEAP_MODEL) +MODELS = ( + LIGHT_MODEL, + DEFAULT_MODEL, + LONG_MODEL, + DEEP_MODEL, + LEGACY_CHEAP_MODEL, + LEGACY_STRONG_MODEL, + HF_FAST_MODEL, + HF_CHEAP_MODEL, +) LITELLM_HOST = "localhost" LITELLM_PORT = 4000 WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe") @@ -43,7 +57,7 @@ "open_models_only": False, "max_cost_usd": 0.0, "task_provider_rules": { - "simple": "huggingface", + "simple": "auto", "medium": "auto", "complex": "openai", }, @@ -53,8 +67,12 @@ # Approximate placeholders in USD per million tokens. Adjust these estimates to # match the deployments configured in your local LiteLLM OSS proxy. 
ESTIMATED_RATES = { - "codex-cheap": {"input": 0.15, "output": 0.60}, + LIGHT_MODEL: {"input": 0.20, "output": 0.80}, DEFAULT_MODEL: {"input": 2.00, "output": 8.00}, + LONG_MODEL: {"input": 0.80, "output": 3.00}, + DEEP_MODEL: {"input": 2.50, "output": 10.00}, + LEGACY_CHEAP_MODEL: {"input": 0.20, "output": 0.80}, + LEGACY_STRONG_MODEL: {"input": 2.00, "output": 8.00}, HF_CHEAP_MODEL: {"input": 0.10, "output": 0.30}, HF_FAST_MODEL: {"input": 0.25, "output": 0.75}, } @@ -104,6 +122,19 @@ "provider benchmark", "benchmark providers", ) +LONG_CONTEXT_TERMS = ( + "gros contexte", + "long contexte", + "long context", + "large context", + "logs", + "fichier volumineux", + "large file", + "synthese", + "synthèse", + "summarize", + "compare documents", +) PROFILE_BLOCK = f"""\ # BEGIN CODEX COST ROUTER @@ -111,7 +142,6 @@ name = "LiteLLM OSS Cost Router" base_url = "http://localhost:4000/v1" env_key = "LITELLM_API_KEY" -wire_api = "responses" [model_providers.huggingface] name = "Hugging Face Inference Providers" @@ -122,7 +152,10 @@ [profiles.cost-routing] model = "{DEFAULT_MODEL}" model_provider = "litellm" -model_reasoning_effort = "low" +model_reasoning_effort = "medium" +model_verbosity = "low" +model_auto_compact_token_limit = 64000 +tool_output_token_limit = 8000 [profiles.cost-routing-hf] model = "{HF_DIRECT_MODEL}" @@ -446,22 +479,35 @@ def route_model( complexity, reason = classify_complexity(prompt) normalized = normalize_for_matching(prompt) wants_hf = any(term in normalized for term in HF_TERMS) + wants_long_context = any(term in normalized for term in LONG_CONTEXT_TERMS) if provider == "huggingface": if hf_available(): model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL return model, f"huggingface provider requested; {reason}" - return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using OpenAI tier" + return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using default OpenAI/Gemini tier" if provider == "openai": 
- model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL + model = LIGHT_MODEL if complexity == "simple" else DEEP_MODEL return model, f"openai provider requested; {reason}" + if provider == "gemini": + model = LIGHT_MODEL if complexity == "simple" and not wants_long_context else LONG_MODEL + return model, f"gemini provider requested; {reason}" + if wants_hf and hf_available(): model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL return model, f"huggingface-related task; {reason}" - model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL + if wants_long_context: + return LONG_MODEL, f"long-context task; {reason}" + + if complexity == "simple": + model = LIGHT_MODEL + elif complexity == "complex": + model = DEEP_MODEL + else: + model = DEFAULT_MODEL return model, reason diff --git a/scripts/python/codex_key_session_web.py b/scripts/python/codex_key_session_web.py new file mode 100644 index 0000000..f957582 --- /dev/null +++ b/scripts/python/codex_key_session_web.py @@ -0,0 +1,309 @@ +"""Local-only web form for starting a LiteLLM Codex session with in-memory keys.""" + +from __future__ import annotations + +import argparse +import html +import os +import secrets +import subprocess +import sys +import time +import urllib.parse +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from pathlib import Path +from typing import ClassVar + + +DEFAULT_HOST = "127.0.0.1" +DEFAULT_UI_PORT = 8787 +DEFAULT_PROXY_PORT = 4000 + + +PAGE = """ + +
+ + +Keys are kept only in this local process environment and passed to the LiteLLM subprocess. They are not written to disk.
+ {status} + + +Proxy URL: http://127.0.0.1:{proxy_port}/v1