Tibo2403 · Tibo2403 · Jun 27, 2026 · Jun 27, 2026 · Jun 27, 2026
@@ -0,0 +1,41 @@
+<#
+.SYNOPSIS
+Installs the local LiteLLM proxy assets into the current user's profile.
+
+.DESCRIPTION
+Copies the routing config, the key-session web script and its PowerShell
+launcher from this script's folder into %USERPROFILE%\.codex\litellm-proxy,
+then writes a derived config.yaml next to them with the
+"master_key: os.environ/LITELLM_API_KEY" line removed.
+#>
+[CmdletBinding()]
+param()
+
+$ErrorActionPreference = 'Stop'
+$target = Join-Path $env:USERPROFILE '.codex\litellm-proxy'
+New-Item -ItemType Directory -Force -Path $target | Out-Null
+
+$files = @(
+    'litellm-cost-routing.yaml',
+    'codex_key_session_web.py',
+    'Start-CodexKeySessionWeb.ps1'
+)
+
+foreach ($file in $files) {
+    Copy-Item -LiteralPath (Join-Path $PSScriptRoot $file) -Destination (Join-Path $target $file) -Force
+}
+
+$configSource = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml'
+$configTarget = Join-Path $target 'config.yaml'
+
+# Read explicitly as UTF-8: Windows PowerShell 5.1 otherwise decodes a BOM-less
+# file with the ANSI code page, and the UTF-8 write below would then persist
+# mangled non-ASCII characters.
+$text = Get-Content -LiteralPath $configSource -Raw -Encoding UTF8
+
+# Drop the master_key line. Horizontal whitespace only ([ \t]) keeps the match
+# on a single line, and \z also covers a final line with no trailing newline.
+$text = $text -replace '(?m)^[ \t]*master_key:[ \t]*os\.environ/LITELLM_API_KEY[ \t]*(?:\r?\n|\z)',''
+Set-Content -LiteralPath $configTarget -Value $text -Encoding UTF8
+
+Write-Output "Installed local LiteLLM assets in $target"
@@ -64,6 +64,7 @@ function Get-ProxyProcess {
 
 function Remove-SessionSecrets {
     Remove-Item Env:OPENAI_API_KEY -ErrorAction SilentlyContinue
+    Remove-Item Env:GEMINI_API_KEY -ErrorAction SilentlyContinue
     Remove-Item Env:HF_TOKEN -ErrorAction SilentlyContinue
     Remove-Item Env:LITELLM_API_KEY -ErrorAction SilentlyContinue
     Remove-Item Env:PYTHONUTF8 -ErrorAction SilentlyContinue
@@ -104,6 +105,19 @@ function Set-SessionSecrets {
         throw 'OPENAI_API_KEY est obligatoire.'
     }
 
+    if (-not $env:GEMINI_API_KEY) {
+        $secureKey = Read-Host 'GEMINI_API_KEY (optionnel, entree pour activer le dispatching Gemini)' -AsSecureString
+        if ($secureKey.Length -gt 0) {
+            $pointer = [Runtime.InteropServices.Marshal]::SecureStringToBSTR($secureKey)
+            try {
+                $env:GEMINI_API_KEY = [Runtime.InteropServices.Marshal]::PtrToStringBSTR($pointer)
+            }
+            finally {
+                [Runtime.InteropServices.Marshal]::ZeroFreeBSTR($pointer)
+            }
+        }
+    }
+
     if (-not $env:LITELLM_API_KEY) {
         $env:LITELLM_API_KEY = 'sk-local-' + [Guid]::NewGuid().ToString('N')
     }

@@ -52,16 +52,24 @@ Connect the inspector to `http://localhost:8000/mcp`.
 
 `codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and
 a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens,
-apply budgets, and route one-shot Codex tasks to `codex-cheap` or
-`codex-strong`. When `HF_TOKEN` is available, it can also route Hugging Face and
-multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` LiteLLM
-aliases, or launch an optional `cost-routing-hf` Codex profile that points
-directly at the Hugging Face router. `codex-routing-policy.yaml` keeps the
-default provider rules and fallback order editable without changing Python code.
+apply budgets, and route one-shot Codex tasks to `codex-light`,
+`codex-default`, `codex-long`, or `codex-deep`. The local LiteLLM config
+dispatches those aliases across OpenAI and Gemini while keeping API keys in
+environment variables. When `HF_TOKEN` is available, it can also route Hugging
+Face and multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast`
+LiteLLM aliases, or launch an optional `cost-routing-hf` Codex profile that
+points directly at the Hugging Face router. `codex-routing-policy.yaml` keeps
+the default provider rules and fallback order editable without changing Python
+code.
 
 See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup,
 activation, LiteLLM configuration, and usage instructions.
 
+To enter OpenAI, Gemini, or Hugging Face keys through a local page for one
+session, run `Start-CodexKeySessionWeb.ps1` and open
+`http://127.0.0.1:8787/`. Keys are kept in memory for the LiteLLM subprocess
+and are not written to disk.
+
 ## LLM Review Tools
 
 `finance_bias_evaluator.py` is a deterministic first-pass checker for finance

@@ -6,25 +6,31 @@ Optional cost routing for Codex CLI on Windows using the official open-source
 The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens,
 applies budgets, and selects one of these LiteLLM aliases:
 
-- `codex-cheap` for simple, low-cost tasks
-- `codex-strong` for default, medium, and complex tasks
+- `codex-light` for simple, low-cost and frequent tasks
+- `codex-default` for normal coding work
+- `codex-long` for long-context reads, log review, and synthesis
+- `codex-deep` for difficult debugging, security, and architecture decisions
+- `codex-cheap` and `codex-strong` as backward-compatible aliases
 - `codex-hf-cheap` for simple Hugging Face / open-model tasks when `HF_TOKEN`
   is set
 - `codex-hf-fast` for larger Hugging Face / multi-provider tasks when
   `HF_TOKEN` is set
 
-The previous `codex-auto` middle tier was removed because it pointed to the same
-provider model as `codex-strong`, which made the fallback chain redundant. Add a
-third alias again only when it maps to a genuinely different model or provider.
+OpenAI and Gemini are both configured through LiteLLM model groups. The normal
+default keeps most code-generation traffic on OpenAI while letting Gemini absorb
+long-context and lower-risk work. This reduces token saturation without sending
+high-stakes changes blindly to the cheapest model.
 
-API keys are never committed or written to a configuration file.
+API keys are never committed or written to a configuration file. `OPENAI_API_KEY`
+is required for the default profile; `GEMINI_API_KEY` is optional but recommended
+to activate the OpenAI/Gemini dispatching path.
 
 ## Hugging Face Integration
 
 Hugging Face can be used in two optional places.
 
 First, Hugging Face Inference Providers can sit behind LiteLLM as another
-provider pool. The local config includes two optional aliases:
+provider pool. The local config still includes two optional aliases:
 
 ```yaml
 codex-hf-cheap -> huggingface/groq/openai/gpt-oss-120b
@@ -97,7 +103,7 @@ open_models_only: false
 max_cost_usd: 0.0
 
 task_provider_rules:
-  simple: huggingface
+  simple: auto
   medium: auto
   complex: openai
 
@@ -130,14 +136,30 @@ for this command only. The script:
 
 1. installs the official LiteLLM OSS proxy in `C:\tmp\litellm-oss` when needed;
 2. asks for the OpenAI key with masked input when it is missing;
-3. creates a random local `LITELLM_API_KEY` in memory;
-4. starts the LiteLLM proxy in the background;
-5. enables the optional Codex `cost-routing` profile.
-6. opens Codex with that profile;
-7. stops LiteLLM and restores the previous configuration when Codex closes.
+3. asks for the Gemini key with masked input when it is missing; this is optional
+   but enables the Gemini model groups;
+4. creates a random local `LITELLM_API_KEY` in memory;
+5. starts the LiteLLM proxy in the background;
+6. enables the optional Codex `cost-routing` profile;
+7. opens Codex with that profile;
+8. stops LiteLLM and restores the previous configuration when Codex closes.
 
 There is no key to copy and no second terminal is required.
 
+### Optional local web key session
+
+If you prefer entering keys in a local page for one work session, start:
+
+```powershell
+.\scripts\python\Start-CodexKeySessionWeb.ps1
+```
+
+Then open `http://127.0.0.1:8787/`, paste `OPENAI_API_KEY`,
+`GEMINI_API_KEY`, or `HF_TOKEN`, and submit the form. The page starts the
+LiteLLM proxy on `http://127.0.0.1:4000/v1` with those keys only in the proxy
+process environment. The keys are not written to disk and the web server
+suppresses request logging.
+
 To launch the optional Hugging Face-facing profile instead of the local LiteLLM
 proxy:
 
@@ -174,7 +196,7 @@ Optional budgets and forced routing:
 
 ```powershell
 python .\scripts\python\codex_cost_router.py run `
-  --force-model codex-strong `
+  --force-model codex-deep `
   --provider openai `
   --max-input-tokens 8000 `
   --max-output-tokens 3000 `
@@ -201,8 +223,11 @@ Prompts and API keys are not logged.
 - `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow.
 - `codex-cost-routing.cmd`: simple Windows launcher.
 - `codex_cost_router.py`: prompt optimization and one-shot routing.
+- `codex_key_session_web.py`: local-only web form for session keys.
+- `Start-CodexKeySessionWeb.ps1`: PowerShell launcher for the local key page.
 - `codex-routing-policy.yaml`: editable routing policy and fallback order.
-- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallback.
+- `litellm-cost-routing.yaml`: local LiteLLM OSS OpenAI/Gemini model groups,
+  context-window fallbacks, cooldowns, and compatibility aliases.
 
 ## Notes
 

@@ -0,0 +1,47 @@
+<#
+.SYNOPSIS
+Starts the local key-session web page (codex_key_session_web.py).
+
+.DESCRIPTION
+Resolves a Python interpreter (the bundled Codex runtime first, then `python`
+from PATH), picks the LiteLLM config next to this script (config.yaml when
+present, otherwise litellm-cost-routing.yaml) and runs the Python script with
+the requested ports.
+
+.PARAMETER UiPort
+Port passed to the Python script as --ui-port. Defaults to 8787.
+
+.PARAMETER ProxyPort
+Port passed to the Python script as --proxy-port. Defaults to 4000.
+#>
+[CmdletBinding()]
+param(
+    # Reject out-of-range ports here instead of failing later inside Python.
+    [ValidateRange(1, 65535)]
+    [int]$UiPort = 8787,
+    [ValidateRange(1, 65535)]
+    [int]$ProxyPort = 4000
+)
+
+$ErrorActionPreference = 'Stop'
+$pythonPath = Join-Path $env:USERPROFILE '.cache\codex-runtimes\codex-primary-runtime\dependencies\python\python.exe'
+if (-not (Test-Path -LiteralPath $pythonPath)) {
+    $python = Get-Command python -ErrorAction SilentlyContinue
+    if (-not $python) {
+        throw 'Python 3.10+ est introuvable.'
+    }
+    $pythonPath = $python.Source
+}
+
+$scriptPath = Join-Path $PSScriptRoot 'codex_key_session_web.py'
+$configPath = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml'
+$installedConfig = Join-Path $PSScriptRoot 'config.yaml'
+if (Test-Path -LiteralPath $installedConfig) {
+    $configPath = $installedConfig
+}
+
+& $pythonPath $scriptPath --ui-port $UiPort --proxy-port $ProxyPort --config $configPath
+
+# A non-zero exit from a native executable is not a terminating error by
+# default, so hand Python's exit status to the caller explicitly.
+exit $LASTEXITCODE
@@ -7,7 +7,7 @@ open_models_only: false
 max_cost_usd: 0.0
 
 task_provider_rules:
-  simple: huggingface
+  simple: auto
   medium: auto
   complex: openai
 

@@ -24,15 +24,29 @@
 CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup"
 BEGIN_MARKER = "# BEGIN CODEX COST ROUTER"
 END_MARKER = "# END CODEX COST ROUTER"
-DEFAULT_MODEL = "codex-strong"
+LIGHT_MODEL = "codex-light"
+DEFAULT_MODEL = "codex-default"
+LONG_MODEL = "codex-long"
+DEEP_MODEL = "codex-deep"
+LEGACY_CHEAP_MODEL = "codex-cheap"
+LEGACY_STRONG_MODEL = "codex-strong"
 HF_FAST_MODEL = "codex-hf-fast"
 HF_CHEAP_MODEL = "codex-hf-cheap"
 HF_DIRECT_MODEL = "openai/gpt-oss-120b:fastest"
 DEFAULT_MAX_INPUT_TOKENS = 12_000
 DEFAULT_MAX_OUTPUT_TOKENS = 2_000
-PROVIDERS = ("auto", "openai", "huggingface")
+PROVIDERS = ("auto", "openai", "gemini", "huggingface")
 CODEX_PROVIDERS = ("litellm", "huggingface")
-MODELS = ("codex-cheap", DEFAULT_MODEL, HF_FAST_MODEL, HF_CHEAP_MODEL)
+MODELS = (
+    LIGHT_MODEL,
+    DEFAULT_MODEL,
+    LONG_MODEL,
+    DEEP_MODEL,
+    LEGACY_CHEAP_MODEL,
+    LEGACY_STRONG_MODEL,
+    HF_FAST_MODEL,
+    HF_CHEAP_MODEL,
+)
 LITELLM_HOST = "localhost"
 LITELLM_PORT = 4000
 WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe")
@@ -43,7 +57,7 @@
     "open_models_only": False,
     "max_cost_usd": 0.0,
     "task_provider_rules": {
-        "simple": "huggingface",
+        "simple": "auto",
         "medium": "auto",
         "complex": "openai",
     },
@@ -53,8 +67,12 @@
 # Approximate placeholders in USD per million tokens. Adjust these estimates to
 # match the deployments configured in your local LiteLLM OSS proxy.
 ESTIMATED_RATES = {
-    "codex-cheap": {"input": 0.15, "output": 0.60},
+    LIGHT_MODEL: {"input": 0.20, "output": 0.80},
     DEFAULT_MODEL: {"input": 2.00, "output": 8.00},
+    LONG_MODEL: {"input": 0.80, "output": 3.00},
+    DEEP_MODEL: {"input": 2.50, "output": 10.00},
+    LEGACY_CHEAP_MODEL: {"input": 0.20, "output": 0.80},
+    LEGACY_STRONG_MODEL: {"input": 2.00, "output": 8.00},
     HF_CHEAP_MODEL: {"input": 0.10, "output": 0.30},
     HF_FAST_MODEL: {"input": 0.25, "output": 0.75},
 }
@@ -104,14 +122,26 @@
     "provider benchmark",
     "benchmark providers",
 )
+LONG_CONTEXT_TERMS = (
+    "gros contexte",
+    "long contexte",
+    "long context",
+    "large context",
+    "logs",
+    "fichier volumineux",
+    "large file",
+    "synthese",
+    "synthèse",
+    "summarize",
+    "compare documents",
+)
 
 PROFILE_BLOCK = f"""\
 # BEGIN CODEX COST ROUTER
 [model_providers.litellm]
 name = "LiteLLM OSS Cost Router"
 base_url = "http://localhost:4000/v1"
 env_key = "LITELLM_API_KEY"
-wire_api = "responses"
 
 [model_providers.huggingface]
 name = "Hugging Face Inference Providers"
@@ -122,7 +152,10 @@
 [profiles.cost-routing]
 model = "{DEFAULT_MODEL}"
 model_provider = "litellm"
-model_reasoning_effort = "low"
+model_reasoning_effort = "medium"
+model_verbosity = "low"
+model_auto_compact_token_limit = 64000
+tool_output_token_limit = 8000
 
 [profiles.cost-routing-hf]
 model = "{HF_DIRECT_MODEL}"
@@ -446,22 +479,35 @@ def route_model(
     complexity, reason = classify_complexity(prompt)
     normalized = normalize_for_matching(prompt)
     wants_hf = any(term in normalized for term in HF_TERMS)
+    wants_long_context = any(term in normalized for term in LONG_CONTEXT_TERMS)
 
     if provider == "huggingface":
         if hf_available():
             model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL
             return model, f"huggingface provider requested; {reason}"
-        return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using OpenAI tier"
+        return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using default OpenAI/Gemini tier"
 
     if provider == "openai":
-        model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL
+        model = LIGHT_MODEL if complexity == "simple" else DEEP_MODEL
         return model, f"openai provider requested; {reason}"
 
+    if provider == "gemini":
+        model = LIGHT_MODEL if complexity == "simple" and not wants_long_context else LONG_MODEL
+        return model, f"gemini provider requested; {reason}"
+
     if wants_hf and hf_available():
         model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL
         return model, f"huggingface-related task; {reason}"
 
-    model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL
+    if wants_long_context:
+        return LONG_MODEL, f"long-context task; {reason}"
+
+    if complexity == "simple":
+        model = LIGHT_MODEL
+    elif complexity == "complex":
+        model = DEEP_MODEL
+    else:
+        model = DEFAULT_MODEL
     return model, reason