diff --git a/scripts/python/Install-CodexLocalLiteLLMAssets.ps1 b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1
new file mode 100644
index 0000000..bc87468
--- /dev/null
+++ b/scripts/python/Install-CodexLocalLiteLLMAssets.ps1
@@ -0,0 +1,29 @@
+<#
+.SYNOPSIS
+Installs the local LiteLLM assets into %USERPROFILE%\.codex\litellm-proxy.
+
+.DESCRIPTION
+Copies three files from this script's directory, then writes config.yaml: a copy
+of litellm-cost-routing.yaml without its LITELLM_API_KEY master_key line.
+#>
+[CmdletBinding()]
+param()
+
+$ErrorActionPreference = 'Stop'
+
+$installDir = Join-Path $env:USERPROFILE '.codex\litellm-proxy'
+New-Item -ItemType Directory -Force -Path $installDir | Out-Null
+
+# Verbatim copies; files already present in the install directory are overwritten.
+'litellm-cost-routing.yaml', 'codex_key_session_web.py', 'Start-CodexKeySessionWeb.ps1' |
+    ForEach-Object {
+        Copy-Item -LiteralPath (Join-Path $PSScriptRoot $_) -Destination (Join-Path $installDir $_) -Force
+    }
+
+# NOTE(review): stripping master_key presumably leaves the installed proxy without key auth - confirm this is intended.
+$masterKeyLine = '(?m)^\s*master_key:\s*os\.environ/LITELLM_API_KEY\s*\r?\n'
+$yaml = Get-Content -LiteralPath (Join-Path $PSScriptRoot 'litellm-cost-routing.yaml') -Raw
+$stripped = $yaml -replace $masterKeyLine, ''
+Set-Content -LiteralPath (Join-Path $installDir 'config.yaml') -Value $stripped -Encoding UTF8
+
+Write-Output "Installed local LiteLLM assets in $installDir"
diff --git a/scripts/python/Manage-CodexCostRouting.ps1 b/scripts/python/Manage-CodexCostRouting.ps1
index b5aefb1..a892878 100644
--- a/scripts/python/Manage-CodexCostRouting.ps1
+++ b/scripts/python/Manage-CodexCostRouting.ps1
@@ -64,6 +64,7 @@ function Get-ProxyProcess {
 
 function Remove-SessionSecrets {
     Remove-Item Env:OPENAI_API_KEY -ErrorAction SilentlyContinue
+    Remove-Item Env:GEMINI_API_KEY -ErrorAction SilentlyContinue
     Remove-Item Env:HF_TOKEN -ErrorAction SilentlyContinue
     Remove-Item Env:LITELLM_API_KEY -ErrorAction SilentlyContinue
     Remove-Item Env:PYTHONUTF8 -ErrorAction SilentlyContinue
@@ -104,6 +105,19 @@ function Set-SessionSecrets {
         throw 'OPENAI_API_KEY est obligatoire.'
     }
 
+    if (-not $env:GEMINI_API_KEY) {
+        $secureKey = Read-Host 'GEMINI_API_KEY (optionnel, entree pour activer le dispatching Gemini)' -AsSecureString
+        if ($secureKey.Length -gt 0) {
+            $pointer = [Runtime.InteropServices.Marshal]::SecureStringToBSTR($secureKey)
+            try {
+                $env:GEMINI_API_KEY = [Runtime.InteropServices.Marshal]::PtrToStringBSTR($pointer)
+            }
+            finally {
+                [Runtime.InteropServices.Marshal]::ZeroFreeBSTR($pointer)
+            }
+        }
+    }
+
     if (-not $env:LITELLM_API_KEY) {
         $env:LITELLM_API_KEY = 'sk-local-' + [Guid]::NewGuid().ToString('N')
     }
diff --git a/scripts/python/README.md b/scripts/python/README.md
index aa34bfe..02ebe3e 100644
--- a/scripts/python/README.md
+++ b/scripts/python/README.md
@@ -52,16 +52,24 @@ Connect the inspector to `http://localhost:8000/mcp`.
 
 `codex_cost_router.py` is an optional Windows-friendly wrapper for Codex CLI and
 a local LiteLLM OSS proxy. It can clean prompts, compress logs, estimate tokens,
-apply budgets, and route one-shot Codex tasks to `codex-cheap` or
-`codex-strong`. When `HF_TOKEN` is available, it can also route Hugging Face and
-multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast` LiteLLM
-aliases, or launch an optional `cost-routing-hf` Codex profile that points
-directly at the Hugging Face router. `codex-routing-policy.yaml` keeps the
-default provider rules and fallback order editable without changing Python code.
+apply budgets, and route one-shot Codex tasks to `codex-light`,
+`codex-default`, `codex-long`, or `codex-deep`. The local LiteLLM config
+dispatches those aliases across OpenAI and Gemini while keeping API keys in
+environment variables. When `HF_TOKEN` is available, it can also route Hugging
+Face and multi-provider tasks through the `codex-hf-cheap` and `codex-hf-fast`
+LiteLLM aliases, or launch an optional `cost-routing-hf` Codex profile that
+points directly at the Hugging Face router. `codex-routing-policy.yaml` keeps
+the default provider rules and fallback order editable without changing Python
+code.
 
 See [`README_Codex_Cost_Routing.md`](README_Codex_Cost_Routing.md) for setup,
 activation, LiteLLM configuration, and usage instructions.
 
+To enter OpenAI, Gemini, or Hugging Face keys through a local page for one
+session, run `Start-CodexKeySessionWeb.ps1` and open
+`http://127.0.0.1:8787/`. Keys are kept in memory for the LiteLLM subprocess
+and are not written to disk.
+
 ## LLM Review Tools
 
 `finance_bias_evaluator.py` is a deterministic first-pass checker for finance
diff --git a/scripts/python/README_Codex_Cost_Routing.md b/scripts/python/README_Codex_Cost_Routing.md
index 20ec04c..2eb5ed9 100644
--- a/scripts/python/README_Codex_Cost_Routing.md
+++ b/scripts/python/README_Codex_Cost_Routing.md
@@ -6,25 +6,31 @@ Optional cost routing for Codex CLI on Windows using the official open-source
 The local Python wrapper cleans prompts, compresses noisy logs, estimates tokens,
 applies budgets, and selects one of these LiteLLM aliases:
 
-- `codex-cheap` for simple, low-cost tasks
-- `codex-strong` for default, medium, and complex tasks
+- `codex-light` for simple, low-cost and frequent tasks
+- `codex-default` for normal coding work
+- `codex-long` for long-context reads, log review, and synthesis
+- `codex-deep` for difficult debugging, security, and architecture decisions
+- `codex-cheap` and `codex-strong` as backward-compatible aliases
 - `codex-hf-cheap` for simple Hugging Face / open-model tasks when `HF_TOKEN`
   is set
 - `codex-hf-fast` for larger Hugging Face / multi-provider tasks when
   `HF_TOKEN` is set
 
-The previous `codex-auto` middle tier was removed because it pointed to the same
-provider model as `codex-strong`, which made the fallback chain redundant. Add a
-third alias again only when it maps to a genuinely different model or provider.
+OpenAI and Gemini are both configured through LiteLLM model groups. The normal
+default keeps most code-generation traffic on OpenAI while letting Gemini absorb
+long-context and lower-risk work. This reduces token saturation without sending
+high-stakes changes blindly to the cheapest model.
 
-API keys are never committed or written to a configuration file.
+API keys are never committed or written to a configuration file. `OPENAI_API_KEY`
+is required for the default profile; `GEMINI_API_KEY` is optional but recommended
+to activate the OpenAI/Gemini dispatching path.
 
 ## Hugging Face Integration
 
 Hugging Face can be used in two optional places.
 
 First, Hugging Face Inference Providers can sit behind LiteLLM as another
-provider pool. The local config includes two optional aliases:
+provider pool. The local config still includes two optional aliases:
 
 ```yaml
 codex-hf-cheap -> huggingface/groq/openai/gpt-oss-120b
@@ -97,7 +103,7 @@ open_models_only: false
 max_cost_usd: 0.0
 
 task_provider_rules:
-  simple: huggingface
+  simple: auto
   medium: auto
   complex: openai
 
@@ -130,14 +136,30 @@ for this command only. The script:
 
 1. installs the official LiteLLM OSS proxy in `C:\tmp\litellm-oss` when needed;
 2. asks for the OpenAI key with masked input when it is missing;
-3. creates a random local `LITELLM_API_KEY` in memory;
-4. starts the LiteLLM proxy in the background;
-5. enables the optional Codex `cost-routing` profile.
-6. opens Codex with that profile;
-7. stops LiteLLM and restores the previous configuration when Codex closes.
+3. asks for the Gemini key with masked input when it is missing; this is optional
+   but enables the Gemini model groups;
+4. creates a random local `LITELLM_API_KEY` in memory;
+5. starts the LiteLLM proxy in the background;
+6. enables the optional Codex `cost-routing` profile;
+7. opens Codex with that profile;
+8. stops LiteLLM and restores the previous configuration when Codex closes.
 
 There is no key to copy and no second terminal is required.
 
+### Optional local web key session
+
+If you prefer entering keys in a local page for one work session, start:
+
+```powershell
+.\scripts\python\Start-CodexKeySessionWeb.ps1
+```
+
+Then open `http://127.0.0.1:8787/`, paste `OPENAI_API_KEY`,
+`GEMINI_API_KEY`, or `HF_TOKEN`, and submit the form. The page starts the
+LiteLLM proxy on `http://127.0.0.1:4000/v1` with those keys only in the proxy
+process environment. The keys are not written to disk and the web server
+suppresses request logging.
+
 To launch the optional Hugging Face-facing profile instead of the local LiteLLM
 proxy:
 
@@ -174,7 +196,7 @@ Optional budgets and forced routing:
 
 ```powershell
 python .\scripts\python\codex_cost_router.py run `
-  --force-model codex-strong `
+  --force-model codex-deep `
   --provider openai `
   --max-input-tokens 8000 `
   --max-output-tokens 3000 `
@@ -201,8 +223,11 @@ Prompts and API keys are not logged.
 - `Manage-CodexCostRouting.ps1`: automatic run, status, and stop workflow.
 - `codex-cost-routing.cmd`: simple Windows launcher.
 - `codex_cost_router.py`: prompt optimization and one-shot routing.
+- `codex_key_session_web.py`: local-only web form for session keys.
+- `Start-CodexKeySessionWeb.ps1`: PowerShell launcher for the local key page.
 - `codex-routing-policy.yaml`: editable routing policy and fallback order.
-- `litellm-cost-routing.yaml`: local LiteLLM OSS model aliases and fallback.
+- `litellm-cost-routing.yaml`: local LiteLLM OSS OpenAI/Gemini model groups,
+  context-window fallbacks, cooldowns, and compatibility aliases.
 
 ## Notes
 
diff --git a/scripts/python/Start-CodexKeySessionWeb.ps1 b/scripts/python/Start-CodexKeySessionWeb.ps1
new file mode 100644
index 0000000..07f7f15
--- /dev/null
+++ b/scripts/python/Start-CodexKeySessionWeb.ps1
@@ -0,0 +1,40 @@
+<#
+.SYNOPSIS
+Launches codex_key_session_web.py, the local web form for session keys.
+
+.PARAMETER UiPort
+Port forwarded to the Python script as --ui-port.
+
+.PARAMETER ProxyPort
+Port forwarded to the Python script as --proxy-port.
+#>
+[CmdletBinding()]
+param(
+    [int]$UiPort = 8787,
+    [int]$ProxyPort = 4000
+)
+
+$ErrorActionPreference = 'Stop'
+
+# Prefer the Python under the Codex runtime cache; otherwise fall back to python on PATH.
+$pythonPath = Join-Path $env:USERPROFILE '.cache\codex-runtimes\codex-primary-runtime\dependencies\python\python.exe'
+if (-not (Test-Path -LiteralPath $pythonPath)) {
+    $python = Get-Command python -ErrorAction SilentlyContinue
+    if (-not $python) {
+        throw 'Python 3.10+ est introuvable.'
+    }
+    $pythonPath = $python.Source
+}
+
+$scriptPath = Join-Path $PSScriptRoot 'codex_key_session_web.py'
+# A config.yaml next to this script takes precedence over litellm-cost-routing.yaml.
+$configPath = Join-Path $PSScriptRoot 'litellm-cost-routing.yaml'
+if (Test-Path -LiteralPath (Join-Path $PSScriptRoot 'config.yaml')) {
+    $configPath = Join-Path $PSScriptRoot 'config.yaml'
+}
+
+& $pythonPath $scriptPath --ui-port $UiPort --proxy-port $ProxyPort --config $configPath
+
+# Without an explicit exit the script reports success even when Python failed,
+# because a native command's non-zero exit code does not raise an error by default.
+exit $LASTEXITCODE
diff --git a/scripts/python/codex-routing-policy.yaml b/scripts/python/codex-routing-policy.yaml
index a1f4700..a1200f9 100644
--- a/scripts/python/codex-routing-policy.yaml
+++ b/scripts/python/codex-routing-policy.yaml
@@ -7,7 +7,7 @@ open_models_only: false
 max_cost_usd: 0.0
 
 task_provider_rules:
-  simple: huggingface
+  simple: auto
   medium: auto
   complex: openai
 
diff --git a/scripts/python/codex_cost_router.py b/scripts/python/codex_cost_router.py
index f84a0f2..a7b6af1 100644
--- a/scripts/python/codex_cost_router.py
+++ b/scripts/python/codex_cost_router.py
@@ -24,15 +24,29 @@
 CONFIG_BACKUP = LOG_DIR / "config.toml.cost_router_backup"
 BEGIN_MARKER = "# BEGIN CODEX COST ROUTER"
 END_MARKER = "# END CODEX COST ROUTER"
-DEFAULT_MODEL = "codex-strong"
+LIGHT_MODEL = "codex-light"
+DEFAULT_MODEL = "codex-default"
+LONG_MODEL = "codex-long"
+DEEP_MODEL = "codex-deep"
+LEGACY_CHEAP_MODEL = "codex-cheap"
+LEGACY_STRONG_MODEL = "codex-strong"
 HF_FAST_MODEL = "codex-hf-fast"
 HF_CHEAP_MODEL = "codex-hf-cheap"
 HF_DIRECT_MODEL = "openai/gpt-oss-120b:fastest"
 DEFAULT_MAX_INPUT_TOKENS = 12_000
 DEFAULT_MAX_OUTPUT_TOKENS = 2_000
-PROVIDERS = ("auto", "openai", "huggingface")
+PROVIDERS = ("auto", "openai", "gemini", "huggingface")
 CODEX_PROVIDERS = ("litellm", "huggingface")
-MODELS = ("codex-cheap", DEFAULT_MODEL, HF_FAST_MODEL, HF_CHEAP_MODEL)
+MODELS = (
+    LIGHT_MODEL,
+    DEFAULT_MODEL,
+    LONG_MODEL,
+    DEEP_MODEL,
+    LEGACY_CHEAP_MODEL,
+    LEGACY_STRONG_MODEL,
+    HF_FAST_MODEL,
+    HF_CHEAP_MODEL,
+)
 LITELLM_HOST = "localhost"
 LITELLM_PORT = 4000
 WINDOWS_LITELLM_FALLBACK = Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe")
@@ -43,7 +57,7 @@
     "open_models_only": False,
     "max_cost_usd": 0.0,
     "task_provider_rules": {
-        "simple": "huggingface",
+        "simple": "auto",
         "medium": "auto",
         "complex": "openai",
     },
@@ -53,8 +67,12 @@
 # Approximate placeholders in USD per million tokens. Adjust these estimates to
 # match the deployments configured in your local LiteLLM OSS proxy.
 ESTIMATED_RATES = {
-    "codex-cheap": {"input": 0.15, "output": 0.60},
+    LIGHT_MODEL: {"input": 0.20, "output": 0.80},
     DEFAULT_MODEL: {"input": 2.00, "output": 8.00},
+    LONG_MODEL: {"input": 0.80, "output": 3.00},
+    DEEP_MODEL: {"input": 2.50, "output": 10.00},
+    LEGACY_CHEAP_MODEL: {"input": 0.20, "output": 0.80},
+    LEGACY_STRONG_MODEL: {"input": 2.00, "output": 8.00},
     HF_CHEAP_MODEL: {"input": 0.10, "output": 0.30},
     HF_FAST_MODEL: {"input": 0.25, "output": 0.75},
 }
@@ -104,6 +122,19 @@
     "provider benchmark",
     "benchmark providers",
 )
+LONG_CONTEXT_TERMS = (
+    "gros contexte",
+    "long contexte",
+    "long context",
+    "large context",
+    "logs",
+    "fichier volumineux",
+    "large file",
+    "synthese",
+    "synthèse",
+    "summarize",
+    "compare documents",
+)
 
 PROFILE_BLOCK = f"""\
 # BEGIN CODEX COST ROUTER
@@ -111,7 +142,6 @@
 name = "LiteLLM OSS Cost Router"
 base_url = "http://localhost:4000/v1"
 env_key = "LITELLM_API_KEY"
-wire_api = "responses"
 
 [model_providers.huggingface]
 name = "Hugging Face Inference Providers"
@@ -122,7 +152,10 @@
 [profiles.cost-routing]
 model = "{DEFAULT_MODEL}"
 model_provider = "litellm"
-model_reasoning_effort = "low"
+model_reasoning_effort = "medium"
+model_verbosity = "low"
+model_auto_compact_token_limit = 64000
+tool_output_token_limit = 8000
 
 [profiles.cost-routing-hf]
 model = "{HF_DIRECT_MODEL}"
@@ -446,22 +479,35 @@ def route_model(
     complexity, reason = classify_complexity(prompt)
     normalized = normalize_for_matching(prompt)
     wants_hf = any(term in normalized for term in HF_TERMS)
+    wants_long_context = any(term in normalized for term in LONG_CONTEXT_TERMS)
 
     if provider == "huggingface":
         if hf_available():
             model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL
             return model, f"huggingface provider requested; {reason}"
-        return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using OpenAI tier"
+        return DEFAULT_MODEL, "huggingface requested but HF_TOKEN is missing; using default OpenAI/Gemini tier"
 
     if provider == "openai":
-        model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL
+        model = LIGHT_MODEL if complexity == "simple" else DEEP_MODEL
         return model, f"openai provider requested; {reason}"
 
+    if provider == "gemini":
+        model = LIGHT_MODEL if complexity == "simple" and not wants_long_context else LONG_MODEL
+        return model, f"gemini provider requested; {reason}"
+
     if wants_hf and hf_available():
         model = HF_CHEAP_MODEL if complexity == "simple" else HF_FAST_MODEL
         return model, f"huggingface-related task; {reason}"
 
-    model = "codex-cheap" if complexity == "simple" else DEFAULT_MODEL
+    if wants_long_context:
+        return LONG_MODEL, f"long-context task; {reason}"
+
+    if complexity == "simple":
+        model = LIGHT_MODEL
+    elif complexity == "complex":
+        model = DEEP_MODEL
+    else:
+        model = DEFAULT_MODEL
     return model, reason
 
 
diff --git a/scripts/python/codex_key_session_web.py b/scripts/python/codex_key_session_web.py
new file mode 100644
index 0000000..f957582
--- /dev/null
+++ b/scripts/python/codex_key_session_web.py
@@ -0,0 +1,309 @@
+"""Local-only web form for starting a LiteLLM Codex session with in-memory keys."""
+
+from __future__ import annotations
+
+import argparse
+import html
+import os
+import secrets
+import subprocess
+import sys
+import time
+import urllib.parse
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import ClassVar
+
+# Loopback defaults for the key form (UI) and for the LiteLLM proxy it launches.
+DEFAULT_HOST = "127.0.0.1"
+DEFAULT_UI_PORT = 8787
+DEFAULT_PROXY_PORT = 4000
+
+# Page template for str.format(): {status} and {proxy_port} are filled in; CSS braces are doubled.
+PAGE = """<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Codex LiteLLM Session Keys</title>
+  <style>
+    :root {{
+      color-scheme: light dark;
+      font-family: Segoe UI, system-ui, sans-serif;
+    }}
+    body {{
+      margin: 0;
+      min-height: 100vh;
+      display: grid;
+      place-items: center;
+      background: Canvas;
+      color: CanvasText;
+    }}
+    main {{
+      width: min(680px, calc(100vw - 32px));
+      border: 1px solid color-mix(in srgb, CanvasText 18%, transparent);
+      border-radius: 8px;
+      padding: 24px;
+    }}
+    h1 {{
+      font-size: 22px;
+      margin: 0 0 8px;
+    }}
+    p {{
+      line-height: 1.5;
+    }}
+    label {{
+      display: block;
+      margin-top: 16px;
+      font-weight: 600;
+    }}
+    input {{
+      width: 100%;
+      box-sizing: border-box;
+      margin-top: 6px;
+      padding: 10px;
+      border-radius: 6px;
+      border: 1px solid color-mix(in srgb, CanvasText 24%, transparent);
+      font: inherit;
+    }}
+    button {{
+      margin-top: 20px;
+      padding: 10px 14px;
+      border: 0;
+      border-radius: 6px;
+      background: #1f6feb;
+      color: white;
+      font: inherit;
+      font-weight: 600;
+      cursor: pointer;
+    }}
+    .status {{
+      margin-top: 16px;
+      padding: 12px;
+      border-radius: 6px;
+      background: color-mix(in srgb, #1f6feb 12%, Canvas);
+      overflow-wrap: anywhere;
+    }}
+    .muted {{
+      opacity: .75;
+      font-size: 14px;
+    }}
+  </style>
+</head>
+<body>
+  <main>
+    <h1>Codex LiteLLM session keys</h1>
+    <p class="muted">Keys are kept only in this local process environment and passed to the LiteLLM subprocess. They are not written to disk.</p>
+    {status}
+    <form method="post" action="/start" autocomplete="off">
+      <label for="openai">OPENAI_API_KEY</label>
+      <input id="openai" name="OPENAI_API_KEY" type="password" placeholder="sk-..." autocomplete="off">
+      <label for="gemini">GEMINI_API_KEY</label>
+      <input id="gemini" name="GEMINI_API_KEY" type="password" placeholder="AI..." autocomplete="off">
+      <label for="hf">HF_TOKEN optional</label>
+      <input id="hf" name="HF_TOKEN" type="password" placeholder="hf_..." autocomplete="off">
+      <button type="submit">Start session proxy</button>
+    </form>
+    <form method="post" action="/stop">
+      <button type="submit">Stop session proxy</button>
+    </form>
+    <p class="muted">Proxy URL: <code>http://127.0.0.1:{proxy_port}/v1</code></p>
+  </main>
+</body>
+</html>
+"""
+
+
+def find_litellm(root: Path) -> Path:
+    """Return the first existing LiteLLM executable among the known locations."""
+    search_order = (
+        root / "venv" / "Scripts" / "litellm.exe",
+        Path(r"C:\tmp\litellm-oss\Scripts\litellm.exe"),
+        Path.home() / ".codex" / "litellm-proxy" / "venv" / "Scripts" / "litellm.exe",
+    )
+    located = next((path for path in search_order if path.exists()), None)
+    if located is None:
+        raise FileNotFoundError("LiteLLM executable not found. Install LiteLLM first.")
+    return located
+
+
+def wait_for_port(host: str, port: int, timeout: float = 20.0) -> bool:
+    """Poll ``host:port`` until it accepts a TCP connection or ``timeout`` seconds elapse."""
+    import socket
+    # Monotonic clock: a wall-clock adjustment must not stretch or shorten the wait.
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        try:
+            with socket.create_connection((host, port), timeout=0.5):
+                return True
+        except OSError:
+            time.sleep(0.25)
+    return False
+
+
+class SessionState:
+    """Proxy subprocess handle and status text; run_server() attaches one instance to the handler class."""
+
+    process: subprocess.Popen[str] | None = None
+    message = "No session proxy started from this page yet."
+
+
+class KeySessionHandler(BaseHTTPRequestHandler):
+    """Serve the key form and manage LiteLLM; run_server() sets the class attributes."""
+
+    state: ClassVar[SessionState]
+    config_path: ClassVar[Path]
+    litellm_path: ClassVar[Path]
+    proxy_host: ClassVar[str]
+    proxy_port: ClassVar[int]
+
+    def log_message(self, format: str, *args: object) -> None:
+        """Suppress default request logs so keys never appear in terminal logs."""
+        return
+
+    def do_GET(self) -> None:  # noqa: N802
+        if self.path in {"/", "/status"}:
+            self._send_page()
+        else:
+            self.send_error(404)
+
+    def do_POST(self) -> None:  # noqa: N802
+        if self.path == "/start":
+            self._start_proxy()
+        elif self.path == "/stop":
+            self._stop_proxy("Session proxy stopped.")
+            self._send_page()
+        else:
+            self.send_error(404)
+
+    def _send_page(self) -> None:
+        """Send the page with the current status message, HTML-escaped."""
+        status = f'<div class="status">{html.escape(self.state.message)}</div>'
+        body = PAGE.format(status=status, proxy_port=self.proxy_port).encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Cache-Control", "no-store")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def _read_form(self) -> dict[str, str]:
+        """Parse the urlencoded body into ``{field: last value, stripped}``."""
+        # Non-numeric or signed lengths read nothing (int() may raise, read(-1) blocks); cap 64 KiB.
+        declared = (self.headers.get("Content-Length") or "").strip()
+        length = min(int(declared), 65536) if declared.isdecimal() else 0
+        raw = self.rfile.read(length).decode("utf-8", errors="replace")
+        parsed = urllib.parse.parse_qs(raw, keep_blank_values=True)
+        return {key: values[-1].strip() for key, values in parsed.items()}
+
+    def _proxy_ready(self, process: subprocess.Popen[str], timeout: float = 20.0) -> bool:
+        """Return True once the proxy port accepts connections while ``process`` still runs."""
+        deadline = time.monotonic() + timeout
+        while process.poll() is None and time.monotonic() < deadline:
+            if wait_for_port(self.proxy_host, self.proxy_port, timeout=0.5):
+                # Re-check liveness: an unrelated listener may already own the port.
+                return process.poll() is None
+        return False
+
+    def _start_proxy(self) -> None:
+        """Start LiteLLM with the submitted keys in its environment and report the outcome."""
+        form = self._read_form()
+        fields = (("OpenAI", "OPENAI_API_KEY"), ("Gemini", "GEMINI_API_KEY"), ("Hugging Face", "HF_TOKEN"))
+        supplied = [(label, name, form[name]) for label, name in fields if form.get(name)]
+        if not supplied:
+            self.state.message = "Provide at least one provider key."
+            self._send_page()
+            return
+
+        self._stop_proxy("Replacing previous session proxy.")
+        env = os.environ.copy()
+        env.update(PYTHONUTF8="1", PYTHONIOENCODING="utf-8")
+        # Reuse an inherited key: a generated one is unknown to every client of the proxy.
+        if not env.get("LITELLM_API_KEY"):
+            env["LITELLM_API_KEY"] = "sk-local-" + secrets.token_hex(16)
+        env.update({name: value for _, name, value in supplied})
+
+        command = [str(self.litellm_path), "--config", str(self.config_path)]
+        command += ["--host", self.proxy_host, "--port", str(self.proxy_port)]
+        try:
+            process = subprocess.Popen(
+                command,
+                cwd=str(self.config_path.parent),
+                env=env,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                text=True,
+            )
+        except OSError as exc:
+            self.state.message = f"Unable to start LiteLLM: {exc}"
+            self._send_page()
+            return
+        self.state.process = process
+
+        if self._proxy_ready(process):
+            self.state.message = "Session proxy started with: " + ", ".join(label for label, _, _ in supplied)
+        elif process.poll() is not None:
+            # The child already exited (bad config, port in use, ...): report it.
+            self.state.process = None
+            self.state.message = f"LiteLLM exited early with code {process.returncode}."
+        else:
+            self.state.message = "LiteLLM process started, but the proxy port did not become ready yet."
+        self._send_page()
+
+    def _stop_proxy(self, message: str) -> None:
+        """Terminate the tracked proxy (kill after 8 s) and store ``message`` as the status."""
+        process = self.state.process
+        if process and process.poll() is None:
+            process.terminate()
+            try:
+                process.wait(timeout=8)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                process.wait(timeout=8)
+        self.state.process = None
+        self.state.message = message
+
+
+def run_server(args: argparse.Namespace) -> int:
+    """Serve the key form until interrupted; always stop the proxy on the way out. Returns 0."""
+    config_path = Path(args.config).resolve()
+    handler = KeySessionHandler
+    handler.state = SessionState()
+    handler.config_path = config_path
+    handler.litellm_path = find_litellm(config_path.parent)
+    handler.proxy_host = args.proxy_host
+    handler.proxy_port = args.proxy_port
+
+    server = ThreadingHTTPServer((args.host, args.ui_port), handler)
+    print(f"Open http://{args.host}:{args.ui_port}/ to enter session keys.")
+    print(f"LiteLLM proxy will run at http://{args.proxy_host}:{args.proxy_port}/v1.")
+    try:
+        server.serve_forever()
+    except KeyboardInterrupt:
+        handler.state.message = "Stopping web key session."
+    finally:
+        # Stop the child on every exit path: its environment holds the provider keys.
+        proxy = handler.state.process
+        if proxy and proxy.poll() is None:
+            proxy.terminate()
+        server.server_close()
+    return 0
+
+
+def parse_args(argv: list[str]) -> argparse.Namespace:
+    """Parse the command line in ``argv`` into host, port and config options."""
+    # Default to the LiteLLM config stored next to this script.
+    bundled_config = Path(__file__).with_name("litellm-cost-routing.yaml")
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("--host", default=DEFAULT_HOST)
+    cli.add_argument("--ui-port", type=int, default=DEFAULT_UI_PORT)
+    cli.add_argument("--proxy-host", default=DEFAULT_HOST)
+    cli.add_argument("--proxy-port", type=int, default=DEFAULT_PROXY_PORT)
+    cli.add_argument(
+        "--config", default=str(bundled_config), help="LiteLLM YAML config path."
+    )
+    return cli.parse_args(argv)
+
+
+if __name__ == "__main__":
+    raise SystemExit(run_server(parse_args(sys.argv[1:])))
diff --git a/scripts/python/litellm-cost-routing.yaml b/scripts/python/litellm-cost-routing.yaml
index 914ff7b..d7027c4 100644
--- a/scripts/python/litellm-cost-routing.yaml
+++ b/scripts/python/litellm-cost-routing.yaml
@@ -1,21 +1,92 @@
-# LiteLLM OSS self-hosted proxy example.
-# Two explicit tiers keep the local setup simple:
-# - codex-cheap for low-cost/simple work
-# - codex-strong for default, medium, and complex work
-# Optional Hugging Face tiers use HF_TOKEN and let Hugging Face route across
-# inference providers behind a single token.
+# LiteLLM OSS self-hosted proxy example for Codex.
+# Task-oriented aliases let Codex route by workload instead of hard-coding a
+# single provider:
+# - codex-light: cheap/frequent work, Gemini Flash biased
+# - codex-default: normal coding work, OpenAI biased with Gemini relief
+# - codex-long: long-context reads and synthesis, Gemini Pro biased
+# - codex-deep: high-stakes debugging/security/architecture, OpenAI biased
 # API keys are read from environment variables and must never be committed.
 
 model_list:
+  # Backward-compatible aliases used by older wrapper calls.
   - model_name: codex-cheap
     litellm_params:
-      model: openai/gpt-5-mini
+      model: gemini/gemini-3.5-flash
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 6
+
+  - model_name: codex-cheap
+    litellm_params:
+      model: openai/gpt-5.4-mini
+      api_key: os.environ/OPENAI_API_KEY
+      weight: 4
+
+  - model_name: codex-strong
+    litellm_params:
+      model: openai/gpt-5.5
       api_key: os.environ/OPENAI_API_KEY
+      weight: 8
 
   - model_name: codex-strong
     litellm_params:
-      model: openai/gpt-5
+      model: gemini/gemini-3.5-pro
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 2
+
+  # Preferred Codex-facing aliases.
+  - model_name: codex-light
+    litellm_params:
+      model: gemini/gemini-3.5-flash
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 7
+
+  - model_name: codex-light
+    litellm_params:
+      model: openai/gpt-5.4-mini
+      api_key: os.environ/OPENAI_API_KEY
+      weight: 3
+
+  - model_name: codex-default
+    litellm_params:
+      model: openai/gpt-5.5
+      api_key: os.environ/OPENAI_API_KEY
+      weight: 8
+
+  - model_name: codex-default
+    litellm_params:
+      model: gemini/gemini-3.5-pro
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 2
+
+  - model_name: codex-long
+    litellm_params:
+      model: gemini/gemini-3.5-pro
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 7
+
+  - model_name: codex-long
+    litellm_params:
+      model: gemini/gemini-2.5-pro
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 2
+
+  - model_name: codex-long
+    litellm_params:
+      model: openai/gpt-5.5
       api_key: os.environ/OPENAI_API_KEY
+      weight: 1
+
+  - model_name: codex-deep
+    litellm_params:
+      model: openai/gpt-5.5
+      api_key: os.environ/OPENAI_API_KEY
+      weight: 10
+
+  - model_name: codex-deep
+    litellm_params:
+      model: gemini/gemini-3.5-pro
+      api_key: os.environ/GEMINI_API_KEY
+      weight: 1
 
   - model_name: codex-hf-cheap
     litellm_params:
@@ -28,17 +99,61 @@ model_list:
       api_key: os.environ/HF_TOKEN
 
 router_settings:
+  routing_strategy: simple-shuffle
+  num_retries: 2
+  timeout: 180
+  cooldown_time: 90
+  enable_pre_call_checks: true
+  model_group_alias:
+    gpt-5.5: codex-default
+    gpt-5.4-mini: codex-light
+    gemini-3.5-pro: codex-long
+    gemini-3.5-flash: codex-light
   fallbacks:
+    - codex-light:
+        - codex-default
+    - codex-default:
+        - codex-long
+        - codex-light
+    - codex-long:
+        - codex-default
+    - codex-deep:
+        - codex-default
+        - codex-long
     - codex-cheap:
         - codex-strong
+        - codex-default
+    - codex-strong:
+        - codex-default
+        - codex-long
     - codex-hf-cheap:
+        - codex-light
         - codex-cheap
-        - codex-strong
     - codex-hf-fast:
-        - codex-strong
+        - codex-default
+        - codex-deep
+  context_window_fallbacks:
+    - codex-light:
+        - codex-long
+    - codex-default:
+        - codex-long
+    - codex-deep:
+        - codex-long
+  allowed_fails_policy:
+    AuthenticationErrorAllowedFails: 0
+    TimeoutErrorAllowedFails: 2
+    RateLimitErrorAllowedFails: 4
 
 litellm_settings:
+  drop_params: true
   set_verbose: false
+  request_timeout: 180
+  num_retries: 2
 
 general_settings:
   master_key: os.environ/LITELLM_API_KEY
+  disable_spend_logs: true
+  background_health_checks: true  # NOTE(review): presumably issues real calls to every deployment each interval - confirm cost
+  health_check_interval: 60
+  enable_health_check_routing: true
+  health_check_ignore_transient_errors: true
diff --git a/scripts/python/tests/test_codex_cost_router.py b/scripts/python/tests/test_codex_cost_router.py
index f838cb7..cf270bb 100644
--- a/scripts/python/tests/test_codex_cost_router.py
+++ b/scripts/python/tests/test_codex_cost_router.py
@@ -1,4 +1,4 @@
-"""Tests for the optional Codex cost-routing wrapper."""
+"""Tests for the optional Codex cost-routing wrapper."""
 
 import importlib.util
 import tempfile
@@ -25,13 +25,23 @@ def test_compress_logs_removes_low_value_debug_lines(self) -> None:
         self.assertEqual(ROUTER.compress_logs(text), "ERROR request failed")
 
     def test_route_model_uses_expected_aliases(self) -> None:
-        self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-cheap")
-        self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-strong")
-        self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-strong")
+        self.assertEqual(ROUTER.route_model("Corrige une typo dans le README")[0], "codex-light")
+        self.assertEqual(ROUTER.route_model("Refactor this Python API")[0], "codex-default")
+        self.assertEqual(ROUTER.route_model("Audit sécurité production Supabase RLS")[0], "codex-deep")
 
     def test_route_model_matches_accented_french_keywords(self) -> None:
-        self.assertEqual(ROUTER.route_model("Prépare un résumé du README")[0], "codex-cheap")
-        self.assertEqual(ROUTER.route_model("Question de fiscalité pour Odoo")[0], "codex-strong")
+        self.assertEqual(ROUTER.route_model("Prépare un résumé du README")[0], "codex-light")
+        self.assertEqual(ROUTER.route_model("Question de fiscalité pour Odoo")[0], "codex-deep")
+
+    def test_route_model_sends_long_context_to_gemini_biased_alias(self) -> None:
+        self.assertEqual(
+            ROUTER.route_model("Analyse ces logs et fais une synthese long context")[0],
+            "codex-long",
+        )
+        self.assertEqual(
+            ROUTER.route_model("Summarize this large file", provider="gemini")[0],
+            "codex-long",
+        )
 
     def test_route_model_can_prefer_hugging_face_when_token_exists(self) -> None:
         with patch.dict(ROUTER.os.environ, {"HF_TOKEN": "hf_test"}):
@@ -47,7 +57,7 @@ def test_route_model_can_prefer_hugging_face_when_token_exists(self) -> None:
     def test_route_model_falls_back_when_hugging_face_token_is_missing(self) -> None:
         with patch.dict(ROUTER.os.environ, {}, clear=True):
             model, reason = ROUTER.route_model("Use Hugging Face providers", provider="huggingface")
-            self.assertEqual(model, "codex-strong")
+            self.assertEqual(model, "codex-default")
             self.assertIn("HF_TOKEN is missing", reason)
 
     def test_codex_provider_helpers_select_expected_profiles(self) -> None:
@@ -158,3 +168,4 @@ def test_enable_disable_restores_original_config_bytes(self) -> None:
 
 if __name__ == "__main__":
     unittest.main()
+
diff --git a/scripts/python/tests/test_codex_key_session_web.py b/scripts/python/tests/test_codex_key_session_web.py
new file mode 100644
index 0000000..afce691
--- /dev/null
+++ b/scripts/python/tests/test_codex_key_session_web.py
@@ -0,0 +1,53 @@
+"""Tests for the optional local web key session launcher."""
+
+import importlib.util
+import unittest
+from pathlib import Path
+from unittest.mock import MagicMock
+
+
+# Load the script by file path so the tests do not need it to be importable as a package.
+MODULE_PATH = Path(__file__).resolve().parents[1] / "codex_key_session_web.py"
+SPEC = importlib.util.spec_from_file_location("codex_key_session_web", MODULE_PATH)
+if SPEC is None or SPEC.loader is None:
+    raise RuntimeError("Unable to load codex_key_session_web.py")
+WEB = importlib.util.module_from_spec(SPEC)
+SPEC.loader.exec_module(WEB)
+
+
+class KeySessionWebTests(unittest.TestCase):
+    """Unit tests that exercise the handler without opening any socket."""
+
+    @staticmethod
+    def _bare_handler():
+        """Build a handler without running ``__init__``, which expects a live request."""
+        return object.__new__(WEB.KeySessionHandler)
+
+    def test_parse_args_defaults_to_localhost(self) -> None:
+        defaults = WEB.parse_args([])
+        self.assertEqual(
+            (defaults.host, defaults.ui_port, defaults.proxy_port),
+            ("127.0.0.1", 8787, 4000),
+        )
+
+    def test_log_message_is_suppressed(self) -> None:
+        self.assertIsNone(self._bare_handler().log_message("secret %s", "sk-test"))
+
+    def test_stop_proxy_terminates_running_process(self) -> None:
+        running = MagicMock()
+        running.poll.return_value = None  # the handler treats a None poll() as "still running"
+        session = WEB.SessionState()
+        session.process = running
+
+        handler = self._bare_handler()
+        handler.state = session
+        handler._stop_proxy("done")
+
+        running.terminate.assert_called_once()
+        running.wait.assert_called_once_with(timeout=8)
+        self.assertIsNone(session.process)
+        self.assertEqual(session.message, "done")
+
+
+if __name__ == "__main__":
+    unittest.main()