From cce915c9a642a8ad79fc09482af57359b1111d0d Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 13:11:07 +0100 Subject: [PATCH 1/4] feat: image fallback to Anthropic with on-the-wire model name swap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes image turns from the configured backend (DeepSeek/OpenRouter) to api.anthropic.com so Claude Code's vision capabilities work even when the active backend can't process image content blocks. The wire-side model name is swapped to the canonical Claude name on outbound (so Anthropic accepts the request) and back to the backend name on the inbound response (so Claude Code never sees the swap and the TUI keeps showing the backend model). Components: - launch_claude now starts the proxy and routes Claude Code through it (mirrors what --remote already did). Deliberately does not touch ANTHROPIC_AUTH_TOKEN — whatever credential Claude Code already carries (OAuth bearer from `claude login`, an explicit token, etc.) flows through and is what Anthropic sees on the image-reroute path. start_proxy is a shared helper that sets PROXY_PID/PROXY_PORT/PROXY_LOG as script globals; must not be called via $() — the EXIT trap depends on PROXY_PID reaching the parent shell. SCRIPT_DIR is symlink-resolved so deepclaude works when installed via ~/.local/bin symlink. - proxy/start-proxy.js legacy mode accepts an optional [defaultMode] third arg, threaded through as `defaultMode` so state.mode resolves to e.g. `deepseek` instead of `_single` and MODEL_REMAP[state.mode] fires. - proxy/model-proxy.js: - containsImageBlock walks tool_result.content[] recursively because Claude Code's Read tool wraps a returned PNG in tool_result rather than at the top of the message. - REVERSE_MODEL_REMAP is derived from MODEL_REMAP at module load. Many-to-one collisions (claude-opus-4-6 + claude-opus-4-7 both map to deepseek-v4-pro) collapse last-write-wins. 
- On forceAnthropicForImage, the proxy: (a) leaves the client auth header intact, (b) swaps body.model backend-name → canonical claude-* so Anthropic recognizes it, (c) drops `thinking` and `context_management` to avoid Anthropic 400s on clear_thinking_* strategies, (d) strips ALL thinking blocks (foreign backends emit signed-but-invalid ones). - UsageNormalizer (SSE) extended with optional modelRewrite, applied to message.model in message_start events to swap the canonical name back to the backend name on the wire to the client. - Non-streaming JSON responses get the same model-name swap on the response body before it's forwarded. - Cost tracking: image-rerouted turns bucket under `anthropic_max` (cost: 0, since Max is subscription quota); anthropic_equivalent still computed via PRICING_PER_M.anthropic so savings is truthful. - Single body parse in the request handler; the outbound `accept-encoding` request header and the `content-encoding` response header are stripped on proxy-mutated paths to avoid client-side ZlibError. Disable the whole feature with DEEPCLAUDE_IMAGE_FALLBACK=off. Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 91 +++++++++++++- proxy/model-proxy.js | 287 +++++++++++++++++++++++++++++++------------ proxy/start-proxy.js | 5 +- 3 files changed, 298 insertions(+), 85 deletions(-) diff --git a/deepclaude.sh b/deepclaude.sh index 5f59e3a..0b3ec46 100644 --- a/deepclaude.sh +++ b/deepclaude.sh @@ -4,7 +4,17 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Resolve SCRIPT_DIR through any symlink chain (e.g. /usr/local/bin/deepclaude +# -> /path/to/repo/deepclaude.sh) so $SCRIPT_DIR/proxy/... works regardless of +# how the script was invoked. 
+_source="${BASH_SOURCE[0]}" +while [ -L "$_source" ]; do + _dir="$(cd "$(dirname "$_source")" && pwd)" + _source="$(readlink "$_source")" + [[ "$_source" != /* ]] && _source="$_dir/$_source" +done +SCRIPT_DIR="$(cd "$(dirname "$_source")" && pwd)" +unset _source _dir # --- Config --- DEEPSEEK_URL="https://api.deepseek.com/anthropic" @@ -85,6 +95,62 @@ set_model_env() { export CLAUDE_CODE_EFFORT_LEVEL="max" } +backend_long_name() { + case "$1" in + ds|deepseek) echo "deepseek" ;; + or|openrouter) echo "openrouter" ;; + fw|fireworks) echo "fireworks" ;; + anthropic) echo "anthropic" ;; + *) echo "ERROR: Unknown backend '$1'. Use: ds, or, fw, anthropic" >&2; return 1 ;; + esac +} + +# Starts proxy/start-proxy.js in the background and waits for it to bind a +# port. Sets PROXY_PID, PROXY_PORT, PROXY_LOG as script globals so the EXIT +# trap (cleanup_proxy) can see the pid. Must be called WITHOUT command +# substitution — $(start_proxy) would run in a subshell and the globals +# would never reach the parent. +# Requires: RESOLVED_URL, RESOLVED_KEY, BACKEND already set. +start_proxy() { + local backend_long + backend_long=$(backend_long_name "$BACKEND") || exit 1 + + PROXY_LOG="${PROXY_LOG:-/tmp/deepclaude-proxy.$$.log}" + : > "$PROXY_LOG" + node "$SCRIPT_DIR/proxy/start-proxy.js" "$RESOLVED_URL" "$RESOLVED_KEY" "$backend_long" >> "$PROXY_LOG" 2>&1 & + PROXY_PID=$! + + # The proxy emits a banner line, then a bare-numeric port line on a + # successful bind. Match the bare integer to skip the banner; do not + # introduce other numeric-only stdout in proxy startup. + local proxy_port="" + local tries=0 + while [[ -z "$proxy_port" ]] && [[ $tries -lt 30 ]]; do + if kill -0 "$PROXY_PID" 2>/dev/null; then + # `|| true`: with `set -o pipefail`, grep no-match (exit 1) + # would otherwise exit the script; we expect zero matches on + # early iterations before the proxy has emitted its port. 
+ proxy_port=$(grep -E '^[0-9]+$' "$PROXY_LOG" 2>/dev/null | head -1 || true) + else + echo "ERROR: Proxy process died during startup" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + [[ -z "$proxy_port" ]] && sleep 0.2 + tries=$((tries + 1)) + done + + if [[ -z "$proxy_port" ]]; then + echo "ERROR: Proxy failed to report a port within 6s" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + + PROXY_PORT="$proxy_port" +} + show_status() { echo "" echo " deepclaude — Backend Status" @@ -207,17 +273,30 @@ launch_claude() { resolve_backend + echo " Starting model proxy for $BACKEND..." + # Call directly (not via $()): start_proxy sets PROXY_PID/PROXY_PORT/PROXY_LOG + # as script globals. A subshell would lose them and the EXIT trap would + # leak the node child. + start_proxy + echo " Proxy log: $PROXY_LOG" + echo " Launching Claude Code via $BACKEND..." - echo " Endpoint: $RESOLVED_URL" + echo " Proxy on :$PROXY_PORT -> $RESOLVED_URL" echo " Model: $RESOLVED_OPUS (main) + $RESOLVED_HAIKU (subagents)" echo "" - export ANTHROPIC_BASE_URL="$RESOLVED_URL" - export ANTHROPIC_AUTH_TOKEN="$RESOLVED_KEY" + # Route through the local proxy. The proxy holds the backend API key + # privately (passed via argv to start-proxy.js) and substitutes it for + # outbound requests to the backend. We deliberately leave Claude Code's + # own auth state alone — whatever it had (OAuth bearer from `claude + # login`, an existing ANTHROPIC_AUTH_TOKEN, etc.) flows through and is + # used only on the image-fallback path where the proxy reroutes to + # api.anthropic.com. + export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" set_model_env - unset ANTHROPIC_API_KEY - exec claude "$@" + # Don't `exec` — we want the EXIT trap to clean up the proxy. 
+ claude "$@" } launch_remote() { diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 85a9295..c9b2669 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -6,6 +6,7 @@ import { Transform } from 'stream'; const ANTHROPIC_FALLBACK = 'https://api.anthropic.com'; const MODEL_PATHS = ['/v1/messages']; const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; // 5 min per request +const IMAGE_FALLBACK_ENABLED = (process.env.DEEPCLAUDE_IMAGE_FALLBACK || 'anthropic') !== 'off'; const MODEL_REMAP = { deepseek: { @@ -24,26 +25,58 @@ const MODEL_REMAP = { }, }; +// Reverse of MODEL_REMAP: backend-specific name → canonical claude-* name. +// Used on image-fallback to translate the backend name Claude Code sent +// (e.g. `deepseek-v4-pro`) back to a Claude name Anthropic recognizes +// before the request leaves the proxy. The inbound response carries the +// Claude name back; we translate it again so Claude Code sees the +// backend name end-to-end and never knows about the swap. +// +// Many-to-one collisions (e.g. claude-opus-4-6 and claude-opus-4-7 both +// map to deepseek-v4-pro) collapse to "last-write-wins" — which means +// the most recent claude-* name in the table is what Anthropic gets +// for that backend slot. That's the right default. 
+const REVERSE_MODEL_REMAP = {}; +for (const [backend, table] of Object.entries(MODEL_REMAP)) { + REVERSE_MODEL_REMAP[backend] = {}; + for (const [claudeName, backendName] of Object.entries(table)) { + REVERSE_MODEL_REMAP[backend][backendName] = claudeName; + } +} + const PRICING_PER_M = { - deepseek: { input: 0.44, output: 0.87 }, - openrouter: { input: 0.44, output: 0.87 }, - fireworks: { input: 1.74, output: 3.48 }, - anthropic: { input: 3.00, output: 15.00 }, - _single: { input: 0.44, output: 0.87 }, + deepseek: { input: 0.44, output: 0.87 }, + openrouter: { input: 0.44, output: 0.87 }, + fireworks: { input: 1.74, output: 3.48 }, + anthropic: { input: 3.00, output: 15.00 }, + // Image-rerouted turns: Max OAuth consumes subscription quota, not + // per-token billing, so cost is 0. anthropic_equivalent (computed + // from PRICING_PER_M.anthropic in getCostSummary) still reflects + // what per-token Anthropic would have charged — feeding savings. + anthropic_max: { input: 0, output: 0 }, + _single: { input: 0.44, output: 0.87 }, }; /** * Transform stream that intercepts SSE events and injects missing `usage` * fields. DeepSeek/OpenRouter may omit `usage` in message_start or * message_delta, which crashes Claude Code ("$.input_tokens" is undefined). + * + * Optionally rewrites `message.model` in `message_start` events — used on + * the image-fallback path so Claude Code sees the backend model name on + * the response side even though Anthropic served the claude-* name on + * the wire. */ class UsageNormalizer extends Transform { - constructor(onUsage) { + constructor(onUsage, { modelRewrite } = {}) { super(); this._buf = ''; this._onUsage = onUsage; this._inputTokens = 0; this._outputTokens = 0; + // { from: 'claude-opus-4-7', to: 'deepseek-v4-pro' } — applied to + // message.model in message_start events. Null = no rewrite. 
+ this._modelRewrite = modelRewrite || null; } _transform(chunk, _enc, cb) { @@ -69,6 +102,10 @@ class UsageNormalizer extends Transform { d.message.usage = { input_tokens: 0, output_tokens: 0 }; changed = true; } + if (this._modelRewrite && d.message.model === this._modelRewrite.from) { + d.message.model = this._modelRewrite.to; + changed = true; + } } if (d.type === 'message_delta') { if (d.usage) { @@ -122,6 +159,26 @@ function stripUnsignedThinkingBlocks(body) { } } +// True if any message contains an `image` content block, including nested +// inside `tool_result` content arrays (Claude Code's Read tool wraps a +// returned PNG in tool_result.content rather than at the top level). +function containsImageBlock(messages) { + if (!Array.isArray(messages)) return false; + const blockHasImage = (block) => { + if (!block) return false; + if (block.type === 'image') return true; + if (block.type === 'tool_result' && Array.isArray(block.content)) { + return block.content.some(blockHasImage); + } + return false; + }; + for (const msg of messages) { + if (!Array.isArray(msg.content)) continue; + if (msg.content.some(blockHasImage)) return true; + } + return false; +} + export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, defaultMode }) { return new Promise((resolve, reject) => { const initialTarget = new URL(targetUrl); @@ -270,87 +327,144 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, return; } - // In anthropic mode, everything passes through transparently - const isAnthropicMode = state.mode === 'anthropic'; - const isModelCall = !isAnthropicMode && MODEL_PATHS.includes(urlPath); - const dest = isModelCall ? state.target : new URL(ANTHROPIC_FALLBACK); - - // Build upstream path. target.pathname may overlap with - // clientReq.url (e.g. OpenRouter /api/v1 + /v1/messages). - // Strip the shared prefix to avoid /api/v1/v1/messages. 
- let fullPath; - if (isModelCall) { - const base = state.target.pathname.replace(/\/$/, ''); - let overlap = ''; - for (let i = 1; i <= Math.min(base.length, urlPath.length); i++) { - if (base.endsWith(urlPath.substring(0, i))) overlap = urlPath.substring(0, i); - } - fullPath = overlap ? base + urlPath.substring(overlap.length) : base + urlPath; - } else { - fullPath = clientReq.url; - } - const reqId = ++reqCount; const t0 = Date.now(); - if (isModelCall) { - console.log(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}`); - } + // Routing is deferred to the end-of-body handler so we can + // inspect the request body for image content blocks and flip a + // single request from a non-Anthropic backend to api.anthropic.com. + // Disable with DEEPCLAUDE_IMAGE_FALLBACK=off. + const chunks = []; + clientReq.on('data', c => chunks.push(c)); + clientReq.on('end', () => { + let body = Buffer.concat(chunks); + const isMessagesPath = MODEL_PATHS.includes(urlPath); + + // Single body parse. Downstream mutations all operate on + // `parsed` and we re-stringify once at the end. `parsed` + // stays null for non-messages paths or non-JSON bodies, in + // which case `body` is forwarded verbatim. + let parsed = null; + if (isMessagesPath) { + try { parsed = JSON.parse(body); } catch {} + } - const headers = { ...clientReq.headers, host: dest.host }; - delete headers['content-length']; + const forceAnthropicForImage = ( + IMAGE_FALLBACK_ENABLED && + state.mode !== 'anthropic' && + parsed && + containsImageBlock(parsed.messages) + ); + + const isAnthropicMode = state.mode === 'anthropic' || forceAnthropicForImage; + const isModelCall = !isAnthropicMode && isMessagesPath; + const trackUsage = isModelCall || forceAnthropicForImage; + const dest = isModelCall ? 
state.target : new URL(ANTHROPIC_FALLBACK); + // Image-rerouted turns get their own bucket: cost=0 (Max is + // subscription quota, not per-token) so total_cost stays + // truthful, while anthropic_equivalent still reflects what + // per-token Anthropic would have charged — feeding savings. + const effectiveMode = forceAnthropicForImage ? 'anthropic_max' : state.mode; + + // For image-reroute, capture the backend model name so we + // can swap it to the canonical claude-* name on outbound + // (Anthropic doesn't recognize backend names like + // `deepseek-v4-pro`) and restore the backend name on the + // inbound response — Claude Code never sees the swap. + let imageRewrite = null; + if (forceAnthropicForImage && parsed?.model) { + const canonical = REVERSE_MODEL_REMAP[state.mode]?.[parsed.model]; + if (canonical) { + imageRewrite = { backend: parsed.model, canonical }; + } + } - if (isModelCall) { - delete headers['authorization']; - delete headers['x-api-key']; - if (state.useBearer) { - headers['authorization'] = `Bearer ${state.apiKey}`; + // Build upstream path. target.pathname may overlap with + // clientReq.url (e.g. OpenRouter /api/v1 + /v1/messages). + // Strip the shared prefix to avoid /api/v1/v1/messages. + let fullPath; + if (isModelCall) { + const base = state.target.pathname.replace(/\/$/, ''); + let overlap = ''; + for (let i = 1; i <= Math.min(base.length, urlPath.length); i++) { + if (base.endsWith(urlPath.substring(0, i))) overlap = urlPath.substring(0, i); + } + fullPath = overlap ? base + urlPath.substring(overlap.length) : base + urlPath; } else { - headers['x-api-key'] = state.apiKey; + fullPath = clientReq.url; } - } - const chunks = []; - clientReq.on('data', c => chunks.push(c)); - clientReq.on('end', () => { - let body = Buffer.concat(chunks); + if (trackUsage) { + const tag = forceAnthropicForImage + ? ` [image→anthropic${imageRewrite ? 
`, ${imageRewrite.backend}→${imageRewrite.canonical}` : ''}]` + : ''; + console.log(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}${tag}`); + } + + const headers = { ...clientReq.headers, host: dest.host }; + delete headers['content-length']; + // Force plain bytes upstream — the proxy mutates response + // bodies (UsageNormalizer toString'es bytes, normalizeJsonBody + // reparses) and would otherwise emit a content-encoding: gzip + // header followed by non-gzip bytes, breaking the client with + // "Decompression error: ZlibError". + delete headers['accept-encoding']; + + if (isModelCall) { + delete headers['authorization']; + delete headers['x-api-key']; + if (state.useBearer) { + headers['authorization'] = `Bearer ${state.apiKey}`; + } else { + headers['x-api-key'] = state.apiKey; + } + } + // For forceAnthropicForImage we leave the client's auth + // headers intact — whatever Claude Code is carrying (OAuth + // bearer from `claude login`, an explicit + // ANTHROPIC_AUTH_TOKEN, etc.) is what Anthropic sees. + + // Body mutations on the parsed object, in order: + // - Image-reroute: drop `thinking` and `context_management` + // (clear_thinking_* requires thinking enabled; Anthropic + // 400s on the mismatch). Swap model → canonical claude-*. + // - Model call: remap Anthropic model names to the + // backend-specific name. + // - Thinking-block strip: + // Anthropic + (prior non-Anthropic session OR + // image-routed) → strip ALL (foreign backends emit + // signed-but-invalid blocks). + // Anthropic + pure Anthropic session → strip unsigned. + // Non-Anthropic model call → strip ALL (backends reject + // blocks they didn't generate). 
+ if (parsed) { + if (forceAnthropicForImage) { + delete parsed.thinking; + delete parsed.context_management; + if (imageRewrite) { + parsed.model = imageRewrite.canonical; + } + } - // Remap Anthropic model names to backend-specific names - if (isModelCall && MODEL_REMAP[state.mode]) { - try { - const parsed = JSON.parse(body); + if (isModelCall && MODEL_REMAP[state.mode]) { const mapped = MODEL_REMAP[state.mode][parsed.model]; if (mapped) { console.log(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); parsed.model = mapped; - body = Buffer.from(JSON.stringify(parsed)); } - } catch { /* not JSON or parse error, pass through */ } - } + } - // Strip thinking blocks before forwarding. - // Non-Anthropic: strip ALL blocks — backends reject thinking blocks - // they didn't generate, even unsigned ones. - // Anthropic after a non-Anthropic session: also strip ALL, because - // foreign backends generate signed-but-invalid thinking blocks that - // stripUnsignedThinkingBlocks passes through, causing Anthropic 400s. 
- if (isAnthropicMode && MODEL_PATHS.includes(urlPath)) { - try { - const parsed = JSON.parse(body); - if (state.hadNonAnthropicSession) { + if (isAnthropicMode) { + if (state.hadNonAnthropicSession || forceAnthropicForImage) { stripAllThinkingBlocks(parsed); } else { stripUnsignedThinkingBlocks(parsed); } - body = Buffer.from(JSON.stringify(parsed)); - } catch { /* pass through */ } - } - if (isModelCall) { - try { - const parsed = JSON.parse(body); + } else if (isModelCall) { stripAllThinkingBlocks(parsed); - body = Buffer.from(JSON.stringify(parsed)); - } catch { /* pass through */ } + } + + body = Buffer.from(JSON.stringify(parsed)); } const opts = { @@ -363,7 +477,7 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, }; const proxyReq = httpsRequest(opts, (proxyRes) => { - if (isModelCall) { + if (trackUsage) { const ttfb = Date.now() - t0; console.log(`[MODEL-PROXY] #${reqId} TTFB ${ttfb}ms (status ${proxyRes.statusCode})`); } @@ -371,33 +485,52 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const ct = proxyRes.headers['content-type'] || ''; const isSSE = ct.includes('text/event-stream'); - if (isModelCall && isSSE) { - clientRes.writeHead(proxyRes.statusCode, proxyRes.headers); - const norm = new UsageNormalizer((inp, out) => recordUsage(state.mode, inp, out)); + if (trackUsage && isSSE) { + // Strip content-encoding from forwarded headers — proxy + // mutates the body via UsageNormalizer (toString on + // bytes), so any upstream gzip bytes would arrive at + // the client with the gzip header but non-gzip payload. + const { 'content-encoding': _ce1, ...sseHeaders } = proxyRes.headers; + clientRes.writeHead(proxyRes.statusCode, sseHeaders); + const modelRewrite = imageRewrite + ? 
{ from: imageRewrite.canonical, to: imageRewrite.backend } + : null; + const norm = new UsageNormalizer( + (inp, out) => recordUsage(effectiveMode, inp, out), + { modelRewrite }, + ); proxyRes.pipe(norm).pipe(clientRes); proxyRes.on('end', () => { console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (${norm._inputTokens}in/${norm._outputTokens}out)`); }); - } else if (isModelCall && ct.includes('application/json')) { + } else if (trackUsage && ct.includes('application/json')) { const respChunks = []; proxyRes.on('data', c => respChunks.push(c)); proxyRes.on('end', () => { const raw = Buffer.concat(respChunks); - const fixed = normalizeJsonBody(raw); + let fixed = normalizeJsonBody(raw); try { const j = JSON.parse(fixed); - if (j.usage) recordUsage(state.mode, j.usage.input_tokens, j.usage.output_tokens); + if (j.usage) recordUsage(effectiveMode, j.usage.input_tokens, j.usage.output_tokens); + // Image-reroute: swap response.model back from + // canonical claude-* to the backend name + // Claude Code originally sent. + if (imageRewrite && j.model === imageRewrite.canonical) { + j.model = imageRewrite.backend; + fixed = Buffer.from(JSON.stringify(j)); + } } catch {} - const outHeaders = { ...proxyRes.headers, 'content-length': fixed.length }; + const { 'content-encoding': _ce2, ...jsonHeaders } = proxyRes.headers; + const outHeaders = { ...jsonHeaders, 'content-length': fixed.length }; clientRes.writeHead(proxyRes.statusCode, outHeaders); clientRes.end(fixed); console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (json, ${fixed.length}b)`); }); } else { - // Non-model or unknown content-type: pass through + // Non-model or unknown content-type: pass through unchanged. 
clientRes.writeHead(proxyRes.statusCode, proxyRes.headers); proxyRes.pipe(clientRes); - if (isModelCall) { + if (trackUsage) { proxyRes.on('end', () => { console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); }); diff --git a/proxy/start-proxy.js b/proxy/start-proxy.js index 5847076..cb57f29 100644 --- a/proxy/start-proxy.js +++ b/proxy/start-proxy.js @@ -7,9 +7,10 @@ const BACKEND_DEFS = { fireworks: { url: 'https://api.fireworks.ai/inference/v1', keyEnv: 'FIREWORKS_API_KEY' }, }; -// Legacy mode: start-proxy.js (used by deepclaude.sh/ps1) +// Legacy mode: start-proxy.js [defaultMode] (used by deepclaude.sh/ps1) const targetUrl = process.argv[2] || process.env.CHEAPCLAUDE_TARGET_URL; const apiKey = process.argv[3] || process.env.CHEAPCLAUDE_API_KEY; +const legacyDefaultMode = process.argv[4] || process.env.CHEAPCLAUDE_DEFAULT_MODE; if (targetUrl && apiKey) { // Legacy single-backend mode @@ -24,7 +25,7 @@ if (targetUrl && apiKey) { targetUrl, apiKey, backends: hasBackends ? backends : undefined, - defaultMode: hasBackends ? undefined : undefined, + defaultMode: legacyDefaultMode || undefined, }); console.log(port); } else { From 4525dd07f274af8eefcb2643d9f2e37add14c987 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 13:11:41 +0100 Subject: [PATCH 2/4] chore: mark deepclaude.sh executable So plain `deepclaude` works from a symlink in PATH without users needing to chmod +x after every checkout/branch switch. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 deepclaude.sh diff --git a/deepclaude.sh b/deepclaude.sh old mode 100644 new mode 100755 From 8594594c903896b74de4c5bda291efdb7c35cbc1 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 13:35:19 +0100 Subject: [PATCH 3/4] refactor: trim comments to WHY-only Removed narration and code-walking commentary; kept the non-obvious-WHY notes (clear_thinking_* mismatch, foreign-backend signed thinking blocks, ZlibError on mutated bodies, EXIT-trap preservation, last-write-wins on REVERSE_MODEL_REMAP collisions, must-not-be-\$()-on-start_proxy). Architectural reasoning moves to the PR description where it belongs. Co-Authored-By: Claude Opus 4.7 (1M context) --- deepclaude.sh | 23 ++++--------- proxy/model-proxy.js | 81 ++++++++++++-------------------------------- 2 files changed, 29 insertions(+), 75 deletions(-) diff --git a/deepclaude.sh b/deepclaude.sh index 0b3ec46..0ab5e0c 100755 --- a/deepclaude.sh +++ b/deepclaude.sh @@ -105,11 +105,9 @@ backend_long_name() { esac } -# Starts proxy/start-proxy.js in the background and waits for it to bind a -# port. Sets PROXY_PID, PROXY_PORT, PROXY_LOG as script globals so the EXIT -# trap (cleanup_proxy) can see the pid. Must be called WITHOUT command -# substitution — $(start_proxy) would run in a subshell and the globals -# would never reach the parent. +# Sets PROXY_PID, PROXY_PORT, PROXY_LOG as script globals so the EXIT trap +# can clean up the node child. Must be called WITHOUT command substitution +# — $(start_proxy) runs in a subshell and globals never reach the parent. # Requires: RESOLVED_URL, RESOLVED_KEY, BACKEND already set. start_proxy() { local backend_long @@ -274,9 +272,6 @@ launch_claude() { resolve_backend echo " Starting model proxy for $BACKEND..." - # Call directly (not via $()): start_proxy sets PROXY_PID/PROXY_PORT/PROXY_LOG - # as script globals. 
A subshell would lose them and the EXIT trap would - # leak the node child. start_proxy echo " Proxy log: $PROXY_LOG" @@ -285,17 +280,13 @@ launch_claude() { echo " Model: $RESOLVED_OPUS (main) + $RESOLVED_HAIKU (subagents)" echo "" - # Route through the local proxy. The proxy holds the backend API key - # privately (passed via argv to start-proxy.js) and substitutes it for - # outbound requests to the backend. We deliberately leave Claude Code's - # own auth state alone — whatever it had (OAuth bearer from `claude - # login`, an existing ANTHROPIC_AUTH_TOKEN, etc.) flows through and is - # used only on the image-fallback path where the proxy reroutes to - # api.anthropic.com. export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" set_model_env + # Deliberately do not unset ANTHROPIC_AUTH_TOKEN — whatever Claude Code + # is carrying is what authenticates at Anthropic on the image-reroute + # path; the proxy injects backend auth for non-image turns separately. - # Don't `exec` — we want the EXIT trap to clean up the proxy. + # Don't `exec` — the EXIT trap needs to fire to stop the proxy. claude "$@" } diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index c9b2669..36d9144 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -25,17 +25,9 @@ const MODEL_REMAP = { }, }; -// Reverse of MODEL_REMAP: backend-specific name → canonical claude-* name. -// Used on image-fallback to translate the backend name Claude Code sent -// (e.g. `deepseek-v4-pro`) back to a Claude name Anthropic recognizes -// before the request leaves the proxy. The inbound response carries the -// Claude name back; we translate it again so Claude Code sees the -// backend name end-to-end and never knows about the swap. -// -// Many-to-one collisions (e.g. claude-opus-4-6 and claude-opus-4-7 both -// map to deepseek-v4-pro) collapse to "last-write-wins" — which means -// the most recent claude-* name in the table is what Anthropic gets -// for that backend slot. 
That's the right default. +// Many-to-one collisions in MODEL_REMAP collapse last-write-wins — so +// `deepseek-v4-pro` reverses to `claude-opus-4-7` (the most recent key), +// not `claude-opus-4-6`. That's the right default. const REVERSE_MODEL_REMAP = {}; for (const [backend, table] of Object.entries(MODEL_REMAP)) { REVERSE_MODEL_REMAP[backend] = {}; @@ -49,10 +41,7 @@ const PRICING_PER_M = { openrouter: { input: 0.44, output: 0.87 }, fireworks: { input: 1.74, output: 3.48 }, anthropic: { input: 3.00, output: 15.00 }, - // Image-rerouted turns: Max OAuth consumes subscription quota, not - // per-token billing, so cost is 0. anthropic_equivalent (computed - // from PRICING_PER_M.anthropic in getCostSummary) still reflects - // what per-token Anthropic would have charged — feeding savings. + // Max OAuth burns subscription quota, not per-token cost. anthropic_max: { input: 0, output: 0 }, _single: { input: 0.44, output: 0.87 }, }; @@ -74,8 +63,6 @@ class UsageNormalizer extends Transform { this._onUsage = onUsage; this._inputTokens = 0; this._outputTokens = 0; - // { from: 'claude-opus-4-7', to: 'deepseek-v4-pro' } — applied to - // message.model in message_start events. Null = no rewrite. this._modelRewrite = modelRewrite || null; } @@ -330,20 +317,15 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const reqId = ++reqCount; const t0 = Date.now(); - // Routing is deferred to the end-of-body handler so we can - // inspect the request body for image content blocks and flip a - // single request from a non-Anthropic backend to api.anthropic.com. - // Disable with DEEPCLAUDE_IMAGE_FALLBACK=off. + // Routing is deferred to the end-of-body handler so the body + // can be inspected for image content blocks before the + // dest/headers are decided. 
const chunks = []; clientReq.on('data', c => chunks.push(c)); clientReq.on('end', () => { let body = Buffer.concat(chunks); const isMessagesPath = MODEL_PATHS.includes(urlPath); - // Single body parse. Downstream mutations all operate on - // `parsed` and we re-stringify once at the end. `parsed` - // stays null for non-messages paths or non-JSON bodies, in - // which case `body` is forwarded verbatim. let parsed = null; if (isMessagesPath) { try { parsed = JSON.parse(body); } catch {} @@ -360,17 +342,10 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const isModelCall = !isAnthropicMode && isMessagesPath; const trackUsage = isModelCall || forceAnthropicForImage; const dest = isModelCall ? state.target : new URL(ANTHROPIC_FALLBACK); - // Image-rerouted turns get their own bucket: cost=0 (Max is - // subscription quota, not per-token) so total_cost stays - // truthful, while anthropic_equivalent still reflects what - // per-token Anthropic would have charged — feeding savings. const effectiveMode = forceAnthropicForImage ? 'anthropic_max' : state.mode; - // For image-reroute, capture the backend model name so we - // can swap it to the canonical claude-* name on outbound - // (Anthropic doesn't recognize backend names like - // `deepseek-v4-pro`) and restore the backend name on the - // inbound response — Claude Code never sees the swap. + // Two-sided swap: outbound to Anthropic, reversed on the + // response so Claude Code never sees a non-backend name. let imageRewrite = null; if (forceAnthropicForImage && parsed?.model) { const canonical = REVERSE_MODEL_REMAP[state.mode]?.[parsed.model]; @@ -419,25 +394,13 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, headers['x-api-key'] = state.apiKey; } } - // For forceAnthropicForImage we leave the client's auth - // headers intact — whatever Claude Code is carrying (OAuth - // bearer from `claude login`, an explicit - // ANTHROPIC_AUTH_TOKEN, etc.) 
is what Anthropic sees. - - // Body mutations on the parsed object, in order: - // - Image-reroute: drop `thinking` and `context_management` - // (clear_thinking_* requires thinking enabled; Anthropic - // 400s on the mismatch). Swap model → canonical claude-*. - // - Model call: remap Anthropic model names to the - // backend-specific name. - // - Thinking-block strip: - // Anthropic + (prior non-Anthropic session OR - // image-routed) → strip ALL (foreign backends emit - // signed-but-invalid blocks). - // Anthropic + pure Anthropic session → strip unsigned. - // Non-Anthropic model call → strip ALL (backends reject - // blocks they didn't generate). + // forceAnthropicForImage path leaves the client auth header + // intact — whatever Claude Code is carrying authenticates + // at Anthropic. + if (parsed) { + // clear_thinking_* requires thinking enabled; Anthropic + // 400s on the mismatch. if (forceAnthropicForImage) { delete parsed.thinking; delete parsed.context_management; @@ -454,6 +417,10 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, } } + // Foreign backends emit signed-but-invalid thinking + // blocks; strip ALL when crossing into or out of one. + // Pure Anthropic sessions strip only unsigned, preserving + // valid signed blocks for continuity. if (isAnthropicMode) { if (state.hadNonAnthropicSession || forceAnthropicForImage) { stripAllThinkingBlocks(parsed); @@ -486,10 +453,9 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const isSSE = ct.includes('text/event-stream'); if (trackUsage && isSSE) { - // Strip content-encoding from forwarded headers — proxy - // mutates the body via UsageNormalizer (toString on - // bytes), so any upstream gzip bytes would arrive at - // the client with the gzip header but non-gzip payload. + // Mirror of the accept-encoding strip on the request + // side — the response body is being mutated, so the + // forwarded headers must not advertise gzip. 
const { 'content-encoding': _ce1, ...sseHeaders } = proxyRes.headers; clientRes.writeHead(proxyRes.statusCode, sseHeaders); const modelRewrite = imageRewrite @@ -512,9 +478,6 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, try { const j = JSON.parse(fixed); if (j.usage) recordUsage(effectiveMode, j.usage.input_tokens, j.usage.output_tokens); - // Image-reroute: swap response.model back from - // canonical claude-* to the backend name - // Claude Code originally sent. if (imageRewrite && j.model === imageRewrite.canonical) { j.model = imageRewrite.backend; fixed = Buffer.from(JSON.stringify(j)); From 87b4e2ec8e8405c99c84812d14807c6dd528ec68 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 7 May 2026 13:52:06 +0100 Subject: [PATCH 4/4] fix: strip stale images so image-bearing conversations return to backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, attaching one image silently pinned every subsequent turn to Anthropic Max OAuth — the TUI kept showing the cheap backend while the user's Max quota bled out, defeating the cost-savings premise of the product. Two changes: 1. forceAnthropicForImage now triggers only on images in the LATEST message (a fresh attachment, or a Read tool_result that just came back), not anywhere in conversation history. 2. On non-Anthropic routes, walk parsed.messages and replace every image content block (including those nested in tool_result.content[]) with a text placeholder. Text follow-ups now route back to DeepSeek and work from the assistant's prior textual description of the image. Trade-off: a question that genuinely needs to look at the pixels again ("what color is the third building from the left?") will not have access to the image after the strip. The conversation can re-Read the file if needed, which produces a fresh image in the latest message and routes that single turn back to Anthropic. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- proxy/model-proxy.js | 57 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 36d9144..446ae03 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -25,9 +25,7 @@ const MODEL_REMAP = { }, }; -// Many-to-one collisions in MODEL_REMAP collapse last-write-wins — so -// `deepseek-v4-pro` reverses to `claude-opus-4-7` (the most recent key), -// not `claude-opus-4-6`. That's the right default. +// Many-to-one collisions in MODEL_REMAP collapse last-write-wins. const REVERSE_MODEL_REMAP = {}; for (const [backend, table] of Object.entries(MODEL_REMAP)) { REVERSE_MODEL_REMAP[backend] = {}; @@ -146,9 +144,8 @@ function stripUnsignedThinkingBlocks(body) { } } -// True if any message contains an `image` content block, including nested -// inside `tool_result` content arrays (Claude Code's Read tool wraps a -// returned PNG in tool_result.content rather than at the top level). +// Recurses into tool_result.content[] because Claude Code's Read tool +// wraps a returned PNG there rather than at the top of the message. function containsImageBlock(messages) { if (!Array.isArray(messages)) return false; const blockHasImage = (block) => { @@ -166,6 +163,32 @@ function containsImageBlock(messages) { return false; } +// Replaces image blocks with a text placeholder, recursing into +// tool_result.content[]. Used on non-Anthropic routes so a single image +// turn earlier in history doesn't pin the rest of the conversation to +// Anthropic — which would silently spend Max quota while the TUI still +// shows the cheap backend. Returns the count of images replaced. 
+function stripImagesFromMessages(messages) { + if (!Array.isArray(messages)) return 0; + let count = 0; + const strip = (block) => { + if (!block) return block; + if (block.type === 'image') { + count++; + return { type: 'text', text: '[image omitted]' }; + } + if (block.type === 'tool_result' && Array.isArray(block.content)) { + return { ...block, content: block.content.map(strip) }; + } + return block; + }; + for (const msg of messages) { + if (!Array.isArray(msg.content)) continue; + msg.content = msg.content.map(strip); + } + return count; +} + export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, defaultMode }) { return new Promise((resolve, reject) => { const initialTarget = new URL(targetUrl); @@ -331,11 +354,17 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, try { parsed = JSON.parse(body); } catch {} } + // Only the LATEST message triggers the Anthropic reroute + // (a fresh attachment, or a Read tool_result that just came + // back). Stale images in older history are stripped below + // so the conversation can return to the backend on text-only + // follow-ups instead of silently pinning to Max quota. + const lastMsg = parsed?.messages?.[parsed.messages.length - 1]; const forceAnthropicForImage = ( IMAGE_FALLBACK_ENABLED && state.mode !== 'anthropic' && - parsed && - containsImageBlock(parsed.messages) + !!lastMsg && + containsImageBlock([lastMsg]) ); const isAnthropicMode = state.mode === 'anthropic' || forceAnthropicForImage; @@ -409,6 +438,18 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, } } + // Strip stale images from history on non-Anthropic + // routes. Only the latest message can trigger the + // Anthropic reroute, so without this the older image + // blocks would be forwarded to a backend that cannot + // process image content.
+ if (isModelCall) { + const stripped = stripImagesFromMessages(parsed.messages); + if (stripped > 0) { + console.log(`[MODEL-PROXY] #${reqId} stripped ${stripped} stale image block${stripped === 1 ? '' : 's'} from history`); + } + } + if (isModelCall && MODEL_REMAP[state.mode]) { const mapped = MODEL_REMAP[state.mode][parsed.model]; if (mapped) {