diff --git a/deepclaude.sh b/deepclaude.sh old mode 100644 new mode 100755 index 5f59e3a..0ab5e0c --- a/deepclaude.sh +++ b/deepclaude.sh @@ -4,7 +4,17 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# Resolve SCRIPT_DIR through any symlink chain (e.g. /usr/local/bin/deepclaude +# -> /path/to/repo/deepclaude.sh) so $SCRIPT_DIR/proxy/... works regardless of +# how the script was invoked. +_source="${BASH_SOURCE[0]}" +while [ -L "$_source" ]; do + _dir="$(cd "$(dirname "$_source")" && pwd)" + _source="$(readlink "$_source")" + [[ "$_source" != /* ]] && _source="$_dir/$_source" +done +SCRIPT_DIR="$(cd "$(dirname "$_source")" && pwd)" +unset _source _dir # --- Config --- DEEPSEEK_URL="https://api.deepseek.com/anthropic" @@ -85,6 +95,60 @@ set_model_env() { export CLAUDE_CODE_EFFORT_LEVEL="max" } +backend_long_name() { + case "$1" in + ds|deepseek) echo "deepseek" ;; + or|openrouter) echo "openrouter" ;; + fw|fireworks) echo "fireworks" ;; + anthropic) echo "anthropic" ;; + *) echo "ERROR: Unknown backend '$1'. Use: ds, or, fw, anthropic" >&2; return 1 ;; + esac +} + +# Sets PROXY_PID, PROXY_PORT, PROXY_LOG as script globals so the EXIT trap +# can clean up the node child. Must be called WITHOUT command substitution +# — $(start_proxy) runs in a subshell and globals never reach the parent. +# Requires: RESOLVED_URL, RESOLVED_KEY, BACKEND already set. +start_proxy() { + local backend_long + backend_long=$(backend_long_name "$BACKEND") || exit 1 + + PROXY_LOG="${PROXY_LOG:-/tmp/deepclaude-proxy.$$.log}" + : > "$PROXY_LOG" + node "$SCRIPT_DIR/proxy/start-proxy.js" "$RESOLVED_URL" "$RESOLVED_KEY" "$backend_long" >> "$PROXY_LOG" 2>&1 & + PROXY_PID=$! + + # The proxy emits a banner line, then a bare-numeric port line on a + # successful bind. Match the bare integer to skip the banner; do not + # introduce other numeric-only stdout in proxy startup. + local proxy_port="" + local tries=0 + while [[ -z "$proxy_port" ]] && [[ $tries -lt 30 ]]; do + if kill -0 "$PROXY_PID" 2>/dev/null; then + # `|| true`: with `set -o pipefail`, grep no-match (exit 1) + # would otherwise exit the script; we expect zero matches on + # early iterations before the proxy has emitted its port. + proxy_port=$(grep -E '^[0-9]+$' "$PROXY_LOG" 2>/dev/null | head -1 || true) + else + echo "ERROR: Proxy process died during startup" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + [[ -z "$proxy_port" ]] && sleep 0.2 + tries=$((tries + 1)) + done + + if [[ -z "$proxy_port" ]]; then + echo "ERROR: Proxy failed to report a port within 6s" >&2 + echo " Log: $PROXY_LOG" >&2 + tail -20 "$PROXY_LOG" >&2 2>/dev/null + exit 1 + fi + + PROXY_PORT="$proxy_port" +} + show_status() { echo "" echo " deepclaude — Backend Status" @@ -207,17 +271,23 @@ launch_claude() { resolve_backend + echo " Starting model proxy for $BACKEND..." + start_proxy + echo " Proxy log: $PROXY_LOG" + echo " Launching Claude Code via $BACKEND..." - echo " Endpoint: $RESOLVED_URL" + echo " Proxy on :$PROXY_PORT -> $RESOLVED_URL" echo " Model: $RESOLVED_OPUS (main) + $RESOLVED_HAIKU (subagents)" echo "" - export ANTHROPIC_BASE_URL="$RESOLVED_URL" - export ANTHROPIC_AUTH_TOKEN="$RESOLVED_KEY" + export ANTHROPIC_BASE_URL="http://127.0.0.1:$PROXY_PORT" set_model_env - unset ANTHROPIC_API_KEY + # Deliberately do not unset ANTHROPIC_AUTH_TOKEN — whatever Claude Code + # is carrying is what authenticates at Anthropic on the image-reroute + # path; the proxy injects backend auth for non-image turns separately. - exec claude "$@" + # Don't `exec` — the EXIT trap needs to fire to stop the proxy. + claude "$@" } launch_remote() { diff --git a/proxy/model-proxy.js b/proxy/model-proxy.js index 85a9295..446ae03 100644 --- a/proxy/model-proxy.js +++ b/proxy/model-proxy.js @@ -6,6 +6,7 @@ import { Transform } from 'stream'; const ANTHROPIC_FALLBACK = 'https://api.anthropic.com'; const MODEL_PATHS = ['/v1/messages']; const REQUEST_TIMEOUT_MS = 5 * 60 * 1000; // 5 min per request +const IMAGE_FALLBACK_ENABLED = (process.env.DEEPCLAUDE_IMAGE_FALLBACK || 'anthropic') !== 'off'; const MODEL_REMAP = { deepseek: { @@ -24,26 +25,43 @@ const MODEL_REMAP = { }, }; +// Many-to-one collisions in MODEL_REMAP collapse last-write-wins. +const REVERSE_MODEL_REMAP = {}; +for (const [backend, table] of Object.entries(MODEL_REMAP)) { + REVERSE_MODEL_REMAP[backend] = {}; + for (const [claudeName, backendName] of Object.entries(table)) { + REVERSE_MODEL_REMAP[backend][backendName] = claudeName; + } +} + const PRICING_PER_M = { - deepseek: { input: 0.44, output: 0.87 }, - openrouter: { input: 0.44, output: 0.87 }, - fireworks: { input: 1.74, output: 3.48 }, - anthropic: { input: 3.00, output: 15.00 }, - _single: { input: 0.44, output: 0.87 }, + deepseek: { input: 0.44, output: 0.87 }, + openrouter: { input: 0.44, output: 0.87 }, + fireworks: { input: 1.74, output: 3.48 }, + anthropic: { input: 3.00, output: 15.00 }, + // Max OAuth burns subscription quota, not per-token cost. + anthropic_max: { input: 0, output: 0 }, + _single: { input: 0.44, output: 0.87 }, }; /** * Transform stream that intercepts SSE events and injects missing `usage` * fields. DeepSeek/OpenRouter may omit `usage` in message_start or * message_delta, which crashes Claude Code ("$.input_tokens" is undefined). + * + * Optionally rewrites `message.model` in `message_start` events — used on + * the image-fallback path so Claude Code sees the backend model name on + * the response side even though Anthropic served the claude-* name on + * the wire. */ class UsageNormalizer extends Transform { - constructor(onUsage) { + constructor(onUsage, { modelRewrite } = {}) { super(); this._buf = ''; this._onUsage = onUsage; this._inputTokens = 0; this._outputTokens = 0; + this._modelRewrite = modelRewrite || null; } _transform(chunk, _enc, cb) { @@ -69,6 +87,10 @@ class UsageNormalizer extends Transform { d.message.usage = { input_tokens: 0, output_tokens: 0 }; changed = true; } + if (this._modelRewrite && d.message.model === this._modelRewrite.from) { + d.message.model = this._modelRewrite.to; + changed = true; + } } if (d.type === 'message_delta') { if (d.usage) { @@ -122,6 +144,51 @@ function stripUnsignedThinkingBlocks(body) { } } +// Recurses into tool_result.content[] because Claude Code's Read tool +// wraps a returned PNG there rather than at the top of the message. +function containsImageBlock(messages) { + if (!Array.isArray(messages)) return false; + const blockHasImage = (block) => { + if (!block) return false; + if (block.type === 'image') return true; + if (block.type === 'tool_result' && Array.isArray(block.content)) { + return block.content.some(blockHasImage); + } + return false; + }; + for (const msg of messages) { + if (!Array.isArray(msg.content)) continue; + if (msg.content.some(blockHasImage)) return true; + } + return false; +} + +// Replaces image blocks with a text placeholder, recursing into +// tool_result.content[]. Used on non-Anthropic routes so a single image +// turn earlier in history doesn't pin the rest of the conversation to +// Anthropic — which would silently spend Max quota while the TUI still +// shows the cheap backend. Returns the count of images replaced. +function stripImagesFromMessages(messages) { + if (!Array.isArray(messages)) return 0; + let count = 0; + const strip = (block) => { + if (!block) return block; + if (block.type === 'image') { + count++; + return { type: 'text', text: '[image omitted]' }; + } + if (block.type === 'tool_result' && Array.isArray(block.content)) { + return { ...block, content: block.content.map(strip) }; + } + return block; + }; + for (const msg of messages) { + if (!Array.isArray(msg.content)) continue; + msg.content = msg.content.map(strip); + } + return count; +} + export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, defaultMode }) { return new Promise((resolve, reject) => { const initialTarget = new URL(targetUrl); @@ -270,87 +337,142 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, return; } - // In anthropic mode, everything passes through transparently - const isAnthropicMode = state.mode === 'anthropic'; - const isModelCall = !isAnthropicMode && MODEL_PATHS.includes(urlPath); - const dest = isModelCall ? state.target : new URL(ANTHROPIC_FALLBACK); - - // Build upstream path. target.pathname may overlap with - // clientReq.url (e.g. OpenRouter /api/v1 + /v1/messages). - // Strip the shared prefix to avoid /api/v1/v1/messages. - let fullPath; - if (isModelCall) { - const base = state.target.pathname.replace(/\/$/, ''); - let overlap = ''; - for (let i = 1; i <= Math.min(base.length, urlPath.length); i++) { - if (base.endsWith(urlPath.substring(0, i))) overlap = urlPath.substring(0, i); - } - fullPath = overlap ? base + urlPath.substring(overlap.length) : base + urlPath; - } else { - fullPath = clientReq.url; - } - const reqId = ++reqCount; const t0 = Date.now(); - if (isModelCall) { - console.log(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}`); - } + // Routing is deferred to the end-of-body handler so the body + // can be inspected for image content blocks before the + // dest/headers are decided. + const chunks = []; + clientReq.on('data', c => chunks.push(c)); + clientReq.on('end', () => { + let body = Buffer.concat(chunks); + const isMessagesPath = MODEL_PATHS.includes(urlPath); - const headers = { ...clientReq.headers, host: dest.host }; - delete headers['content-length']; + let parsed = null; + if (isMessagesPath) { + try { parsed = JSON.parse(body); } catch {} + } + + // Only the LATEST message triggers the Anthropic reroute + // (a fresh attachment, or a Read tool_result that just came + // back). Stale images in older history are stripped below + // so the conversation can return to the backend on text-only + // follow-ups instead of silently pinning to Max quota. + const lastMsg = parsed?.messages?.[parsed.messages.length - 1]; + const forceAnthropicForImage = ( + IMAGE_FALLBACK_ENABLED && + state.mode !== 'anthropic' && + !!lastMsg && + containsImageBlock([lastMsg]) + ); + + const isAnthropicMode = state.mode === 'anthropic' || forceAnthropicForImage; + const isModelCall = !isAnthropicMode && isMessagesPath; + const trackUsage = isModelCall || forceAnthropicForImage; + const dest = isModelCall ? state.target : new URL(ANTHROPIC_FALLBACK); + const effectiveMode = forceAnthropicForImage ? 'anthropic_max' : state.mode; + + // Two-sided swap: outbound to Anthropic, reversed on the + // response so Claude Code never sees a non-backend name. + let imageRewrite = null; + if (forceAnthropicForImage && parsed?.model) { + const canonical = REVERSE_MODEL_REMAP[state.mode]?.[parsed.model]; + if (canonical) { + imageRewrite = { backend: parsed.model, canonical }; + } + } - if (isModelCall) { - delete headers['authorization']; - delete headers['x-api-key']; - if (state.useBearer) { - headers['authorization'] = `Bearer ${state.apiKey}`; + // Build upstream path. target.pathname may overlap with + // clientReq.url (e.g. OpenRouter /api/v1 + /v1/messages). + // Strip the shared prefix to avoid /api/v1/v1/messages. + let fullPath; + if (isModelCall) { + const base = state.target.pathname.replace(/\/$/, ''); + let overlap = ''; + for (let i = 1; i <= Math.min(base.length, urlPath.length); i++) { + if (base.endsWith(urlPath.substring(0, i))) overlap = urlPath.substring(0, i); + } + fullPath = overlap ? base + urlPath.substring(overlap.length) : base + urlPath; } else { - headers['x-api-key'] = state.apiKey; + fullPath = clientReq.url; } - } - const chunks = []; - clientReq.on('data', c => chunks.push(c)); - clientReq.on('end', () => { - let body = Buffer.concat(chunks); + if (trackUsage) { + const tag = forceAnthropicForImage + ? ` [image→anthropic${imageRewrite ? `, ${imageRewrite.backend}→${imageRewrite.canonical}` : ''}]` + : ''; + console.log(`[MODEL-PROXY] #${reqId} → ${dest.hostname}${fullPath}${tag}`); + } + + const headers = { ...clientReq.headers, host: dest.host }; + delete headers['content-length']; + // Force plain bytes upstream — the proxy mutates response + // bodies (UsageNormalizer toString'es bytes, normalizeJsonBody + // reparses) and would otherwise emit a content-encoding: gzip + // header followed by non-gzip bytes, breaking the client with + // "Decompression error: ZlibError". + delete headers['accept-encoding']; + + if (isModelCall) { + delete headers['authorization']; + delete headers['x-api-key']; + if (state.useBearer) { + headers['authorization'] = `Bearer ${state.apiKey}`; + } else { + headers['x-api-key'] = state.apiKey; + } + } + // forceAnthropicForImage path leaves the client auth header + // intact — whatever Claude Code is carrying authenticates + // at Anthropic. + + if (parsed) { + // clear_thinking_* requires thinking enabled; Anthropic + // 400s on the mismatch. + if (forceAnthropicForImage) { + delete parsed.thinking; + delete parsed.context_management; + if (imageRewrite) { + parsed.model = imageRewrite.canonical; + } + } - // Remap Anthropic model names to backend-specific names - if (isModelCall && MODEL_REMAP[state.mode]) { - try { - const parsed = JSON.parse(body); + // Strip stale images from history on non-Anthropic + // routes. Without this, every text follow-up on an + // image-bearing conversation would still detect the + // image and re-route to Anthropic — burning Max quota + // while the TUI advertises the cheap backend. + if (isModelCall) { + const stripped = stripImagesFromMessages(parsed.messages); + if (stripped > 0) { + console.log(`[MODEL-PROXY] #${reqId} stripped ${stripped} stale image block${stripped === 1 ? '' : 's'} from history`); + } + } + + if (isModelCall && MODEL_REMAP[state.mode]) { const mapped = MODEL_REMAP[state.mode][parsed.model]; if (mapped) { console.log(`[MODEL-PROXY] #${reqId} model remap: ${parsed.model} → ${mapped}`); parsed.model = mapped; - body = Buffer.from(JSON.stringify(parsed)); } - } catch { /* not JSON or parse error, pass through */ } - } + } - // Strip thinking blocks before forwarding. - // Non-Anthropic: strip ALL blocks — backends reject thinking blocks - // they didn't generate, even unsigned ones. - // Anthropic after a non-Anthropic session: also strip ALL, because - // foreign backends generate signed-but-invalid thinking blocks that - // stripUnsignedThinkingBlocks passes through, causing Anthropic 400s. - if (isAnthropicMode && MODEL_PATHS.includes(urlPath)) { - try { - const parsed = JSON.parse(body); - if (state.hadNonAnthropicSession) { + // Foreign backends emit signed-but-invalid thinking + // blocks; strip ALL when crossing into or out of one. + // Pure Anthropic sessions strip only unsigned, preserving + // valid signed blocks for continuity. + if (isAnthropicMode) { + if (state.hadNonAnthropicSession || forceAnthropicForImage) { stripAllThinkingBlocks(parsed); } else { stripUnsignedThinkingBlocks(parsed); } - body = Buffer.from(JSON.stringify(parsed)); - } catch { /* pass through */ } - } - if (isModelCall) { - try { - const parsed = JSON.parse(body); + } else if (isModelCall) { stripAllThinkingBlocks(parsed); - body = Buffer.from(JSON.stringify(parsed)); - } catch { /* pass through */ } + } + + body = Buffer.from(JSON.stringify(parsed)); } const opts = { @@ -363,7 +485,7 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, }; const proxyReq = httpsRequest(opts, (proxyRes) => { - if (isModelCall) { + if (trackUsage) { const ttfb = Date.now() - t0; console.log(`[MODEL-PROXY] #${reqId} TTFB ${ttfb}ms (status ${proxyRes.statusCode})`); } @@ -371,33 +493,48 @@ export function startModelProxy({ targetUrl, apiKey, startPort = 3200, backends, const ct = proxyRes.headers['content-type'] || ''; const isSSE = ct.includes('text/event-stream'); - if (isModelCall && isSSE) { - clientRes.writeHead(proxyRes.statusCode, proxyRes.headers); - const norm = new UsageNormalizer((inp, out) => recordUsage(state.mode, inp, out)); + if (trackUsage && isSSE) { + // Mirror of the accept-encoding strip on the request + // side — the response body is being mutated, so the + // forwarded headers must not advertise gzip. + const { 'content-encoding': _ce1, ...sseHeaders } = proxyRes.headers; + clientRes.writeHead(proxyRes.statusCode, sseHeaders); + const modelRewrite = imageRewrite + ? { from: imageRewrite.canonical, to: imageRewrite.backend } + : null; + const norm = new UsageNormalizer( + (inp, out) => recordUsage(effectiveMode, inp, out), + { modelRewrite }, + ); proxyRes.pipe(norm).pipe(clientRes); proxyRes.on('end', () => { console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (${norm._inputTokens}in/${norm._outputTokens}out)`); }); - } else if (isModelCall && ct.includes('application/json')) { + } else if (trackUsage && ct.includes('application/json')) { const respChunks = []; proxyRes.on('data', c => respChunks.push(c)); proxyRes.on('end', () => { const raw = Buffer.concat(respChunks); - const fixed = normalizeJsonBody(raw); + let fixed = normalizeJsonBody(raw); try { const j = JSON.parse(fixed); - if (j.usage) recordUsage(state.mode, j.usage.input_tokens, j.usage.output_tokens); + if (j.usage) recordUsage(effectiveMode, j.usage.input_tokens, j.usage.output_tokens); + if (imageRewrite && j.model === imageRewrite.canonical) { + j.model = imageRewrite.backend; + fixed = Buffer.from(JSON.stringify(j)); + } } catch {} - const outHeaders = { ...proxyRes.headers, 'content-length': fixed.length }; + const { 'content-encoding': _ce2, ...jsonHeaders } = proxyRes.headers; + const outHeaders = { ...jsonHeaders, 'content-length': fixed.length }; clientRes.writeHead(proxyRes.statusCode, outHeaders); clientRes.end(fixed); console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s (json, ${fixed.length}b)`); }); } else { - // Non-model or unknown content-type: pass through + // Non-model or unknown content-type: pass through unchanged. clientRes.writeHead(proxyRes.statusCode, proxyRes.headers); proxyRes.pipe(clientRes); - if (isModelCall) { + if (trackUsage) { proxyRes.on('end', () => { console.log(`[MODEL-PROXY] #${reqId} done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); }); diff --git a/proxy/start-proxy.js b/proxy/start-proxy.js index 5847076..cb57f29 100644 --- a/proxy/start-proxy.js +++ b/proxy/start-proxy.js @@ -7,9 +7,10 @@ const BACKEND_DEFS = { fireworks: { url: 'https://api.fireworks.ai/inference/v1', keyEnv: 'FIREWORKS_API_KEY' }, }; -// Legacy mode: start-proxy.js (used by deepclaude.sh/ps1) +// Legacy mode: start-proxy.js [defaultMode] (used by deepclaude.sh/ps1) const targetUrl = process.argv[2] || process.env.CHEAPCLAUDE_TARGET_URL; const apiKey = process.argv[3] || process.env.CHEAPCLAUDE_API_KEY; +const legacyDefaultMode = process.argv[4] || process.env.CHEAPCLAUDE_DEFAULT_MODE; if (targetUrl && apiKey) { // Legacy single-backend mode @@ -24,7 +25,7 @@ if (targetUrl && apiKey) { targetUrl, apiKey, backends: hasBackends ? backends : undefined, - defaultMode: hasBackends ? undefined : undefined, + defaultMode: legacyDefaultMode || undefined, }); console.log(port); } else {