From faca6baaf47af93c3c8c93ec89b37c98ff6c50b8 Mon Sep 17 00:00:00 2001 From: oratis Date: Thu, 2 Jul 2026 12:54:51 +0800 Subject: [PATCH] feat(infra): Anthropic reverse-proxy relay for Cloud Run (packaging/gcp-relay) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A ~110-line, zero-dep transparent relay: forwards /v1/* (streaming included) to api.anthropic.com from Cloud Run. Lets a client on a network that can't reliably reach Anthropic (flaky local proxy) point ANTHROPIC_BASE_URL at the relay instead — GCP egress to Anthropic is stable. Security: the real Anthropic key lives only in Secret Manager and is injected server-side; clients authenticate with a separate, revocable RELAY_TOKEN sent as x-api-key (key-swap gate). NOT an OpenClaw-style billing proxy — no anthropic-beta:claude-code spoofing, no OAuth reuse; requests bill normally. deploy.sh handles secrets + IAM + Cloud Run deploy and prints the config.env lines. Deployed live to oratis-491316 (us-central1). Note: LISA's loadConfigEnv does NOT override an already-set env var, so a shell/launchd ANTHROPIC_BASE_URL shadows config.env — the Mac's serve-command.txt uses `env -u ANTHROPIC_BASE_URL` so config.env wins. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packaging/gcp-relay/Dockerfile | 6 ++ packaging/gcp-relay/README.md | 57 ++++++++++++++++ packaging/gcp-relay/deploy.sh | 64 +++++++++++++++++ packaging/gcp-relay/index.mjs | 113 +++++++++++++++++++++++++++++++ packaging/gcp-relay/package.json | 7 ++ 5 files changed, 247 insertions(+) create mode 100644 packaging/gcp-relay/Dockerfile create mode 100644 packaging/gcp-relay/README.md create mode 100755 packaging/gcp-relay/deploy.sh create mode 100644 packaging/gcp-relay/index.mjs create mode 100644 packaging/gcp-relay/package.json diff --git a/packaging/gcp-relay/Dockerfile b/packaging/gcp-relay/Dockerfile new file mode 100644 index 0000000..1d91866 --- /dev/null +++ b/packaging/gcp-relay/Dockerfile @@ -0,0 +1,6 @@ +FROM node:20-slim +WORKDIR /app +COPY package.json index.mjs ./ +ENV NODE_ENV=production +# Cloud Run injects PORT; index.mjs reads it (defaults to 8080). +CMD ["node", "index.mjs"] diff --git a/packaging/gcp-relay/README.md b/packaging/gcp-relay/README.md new file mode 100644 index 0000000..46cd689 --- /dev/null +++ b/packaging/gcp-relay/README.md @@ -0,0 +1,57 @@ +# anthropic-relay — a transparent Claude relay on Cloud Run + +A ~110-line, zero-dependency reverse proxy that forwards the Anthropic API +(`/v1/*`, streaming included) from Cloud Run to `https://api.anthropic.com`. + +## Why + +The LISA backend on the Mac routes all outbound LLM calls through a local HTTP +proxy (`HTTPS_PROXY=127.0.0.1:7897`). When that proxy flaps, the Claude call fails +and the chat turn comes back empty ("Lisa didn't reply"). GCP's egress to +Anthropic is reliable, so pointing the Mac at this relay removes the flaky hop. + +Zero LISA code change: the Anthropic provider already honors `ANTHROPIC_BASE_URL` +(`src/providers/anthropic.ts`). 
+ +## How it works (key-swap gate) + +``` +Mac (ANTHROPIC_BASE_URL=relay, x-api-key=RELAY_TOKEN) + └─HTTPS─▶ Cloud Run relay ──(swap x-api-key → real key)──▶ api.anthropic.com +``` + +- The **real Anthropic key lives only in GCP** (Secret Manager). It never sits on + the Mac. +- The Mac authenticates with a separate **`RELAY_TOKEN`**, sent in the `x-api-key` + header (which the Anthropic SDK already sends). The relay verifies it, then + replaces it with the real key before forwarding. Revoke access by rotating the + token secret — the Anthropic key is untouched. +- Response is streamed straight back, so SSE (`stream: true`) works unchanged. + +### This is NOT an OpenClaw-style billing proxy + +Community "Claude relays" (e.g. `John-Rood/claude-proxy`, +`majdyz/openclaw-claude-proxy`) exist to make API calls **bill against a Claude +Code / Max subscription** instead of usage-based API credits — by injecting +`anthropic-beta: claude-code-20250219` + a Claude Code system prompt, or by +spawning the local `claude` CLI and reusing its OAuth session. Anthropic +fingerprints and **rejects** these ("third-party-app rejection"), and it's against +their terms. This relay does none of that: it forwards authenticated requests that +bill normally against the real key. The goal here is **network reachability, not +cheaper billing.** + +## Deploy + +```bash +ANTHROPIC_API_KEY=sk-ant-... ./deploy.sh +``` + +Prints the relay URL + `RELAY_TOKEN` and the exact `~/.lisa/config.env` lines. +`MIN_INSTANCES=1 ./deploy.sh` keeps one warm instance (no cold-start latency on the +first message). + +## Cost + +Cloud Run scales to zero by default (pay per request + egress; pennies at personal +volume). Anthropic usage bills against your real key as normal. `min-instances=1` +adds a small always-on charge (~$5–15/mo) for lower first-message latency. 
diff --git a/packaging/gcp-relay/deploy.sh b/packaging/gcp-relay/deploy.sh
new file mode 100755
index 0000000..47b1a58
--- /dev/null
+++ b/packaging/gcp-relay/deploy.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+# Deploy the Anthropic relay to Cloud Run (see README.md).
+#
+#   ANTHROPIC_API_KEY=sk-ant-... ./deploy.sh
+#
+# Stores the real Anthropic key + RELAY_TOKEN in Secret Manager, grants the Cloud Run
+# runtime SA read access, deploys, and prints the config.env lines for the Mac. Re-runs
+# are safe, but ROTATE the token unless RELAY_TOKEN is exported with the current value.
+set -euo pipefail
+cd "$(dirname "$0")"
+
+PROJECT="${GCP_PROJECT:-oratis-491316}"
+REGION="${REGION:-us-central1}"
+SERVICE="${SERVICE:-anthropic-relay}"
+MIN_INSTANCES="${MIN_INSTANCES:-0}" # set 1 to avoid cold-start latency on the first message (~$5-15/mo)
+: "${ANTHROPIC_API_KEY:?set ANTHROPIC_API_KEY (the real Anthropic key to hold server-side)}"
+RELAY_TOKEN="${RELAY_TOKEN:-$(openssl rand -hex 24)}"
+
+echo "==> project=$PROJECT region=$REGION service=$SERVICE"
+export CLOUDSDK_CORE_PROJECT="$PROJECT" # scopes every gcloud call below without rewriting the user's global config
+gcloud services enable run.googleapis.com secretmanager.googleapis.com cloudbuild.googleapis.com >/dev/null
+
+put_secret() { # $1=name $2=value; create on first run, else add a version (value goes via stdin, never argv)
+  if gcloud secrets describe "$1" >/dev/null 2>&1; then
+    printf '%s' "$2" | gcloud secrets versions add "$1" --data-file=- >/dev/null
+  else
+    printf '%s' "$2" | gcloud secrets create "$1" --data-file=- --replication-policy=automatic >/dev/null
+  fi
+}
+echo "==> writing secrets"
+put_secret anthropic-api-key "$ANTHROPIC_API_KEY"
+put_secret relay-token "$RELAY_TOKEN"
+
+# Cloud Run runtime SA needs to read the secrets.
+PROJ_NUM=$(gcloud projects describe "$PROJECT" --format='value(projectNumber)')
+RUNTIME_SA="${PROJ_NUM}-compute@developer.gserviceaccount.com"
+for s in anthropic-api-key relay-token; do
+  gcloud secrets add-iam-policy-binding "$s" \
+    --member="serviceAccount:${RUNTIME_SA}" --role=roles/secretmanager.secretAccessor >/dev/null 2>&1 || echo "WARN: could not grant secretAccessor on $s to ${RUNTIME_SA}; deploy will fail unless that binding already exists" >&2
+done
+
+echo "==> deploying to Cloud Run"
+gcloud run deploy "$SERVICE" \
+  --source . \
+  --region "$REGION" \
+  --allow-unauthenticated \
+  --min-instances="$MIN_INSTANCES" \
+  --max-instances=3 \
+  --cpu=1 --memory=256Mi \
+  --timeout=3600 \
+  --set-secrets=ANTHROPIC_API_KEY=anthropic-api-key:latest,RELAY_TOKEN=relay-token:latest
+
+URL=$(gcloud run services describe "$SERVICE" --region "$REGION" --format='value(status.url)')
+echo
+echo "✓ Relay live: $URL"
+echo
+echo "On the Mac, in ~/.lisa/config.env:"
+echo " ANTHROPIC_BASE_URL=$URL"
+echo " ANTHROPIC_API_KEY=$RELAY_TOKEN"
+echo " (and REMOVE any HTTPS_PROXY / HTTP_PROXY so calls go direct to the relay)"
+echo
+echo "Smoke test:"
+echo " curl -sS $URL/v1/messages -H 'x-api-key: $RELAY_TOKEN' -H 'anthropic-version: 2023-06-01' \\"
+echo " -H 'content-type: application/json' -d '{\"model\":\"claude-sonnet-5\",\"max_tokens\":16,\"messages\":[{\"role\":\"user\",\"content\":\"say hi\"}]}'"
diff --git a/packaging/gcp-relay/index.mjs b/packaging/gcp-relay/index.mjs
new file mode 100644
index 0000000..7e2fa27
--- /dev/null
+++ b/packaging/gcp-relay/index.mjs
@@ -0,0 +1,139 @@
+// Anthropic API reverse proxy for Cloud Run.
+//
+// Why: a client on a network that can't reliably reach api.anthropic.com (e.g. a
+// flaky local HTTP proxy) points ANTHROPIC_BASE_URL at this service instead. GCP
+// egress to Anthropic is reliable, so the round trip stops failing.
+//
+// Security model (key-swap gate): the REAL Anthropic key lives only here (Secret
+// Manager → env). The client authenticates to the relay with a separate
+// RELAY_TOKEN, presented in the `x-api-key` header (which is what the Anthropic
+// SDK already sends). The relay checks it, then swaps in the real key before
+// forwarding. So: the real key never leaves GCP; the token is revocable; and the
+// service isn't an open Anthropic-funded proxy.
+//
+// This is a TRANSPARENT relay — it does NOT touch billing (no `anthropic-beta:
+// claude-code-*` spoofing, no OAuth-session reuse). Requests bill normally against
+// the real key, which is ToS-clean. See README.md.
+import { createHash, timingSafeEqual } from "node:crypto";
+import { once } from "node:events";
+import http from "node:http";
+
+const UPSTREAM = process.env.UPSTREAM || "https://api.anthropic.com";
+const RELAY_TOKEN = process.env.RELAY_TOKEN || "";
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY || "";
+const PORT = Number(process.env.PORT) || 8080;
+
+// Headers we must not copy from client→upstream: the auth we replace, framing
+// headers fetch() sets itself, and hop-by-hop headers (RFC 9110 §7.6.1), which
+// describe the client↔relay connection only — Node's fetch may refuse a request
+// carrying some of them. accept-encoding is dropped so fetch negotiates only
+// encodings it can decode (we strip content-encoding on the way back).
+const STRIP_REQ = new Set([
+  "host", "content-length", "connection", "x-api-key", "authorization",
+  "transfer-encoding", "keep-alive", "upgrade", "te", "trailer", "expect",
+  "proxy-authorization", "proxy-connection", "accept-encoding",
+]);
+// Headers we must not copy from upstream→client (let Node re-frame the body).
+const STRIP_RES = new Set(["content-encoding", "content-length", "transfer-encoding", "connection"]);
+
+// Token the client presented: `x-api-key`, or else an `Authorization: Bearer` value.
+const presentedToken = (req) =>
+  req.headers["x-api-key"] ||
+  (req.headers["authorization"] || "").replace(/^Bearer\s+/i, "") ||
+  "";
+
+const server = http.createServer(async (req, res) => {
+  // If the client disconnects before the response is complete, cancel the
+  // upstream call so an abandoned streaming turn stops generating (and billing).
+  const gone = new AbortController();
+  res.on("close", () => { if (!res.writableFinished) gone.abort(); });
+  try {
+    if (req.url === "/" || req.url === "/health" || req.url === "/healthz") {
+      res.writeHead(200, { "content-type": "text/plain" });
+      res.end("ok");
+      return;
+    }
+    // Only proxy the Anthropic API surface.
+    if (!req.url.startsWith("/v1/")) {
+      res.writeHead(404, { "content-type": "text/plain" });
+      res.end("not found");
+      return;
+    }
+    // Refuse to run half-configured rather than forward unauthenticated traffic.
+    if (!RELAY_TOKEN || !ANTHROPIC_API_KEY) {
+      res.writeHead(503, { "content-type": "text/plain" });
+      res.end("relay not configured");
+      return;
+    }
+    // Gate: constant-time compare of the relay token.
+    if (!safeEqual(presentedToken(req), RELAY_TOKEN)) {
+      res.writeHead(401, { "content-type": "application/json" });
+      res.end(JSON.stringify({ type: "error", error: { type: "authentication_error", message: "invalid relay token" } }));
+      return;
+    }
+
+    // Buffer the request body (chat payloads are small).
+    const chunks = [];
+    for await (const c of req) chunks.push(c);
+    const body = Buffer.concat(chunks);
+
+    // Forward headers, swapping auth to the real key.
+    const headers = {};
+    for (const [k, v] of Object.entries(req.headers)) {
+      if (!STRIP_REQ.has(k.toLowerCase())) headers[k] = v;
+    }
+    headers["x-api-key"] = ANTHROPIC_API_KEY;
+    if (!headers["anthropic-version"]) headers["anthropic-version"] = "2023-06-01";
+
+    let upstream;
+    try {
+      upstream = await fetch(UPSTREAM + req.url, {
+        method: req.method,
+        headers,
+        body: req.method === "GET" || req.method === "HEAD" ? undefined : body,
+        signal: gone.signal,
+      });
+    } catch (e) {
+      if (gone.signal.aborted) return; // client hung up first; nobody left to answer
+      res.writeHead(502, { "content-type": "application/json" });
+      res.end(JSON.stringify({ type: "error", error: { type: "relay_upstream_error", message: String(e && e.message || e) } }));
+      return;
+    }
+
+    // Stream the response straight back (SSE for streaming completions).
+    const respHeaders = {};
+    upstream.headers.forEach((v, k) => { if (!STRIP_RES.has(k.toLowerCase())) respHeaders[k] = v; });
+    res.writeHead(upstream.status, respHeaders);
+    if (upstream.body) {
+      const reader = upstream.body.getReader();
+      for (;;) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        // Honor backpressure: a slow client must not make us buffer the whole stream.
+        if (!res.write(value)) await once(res, "drain", { signal: gone.signal });
+      }
+    }
+    res.end();
+  } catch (e) {
+    if (!gone.signal.aborted) console.error("relay error:", e);
+    if (res.headersSent) {
+      // Mid-stream failure: appending text would corrupt the body already sent,
+      // so drop the connection and let the client see a truncated response.
+      res.destroy();
+      return;
+    }
+    res.writeHead(500, { "content-type": "text/plain" });
+    res.end("relay error");
+  }
+});
+
+// Constant-time token check. Both sides are hashed first so timingSafeEqual
+// always gets equal-length buffers and the token's length is not leaked either.
+function safeEqual(a, b) {
+  const digest = (s) => createHash("sha256").update(String(s)).digest();
+  return timingSafeEqual(digest(a), digest(b));
+}
+
+server.headersTimeout = 0; // long streaming turns
+server.requestTimeout = 0;
+server.listen(PORT, () => console.log(`anthropic relay listening on :${PORT} → ${UPSTREAM}`));
diff --git a/packaging/gcp-relay/package.json b/packaging/gcp-relay/package.json
new file mode 100644
index 0000000..48bac44
--- /dev/null
+++ b/packaging/gcp-relay/package.json
@@ -0,0 +1,7 @@
+{
+  "name": "anthropic-relay",
+  "private": true,
+  "type": "module",
+  "engines": { "node": ">=20" },
+  "scripts": { "start": "node index.mjs" }
+}