From e044cd156ca3db139071c379a1c4deae689488df Mon Sep 17 00:00:00 2001 From: Fsocietyhhh <1211904451@qq.com> Date: Fri, 26 Jun 2026 00:50:11 -0700 Subject: [PATCH] fix(payment): don't reuse a stale pre-auth amount under per-request pricing BlockRun prices each call on input + max_tokens, so the same model can cost different amounts across requests. The pre-auth cache stored one amount per model and blindly re-signed it for the next request. When a later request needed more (e.g. a small turn seeded a cheap amount before a larger one), the signed payment underpaid; the gateway rejected it with a 402 that is not a fresh x402 challenge, and parsing that threw "Failed to parse payment requirements" (HTTP 500). This broke every paid Base model under growing / agentic usage (Codex, long contexts). Free models and Solana (skipPreAuth) were unaffected. Fix: - Reuse a cached pre-auth only when an up-front cost estimate proves it still covers this request (never knowingly underpay). - If a pre-auth is rejected anyway, discard it and re-fetch a clean challenge rather than treating the rejection response as the challenge. - Skip pre-auth entirely when no estimate is available, instead of risking an underpay. Adds payment-preauth.test.ts covering the reuse / skip-on-growth / reject-then-refetch / no-estimator paths. --- src/payment-preauth.test.ts | 151 ++++++++++++++++++++++++++++++++++++ src/payment-preauth.ts | 81 ++++++++++++++----- src/proxy.ts | 4 + 3 files changed, 216 insertions(+), 20 deletions(-) create mode 100644 src/payment-preauth.test.ts diff --git a/src/payment-preauth.test.ts b/src/payment-preauth.test.ts new file mode 100644 index 00000000..bbe71061 --- /dev/null +++ b/src/payment-preauth.test.ts @@ -0,0 +1,151 @@ +/** + * Pre-auth cache correctness under per-request (token-based) pricing. + * + * BlockRun prices each call on input + max_tokens, so the same model can cost + * different amounts.
These tests pin the guarantees that keep that from + * underpaying via a stale cached authorization: + * - pre-auth is reused only when an up-front estimate proves it still covers + * the request (fires on a same/cheaper repeat, skipped when the request grows), + * - a rejected pre-auth is discarded and the request re-fetched cleanly — the + * rejection is never treated as a fresh challenge (no "Failed to parse…"), + * - with no estimator, pre-auth is disabled rather than risking an underpay. + */ +import { describe, it, expect, vi } from "vitest"; +import { x402Client } from "@x402/fetch"; +import { registerExactEvmScheme } from "@x402/evm/exact/client"; +import { toClientEvmSigner } from "@x402/evm"; +import { createPublicClient, http } from "viem"; +import { base } from "viem/chains"; +import { privateKeyToAccount } from "viem/accounts"; +import { deriveAllKeys } from "./wallet.js"; +import { createPayFetchWithPreAuth } from "./payment-preauth.js"; + +const MNEMONIC = + "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon art"; + +function testClient(): x402Client { + const keys = deriveAllKeys(MNEMONIC); + const account = privateKeyToAccount(keys.evmPrivateKey); + const pc = createPublicClient({ chain: base, transport: http() }); + const client = new x402Client(); + registerExactEvmScheme(client, { signer: toClientEvmSigner(account, pc) }); + return client; +} + +const CHALLENGE = { + x402Version: 2, + accepts: [ + { + scheme: "exact", + network: "eip155:8453", + amount: "1000", + asset: "0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913", + payTo: "0xe9030014F5DAe217d0A152f02A043567b16c1aBf", + maxTimeoutSeconds: 300, + extra: { name: "USD Coin", version: "2" }, + }, + ], + resource: { url: "https://gw/api", description: "t", mimeType: "application/json" }, +}; + +function challenge402(): Response { + const b64 = 
Buffer.from(JSON.stringify(CHALLENGE)).toString("base64"); + return new Response(JSON.stringify({ error: "Payment Required" }), { + status: 402, + headers: { + "payment-required": b64, + "www-authenticate": `X402 requirements="${b64}"`, + "content-type": "application/json", + }, + }); +} + +/** A fake gateway: 200 when a payment is attached, a fresh 402 challenge when + * not. `rejectNextPaid` makes the next paid request 402 (an underpayment), to + * exercise the safety-net path. Records whether each call carried payment. */ +function fakeGateway() { + const calls: Array<{ paid: boolean }> = []; + const ctl = { rejectNextPaid: false }; // flip AFTER seeding to reject a pre-auth + const fn = vi.fn(async (input: RequestInfo | URL, init?: RequestInit) => { + const req = new Request(input, init); + const paid = req.headers.has("payment-signature"); + calls.push({ paid }); + if (paid) { + if (ctl.rejectNextPaid) { + ctl.rejectNextPaid = false; + return challenge402(); // underpayment rejected + } + return new Response(JSON.stringify({ ok: true }), { status: 200 }); + } + return challenge402(); + }); + return { fn: fn as unknown as typeof fetch, calls, ctl }; +} + +const URL = "https://gw/api/v1/chat/completions"; +function body(maxTokens = 10) { + return JSON.stringify({ model: "test/model", max_tokens: maxTokens, messages: [] }); +} + +describe("payment pre-auth — per-request pricing safety", () => { + it("reuses pre-auth when the estimate proves the cache still covers it (no extra 402)", async () => { + const est = vi.fn(() => "1000"); // every request estimated equal + const gw = fakeGateway(); + const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est }); + + await pay(URL, { method: "POST", body: body() }); // seed: [unpaid→402, paid→200] + const seeded = gw.calls.length; + expect(gw.calls.map((c) => c.paid)).toEqual([false, true]); + + const res = await pay(URL, { method: "POST", body: body() }); // identical → pre-auth + 
expect(res.status).toBe(200); + expect(gw.calls.length - seeded).toBe(1); // one round-trip, no 402 + expect(gw.calls[seeded].paid).toBe(true); // it pre-paid + }); + + it("skips pre-auth (clean fresh 402) when the request grows beyond the cached amount", async () => { + let big = false; + const est = vi.fn(() => (big ? "5000" : "1000")); + const gw = fakeGateway(); + const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est }); + + await pay(URL, { method: "POST", body: body(10) }); // seed cover=1000 + const seeded = gw.calls.length; + + big = true; + const res = await pay(URL, { method: "POST", body: body(9000) }); // needs 5000 > 1000 + expect(res.status).toBe(200); // NOT a 500 "Failed to parse payment requirements" + // Skipped pre-auth → clean unpaid request first, then the paid retry. + expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]); + }); + + it("discards a rejected pre-auth and re-fetches cleanly (no parse error)", async () => { + const est = vi.fn(() => "1000"); + const gw = fakeGateway(); + const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, { estimateAmount: est }); + + await pay(URL, { method: "POST", body: body() }); // seed (cache warm, cover=1000) + gw.calls.length = 0; + // Now make the next PAID request (the pre-auth) get rejected by the gateway. 
+ gw.ctl.rejectNextPaid = true; + // pre-auth fires (covered) → rejected 402 → clean refetch → paid retry → 200 + const res = await pay(URL, { method: "POST", body: body() }); + expect(res.status).toBe(200); + const seq = gw.calls.map((c) => c.paid); + expect(seq[0]).toBe(true); // pre-auth attempt (got rejected) + expect(seq).toContain(false); // a CLEAN refetch followed (rejection not reused) + expect(seq[seq.length - 1]).toBe(true); // then paid correctly + }); + + it("disables pre-auth entirely when no estimator is provided (never underpays)", async () => { + const gw = fakeGateway(); + const pay = createPayFetchWithPreAuth(gw.fn, testClient(), undefined, {}); // no estimateAmount + + await pay(URL, { method: "POST", body: body() }); + const seeded = gw.calls.length; + const res = await pay(URL, { method: "POST", body: body() }); // identical + expect(res.status).toBe(200); + // No pre-auth → still a fresh 402 + paid retry, never a pre-signed first call. + expect(gw.calls.slice(seeded).map((c) => c.paid)).toEqual([false, true]); + }); +}); diff --git a/src/payment-preauth.ts b/src/payment-preauth.ts index 2dd16b27..9f8b2a05 100644 --- a/src/payment-preauth.ts +++ b/src/payment-preauth.ts @@ -6,7 +6,19 @@ * On subsequent requests, pre-signs payment and attaches it to the first * request, skipping the 402 round trip (~200ms savings per request). * - * Falls back to normal 402 flow if pre-signed payment is rejected. + * IMPORTANT — pricing is per-request, not per-model. BlockRun prices each call + * on (input tokens + max_tokens reservation), so two calls to the SAME model + * can cost different amounts. A cached payment authorizes one EXACT amount, so + * blindly reusing it for a larger request underpays — the gateway then rejects + * it with a 402 that is NOT a fresh x402 challenge, and parsing that throws + * "Failed to parse payment requirements". To stay correct we: + * 1. 
only reuse a cached pre-auth when an up-front cost estimate proves the + * cached amount still covers this request (never knowingly underpay), and + * 2. if a pre-auth is rejected anyway, discard it and re-request WITHOUT + * payment to obtain a fresh, canonical challenge — never treat the + * rejection response itself as the challenge. + * + * Falls back to the normal 402 flow whenever pre-auth can't be proven safe. */ import type { x402Client } from "@x402/fetch"; @@ -17,8 +29,17 @@ type PaymentRequired = Parameters["createPayment interface CachedEntry { paymentRequired: PaymentRequired; cachedAt: number; + /** Estimated cost (USDC micro-units) of the request that established this + * entry. The cached payment is known to cover at least this much, so it is + * only reused when a new request's estimate is <= this value. `undefined` + * when the cost couldn't be estimated — in which case pre-auth is skipped. */ + coverMicros: number | undefined; } +/** Up-front per-request cost estimator (USDC micro-units as a string), e.g. + * proxy.ts#estimateAmount. Returns undefined when the model/cost is unknown. */ +type EstimateFn = (modelId: string, bodyLength: number, maxTokens: number) => string | undefined; + const DEFAULT_TTL_MS = 3_600_000; // 1 hour type FetchFn = (input: RequestInfo | URL, init?: RequestInit) => Promise; @@ -27,7 +48,7 @@ export function createPayFetchWithPreAuth( baseFetch: FetchFn, client: x402Client, ttlMs = DEFAULT_TTL_MS, - options?: { skipPreAuth?: boolean }, + options?: { skipPreAuth?: boolean; estimateAmount?: EstimateFn }, ): FetchFn { const httpClient = new x402HTTPClient(client); const cache = new Map(); @@ -36,11 +57,13 @@ export function createPayFetchWithPreAuth( const request = new Request(input, init); const urlPath = new URL(request.url).pathname; - // Extract model from request body to create model-specific cache keys. - // Without this, a cached payment from a paid model (e.g. 
sonnet) would be - // incorrectly applied to a free model (nvidia/gpt-oss-120b), causing - // payment errors even when the server wouldn't charge for the request. + // Extract model + size from the request body. Model gives a per-model cache + // key (a cached sonnet payment must not be applied to a free model); body + // length + max_tokens drive the up-front cost estimate used to decide + // whether a cached pre-auth still covers this (possibly larger) request. let requestModel = ""; + let bodyLength = 0; + let maxTokens = 0; if (init?.body) { try { const bodyStr = @@ -50,8 +73,10 @@ export function createPayFetchWithPreAuth( ? init.body : ""; if (bodyStr) { - const parsed = JSON.parse(bodyStr) as { model?: string }; + bodyLength = bodyStr.length; + const parsed = JSON.parse(bodyStr) as { model?: string; max_tokens?: number }; requestModel = parsed.model ?? ""; + maxTokens = Number(parsed.max_tokens) || 0; } } catch { /* not JSON, use empty model */ @@ -59,12 +84,27 @@ export function createPayFetchWithPreAuth( } const cacheKey = `${urlPath}:${requestModel}`; - // Try pre-auth if we have cached payment requirements - // Skip for Solana: payments use per-tx blockhashes that expire ~60-90s, - // making cached requirements useless and causing double charges. + // Up-front estimate of what THIS request will cost (USDC micro-units), used + // both to gate pre-auth reuse and to record what a new cache entry covers. + const estimateMicros = (): number | undefined => { + if (!options?.estimateAmount || !requestModel) return undefined; + const est = options.estimateAmount(requestModel, bodyLength, maxTokens); + return est === undefined ? undefined : Number(est); + }; + const needMicros = estimateMicros(); + + // Try pre-auth only when we can PROVE the cached payment still covers this + // request (needMicros <= what the cached entry covered). 
Skip for Solana: + // payments use per-tx blockhashes that expire ~60-90s, making cached + // requirements useless and causing double charges. const cached = !options?.skipPreAuth ? cache.get(cacheKey) : undefined; - let rejected402: Response | undefined; - if (cached && Date.now() - cached.cachedAt < ttlMs) { + const preAuthCovers = + cached !== undefined && + Date.now() - cached.cachedAt < ttlMs && + cached.coverMicros !== undefined && + needMicros !== undefined && + needMicros <= cached.coverMicros; + if (preAuthCovers) { try { const payload = await client.createPaymentPayload(cached.paymentRequired); const headers = httpClient.encodePaymentSignatureHeader(payload); @@ -76,20 +116,19 @@ export function createPayFetchWithPreAuth( if (response.status !== 402) { return response; // Pre-auth worked — saved ~200ms } - // Pre-auth rejected (params may have changed) — invalidate and reuse - // this 402 below: it already carries the fresh payment requirements, - // so re-requesting without payment would just buy the same 402 again. + // Rejected despite our estimate (server priced it higher than we did). + // The rejection 402 is NOT a reusable challenge, so drop it and fall + // through to a clean, un-paid request that yields a fresh challenge. cache.delete(cacheKey); - rejected402 = response; } catch { - // Pre-auth signing failed — invalidate and fall through + // Pre-auth signing failed — invalidate and fall through. cache.delete(cacheKey); } } - // Normal flow: make request (or reuse the rejected pre-auth 402), handle 402 if needed + // Normal flow: make a clean (un-paid) request and handle the 402 if needed. const clonedRequest = request.clone(); - const response = rejected402 ?? 
(await baseFetch(request)); + const response = await baseFetch(request); if (response.status !== 402) { return response; } @@ -111,7 +150,9 @@ export function createPayFetchWithPreAuth( /* empty body is fine */ } paymentRequired = httpClient.getPaymentRequiredResponse(getHeader, body); - cache.set(cacheKey, { paymentRequired, cachedAt: Date.now() }); + // Record what this cached payment covers (this request's estimate). It is + // only reused later when a new request's estimate is <= this value. + cache.set(cacheKey, { paymentRequired, cachedAt: Date.now(), coverMicros: needMicros }); } catch (error) { throw new Error( `Failed to parse payment requirements: ${error instanceof Error ? error.message : "Unknown error"}`, diff --git a/src/proxy.ts b/src/proxy.ts index 787b2286..c6dc75e0 100644 --- a/src/proxy.ts +++ b/src/proxy.ts @@ -2024,6 +2024,10 @@ export async function startProxy(options: ProxyOptions): Promise { const payFetch = createPayFetchWithPreAuth(fetch, x402, undefined, { skipPreAuth: paymentChain === "solana", + // Per-request cost estimate so pre-auth is only reused when the cached + // payment still covers the (possibly larger) request — BlockRun prices per + // token, so one model can cost different amounts across requests. + estimateAmount, }); // Create balance monitor for pre-request checks (lazy import to avoid loading @solana/kit on Base chain)