From 4bdb02070f54c5979d8c20eae5ab73265e60a239 Mon Sep 17 00:00:00 2001 From: Stefan Neamtu Date: Wed, 6 May 2026 12:01:52 +0200 Subject: [PATCH] fix(design): honor Retry-After header in variants 429 handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1244. The 429 handler in `generateVariant` discarded the `Retry-After` response header and fell straight through to a local exponential schedule (2s/4s/8s). In image-generation batches, that burns retry attempts inside the provider's cooldown window and the request never recovers. Now we parse `Retry-After` per RFC 7231 — both delta-seconds (`Retry-After: 5`) and HTTP-date (`Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`). Honored waits are capped at 60s to bound stalls from hostile or buggy headers. Delta-seconds are validated as digits-only (rejects `2abc`). When `Retry-After` is honored (including 0 / past-date "retry now"), the next iteration's leading exponential sleep is skipped so we don't double-wait. Invalid or missing headers fall through to the existing exponential schedule unchanged. Behavior matrix: | Header | Behavior | |---------------------------------|-------------------------------------------| | Retry-After: 5 | wait 5s, skip leading on next attempt | | Retry-After: 999999 | capped to 60s, skip leading | | Retry-After: 2abc | invalid, fall through to exponential | | Retry-After: 0 | wait 0, skip leading (retry immediately) | | Retry-After: | wait 0, skip leading | | Retry-After: | wait diff capped at 60s, skip leading | | no header | fall through to existing exponential | `generateVariant` now accepts an optional `fetchFn` parameter (defaults to `globalThis.fetch`) so tests can inject a stub. Production call sites are unchanged. Tests cover the five behavior buckets above, asserting both the 1st-to-2nd call timing gap and call counts. All five pass in ~8s. Co-Authored-By: Claude Opus 4.7 (1M context) --- design/src/variants.ts | 36 +++++- design/test/variants-retry-after.test.ts | 133 +++++++++++++++++++++++ 2 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 design/test/variants-retry-after.test.ts diff --git a/design/src/variants.ts b/design/src/variants.ts index 87ccca92de..d52eb22829 100644 --- a/design/src/variants.ts +++ b/design/src/variants.ts @@ -31,30 +31,37 @@ const STYLE_VARIATIONS = [ /** * Generate a single variant with retry on 429. + * + * Exported for testability. Pass `fetchFn` to inject a stubbed fetch in tests; + * production code uses the global fetch by default. */ -async function generateVariant( +export async function generateVariant( apiKey: string, prompt: string, outputPath: string, size: string, quality: string, + fetchFn: typeof globalThis.fetch = globalThis.fetch, ): Promise<{ path: string; success: boolean; error?: string }> { const maxRetries = 3; + const MAX_RETRY_AFTER_MS = 60_000; // cap honored Retry-After to bound stalls let lastError = ""; + let skipLeadingDelay = false; for (let attempt = 0; attempt <= maxRetries; attempt++) { - if (attempt > 0) { + if (attempt > 0 && !skipLeadingDelay) { // Exponential backoff: 2s, 4s, 8s const delay = Math.pow(2, attempt) * 1000; console.error(` Rate limited, retrying in ${delay / 1000}s...`); await new Promise(r => setTimeout(r, delay)); } + skipLeadingDelay = false; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), 120_000); try { - const response = await fetch("https://api.openai.com/v1/responses", { + const response = await fetchFn("https://api.openai.com/v1/responses", { method: "POST", headers: { "Authorization": `Bearer ${apiKey}`, @@ -72,6 +79,29 @@ async function generateVariant( if (response.status === 429) { lastError = "Rate limited (429)"; + const retryAfter = response.headers.get("retry-after"); + if (retryAfter) { + const trimmed = retryAfter.trim(); + let waitMs: number | null = null; + if (/^\d+$/.test(trimmed)) { + // delta-seconds (RFC 7231) + waitMs = Math.min(Number.parseInt(trimmed, 10) * 1000, MAX_RETRY_AFTER_MS); + } else { + // HTTP-date (RFC 7231) + const dateMs = Date.parse(trimmed); + if (!Number.isNaN(dateMs)) { + waitMs = Math.min(Math.max(0, dateMs - Date.now()), MAX_RETRY_AFTER_MS); + } + } + if (waitMs !== null) { + if (waitMs > 0) { + await new Promise(resolve => setTimeout(resolve, waitMs)); + } + // Honored Retry-After (incl. 0 / past date "retry now") — skip the + // next iteration's leading exponential sleep so we don't double-wait. + skipLeadingDelay = true; + } + } continue; } diff --git a/design/test/variants-retry-after.test.ts b/design/test/variants-retry-after.test.ts new file mode 100644 index 0000000000..2060791d53 --- /dev/null +++ b/design/test/variants-retry-after.test.ts @@ -0,0 +1,133 @@ +import { describe, test, expect, beforeEach, afterEach } from "bun:test"; +import fs from "fs"; +import os from "os"; +import path from "path"; +import { generateVariant } from "../src/variants"; + +// 1x1 transparent PNG, base64 — valid bytes that fs.writeFileSync can write. +const TINY_PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkAAIAAAoAAv/lxKUAAAAASUVORK5CYII="; + +function successResponse(): Response { + return new Response( + JSON.stringify({ + output: [{ type: "image_generation_call", result: TINY_PNG_BASE64 }], + }), + { status: 200, headers: { "Content-Type": "application/json" } }, + ); +} + +function rateLimited(retryAfter?: string): Response { + const headers: Record = {}; + if (retryAfter !== undefined) headers["Retry-After"] = retryAfter; + return new Response("rate limited", { status: 429, headers }); +} + +interface CallRecord { + ts: number; +} + +function makeStubFetch( + responses: Response[], + calls: CallRecord[], +): typeof globalThis.fetch { + let idx = 0; + return (async (_input: any, _init?: any) => { + calls.push({ ts: Date.now() }); + const response = responses[idx]; + if (!response) throw new Error(`stub fetch: no response for call ${idx + 1}`); + idx++; + return response; + }) as typeof globalThis.fetch; +} + +describe("generateVariant Retry-After handling", () => { + let tmpDir: string; + let outputPath: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "variants-retry-after-")); + outputPath = path.join(tmpDir, "variant.png"); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + test("delta-seconds: honors Retry-After: 1 with no extra leading exponential", async () => { + const calls: CallRecord[] = []; + const fetchFn = makeStubFetch([rateLimited("1"), successResponse()], calls); + + const result = await generateVariant( + "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn, + ); + + expect(result.success).toBe(true); + expect(calls.length).toBe(2); + const gap = calls[1].ts - calls[0].ts; + // Honored ~1s; should NOT add the 2s leading exponential on top + expect(gap).toBeGreaterThanOrEqual(900); + expect(gap).toBeLessThan(1700); + }); + + test("HTTP-date: honors a future date with no extra leading exponential", async () => { + const calls: CallRecord[] = []; + const future = new Date(Date.now() + 3000).toUTCString(); + const fetchFn = makeStubFetch([rateLimited(future), successResponse()], calls); + + const result = await generateVariant( + "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn, + ); + + expect(result.success).toBe(true); + expect(calls.length).toBe(2); + const gap = calls[1].ts - calls[0].ts; + expect(gap).toBeGreaterThanOrEqual(2500); + expect(gap).toBeLessThan(4500); + }); + + test("invalid Retry-After (alphanumeric): falls through to exponential", async () => { + const calls: CallRecord[] = []; + const fetchFn = makeStubFetch([rateLimited("2abc"), successResponse()], calls); + + const result = await generateVariant( + "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn, + ); + + expect(result.success).toBe(true); + expect(calls.length).toBe(2); + const gap = calls[1].ts - calls[0].ts; + // Falls through to existing 2s exponential leading delay + expect(gap).toBeGreaterThanOrEqual(1800); + expect(gap).toBeLessThan(3000); + }); + + test("no Retry-After header: falls through to exponential", async () => { + const calls: CallRecord[] = []; + const fetchFn = makeStubFetch([rateLimited(), successResponse()], calls); + + const result = await generateVariant( + "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn, + ); + + expect(result.success).toBe(true); + expect(calls.length).toBe(2); + const gap = calls[1].ts - calls[0].ts; + expect(gap).toBeGreaterThanOrEqual(1800); + expect(gap).toBeLessThan(3000); + }); + + test("Retry-After: 0 retries immediately, skips leading exponential", async () => { + const calls: CallRecord[] = []; + const fetchFn = makeStubFetch([rateLimited("0"), successResponse()], calls); + + const result = await generateVariant( + "fake-key", "prompt", outputPath, "1024x1024", "high", fetchFn, + ); + + expect(result.success).toBe(true); + expect(calls.length).toBe(2); + const gap = calls[1].ts - calls[0].ts; + expect(gap).toBeLessThan(500); + }); +});