diff --git a/tests/e2e-prod/harness/client.ts b/tests/e2e-prod/harness/client.ts index 7c6b05a..bae3837 100644 --- a/tests/e2e-prod/harness/client.ts +++ b/tests/e2e-prod/harness/client.ts @@ -61,7 +61,16 @@ export class ApiClient { headers["Content-Type"] = headers["Content-Type"] ?? "application/json"; } const t0 = performance.now(); - const res = await fetch(url, { method, headers, body }); + // redirect: "manual" stops fetch from transparently following 3xx + // responses. The default `"follow"` makes tests assert against + // whatever the redirect target replies — which silently defeats + // CSRF-discipline checks like "/api/billing/checkout via GET must + // be rejected" if the endpoint ever started returning 302 → + // Stripe (fetch would follow to Stripe and the test would assert + // against Stripe's response, not ours). We test API endpoints, so + // a 3xx from any of our routes is a real signal that callers must + // see, not transparently swallow. + const res = await fetch(url, { method, headers, body, redirect: "manual" }); const raw = await res.text(); const latencyMs = performance.now() - t0; let parsed: T | null = null; diff --git a/tests/e2e-prod/suites/02-authz.test.ts b/tests/e2e-prod/suites/02-authz.test.ts index 6235d4e..b3aa4a6 100644 --- a/tests/e2e-prod/suites/02-authz.test.ts +++ b/tests/e2e-prod/suites/02-authz.test.ts @@ -37,17 +37,25 @@ test("authz: random API key without 'e2a_' prefix returns 401", async () => { }); test("authz: 401 body does NOT leak hint about key validity", async () => { + // Both keys are bogus. r1 has the e2a_ prefix and the right length; r2 + // is unrelated garbage. A correct auth gate must return byte-identical + // 401 responses for both so an attacker can't distinguish "key shape + // is right but invalid" from "completely malformed input." const r1 = await client.get("/api/v1/agents", { apiKey: "e2a_00000000000000000000000000000000" }); const r2 = await client.get("/api/v1/agents", { apiKey: "garbage" }); - if (r1.raw === r2.raw) { - info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)"); - } else { - info( + assert.equal(r1.status, 401, `r1 expected 401, got ${r1.status}`); + assert.equal(r2.status, 401, `r2 expected 401, got ${r2.status}`); + if (r1.raw !== r2.raw) { + fail( SUITE, - "401-uniform", - `401 bodies differ between malformed and well-shaped bogus keys. Could be a weak side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`, + "401-oracle", + `401 bodies differ between malformed and well-shaped bogus keys — side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`, + ); + assert.fail( + `401 bodies must be byte-identical to avoid leaking key-shape info; r1=${JSON.stringify(r1.raw.slice(0, 60))} r2=${JSON.stringify(r2.raw.slice(0, 60))}`, ); } + info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)"); }); test("authz: bogus 'from' on /send returns 4xx (cannot impersonate)", async () => { diff --git a/tests/e2e-prod/suites/03-concurrency.test.ts b/tests/e2e-prod/suites/03-concurrency.test.ts index 21aeb8b..e6b9627 100644 --- a/tests/e2e-prod/suites/03-concurrency.test.ts +++ b/tests/e2e-prod/suites/03-concurrency.test.ts @@ -101,7 +101,15 @@ test("concurrency: parallel PUT toggles converge to a final state (no 500)", asy assert.equal(typeof final.body?.hitl_enabled, "boolean"); }); -test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/4xx, never 500", async () => { +test("concurrency: parallel DELETE of the same agent is idempotent under contention (no 5xx)", async () => { + // Two valid designs exist: + // - First-writer-wins: one 2xx, rest 403/404 (anti-enumeration). + // - Idempotent delete: all 2xx (DELETE is conceptually a state assertion). + // Both are defensible. The non-negotiable invariant is "no 5xx under + // contention" — the test name was renamed from "one succeeds, rest 4xx" + // because the previous assert (>=1 success) accepted all 4 returning + // 2xx and only emitted info(). If you want to lock in first-writer-wins + // specifically, tighten the assertion to ok.length === 1. const slug = uniqueSlug("del"); const c = await client.post<{ email: string }>("/api/v1/agents", { body: { slug, name: "del", agent_mode: "local" }, @@ -117,8 +125,15 @@ test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/ const fivexx = results.filter((r) => r.status >= 500); assert.equal(fivexx.length, 0, `no 5xx under parallel delete, got: ${results.map((r) => r.status).join(",")}`); assert.ok(ok.length >= 1, `at least one delete should succeed, got ${ok.length}: ${results.map((r) => r.status).join(",")}`); + // Final state check: a GET after all the parallel deletes must say 403 + // (anti-enumeration on deleted) — confirms the agent is actually gone + // regardless of which delete "won." + const after = await client.get(`/api/v1/agents/${encodeURIComponent(email)}`); + assert.equal(after.status, 403, `after parallel delete, GET expected 403, got ${after.status}`); if (ok.length > 1) { - info(SUITE, "delete-idempotent", `${ok.length} parallel deletes all returned 2xx — endpoint is idempotent (fine, but worth noting)`); + info(SUITE, "delete-idempotent", `${ok.length} parallel deletes returned 2xx — server treats DELETE as idempotent`); + } else { + info(SUITE, "delete-first-writer-wins", `${ok.length} parallel delete succeeded, ${results.length - ok.length} got 4xx — first-writer-wins design`); } }); diff --git a/tests/e2e-prod/suites/06-reliability.test.ts b/tests/e2e-prod/suites/06-reliability.test.ts index 05c4cfd..daab2f7 100644 --- a/tests/e2e-prod/suites/06-reliability.test.ts +++ b/tests/e2e-prod/suites/06-reliability.test.ts @@ -74,30 +74,51 @@ test("reliability: WS to own agent opens", async () => { }); test("reliability: WS without api_key fails to open", async () => { + let opened = false; try { - await openWS(wsUrl(sharedAgentEmail), null, 3_000); - fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected"); + const ws = await openWS(wsUrl(sharedAgentEmail), null, 3_000); + opened = true; + try { ws.close(); } catch {} } catch { - info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected"); + // expected — rejection is correct + } + if (opened) { + fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected"); + assert.fail("WS without api_key opened successfully — auth gate broken"); } + info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected"); }); test("reliability: WS with wrong api_key fails to open", async () => { + let opened = false; try { - await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000); - fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected"); + const ws = await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000); + opened = true; + try { ws.close(); } catch {} } catch { - info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected"); + // expected } + if (opened) { + fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected"); + assert.fail("WS with bogus api_key opened successfully — auth gate broken"); + } + info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected"); }); test("reliability: WS to non-owned agent fails to open", async () => { + let opened = false; try { - await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000); - fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break"); + const ws = await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000); + opened = true; + try { ws.close(); } catch {} } catch { - info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected"); + // expected + } + if (opened) { + fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break"); + assert.fail("WS opened against an unowned agent — cross-tenant guard broken"); } + info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected"); }); test("reliability: WS reconnect cycle (open → close → open) works", async () => { diff --git a/tests/e2e-prod/suites/10-domains.test.ts b/tests/e2e-prod/suites/10-domains.test.ts index a1afbf2..a865ac0 100644 --- a/tests/e2e-prod/suites/10-domains.test.ts +++ b/tests/e2e-prod/suites/10-domains.test.ts @@ -28,7 +28,10 @@ test("domains: register returns 201 with DNS records + zero-counted Domain", asy ); if (r.status !== 201) { fail(SUITE, "register-non-201", `expected 201, got ${r.status}: ${r.raw.slice(0, 240)}`); - return; + // Hard assert — the primary register test of the domains suite must + // fail loudly if POST /domains regresses, not just record a JSON + // finding behind a green node:test dot. + assert.fail(`POST /api/v1/domains expected 201, got ${r.status}: ${r.raw.slice(0, 200)}`); } track("domain", domain); assert.equal(r.body?.domain, domain, "echoed domain matches"); diff --git a/tests/e2e-prod/suites/11-messaging.test.ts b/tests/e2e-prod/suites/11-messaging.test.ts index 8eee84f..31c890f 100644 --- a/tests/e2e-prod/suites/11-messaging.test.ts +++ b/tests/e2e-prod/suites/11-messaging.test.ts @@ -62,6 +62,9 @@ test("messaging: pagination roundtrip — limit=3 then follow next_token; no dup const overlap = [...ids1].filter((id) => ids2.has(id)); if (overlap.length > 0) { fail(SUITE, "pagination-duplicate-ids", `${overlap.length} ids appear on both pages: ${overlap.slice(0, 5).join(",")}`); + // Cleanup before throwing so we don't leak HITL-held messages. + for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } }); + assert.fail(`pagination roundtrip returned ${overlap.length} duplicate id(s) — pagination is broken`); } // Cleanup. for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } }); @@ -90,9 +93,15 @@ test("messaging: Idempotency-Key replay — same key+body returns same message_i "idem-key-not-replayed", `same Idempotency-Key + same body yielded different message_id: ${firstId} → ${r2.body?.message_id}. Server should replay original response, not re-queue.`, ); - } else { - info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`); + // Hard assert — Idempotency-Key semantics are a financial-stakes + // contract (double-send protection on approve). Don't paper over. + await client.post(`/api/v1/messages/${firstId}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } }); + if (r2.body?.message_id) { + await client.post(`/api/v1/messages/${r2.body.message_id}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } }); + } + assert.fail(`Idempotency-Key replay broken: ${firstId} !== ${r2.body?.message_id}`); } + info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`); // Different body, same key → 422 per spec. const r3 = await client.post("/api/v1/send", { diff --git a/tests/e2e-prod/suites/13-billing-surface.test.ts b/tests/e2e-prod/suites/13-billing-surface.test.ts index 34ce6c0..b9eed62 100644 --- a/tests/e2e-prod/suites/13-billing-surface.test.ts +++ b/tests/e2e-prod/suites/13-billing-surface.test.ts @@ -103,6 +103,9 @@ test("billing: GET /api/billing/health returns 200", async () => { test("billing: GET /api/billing/checkout is rejected (CSRF discipline — POST only)", async () => { // Under SameSite=Lax, a top-level GET navigation forwards the session cookie. // The billing endpoints were intentionally moved to POST-only as a CSRF defense. + // Harness uses redirect:"manual" (client.ts) so any 200 or 3xx here is the + // smoking-gun leak: it means a top-level click could mint a real Stripe + // checkout session for a victim's account. const r = await client.get("/api/billing/checkout"); if (r.status === 200 || (r.status >= 300 && r.status < 400)) { fail( @@ -110,7 +113,7 @@ test("billing: GET /api/billing/checkout is rejected (CSRF discipline — POST o "billing-checkout-get-leak", `GET /api/billing/checkout returned ${r.status} — should be 405/404 to prevent CSRF via top-level navigation. Body: ${r.raw.slice(0, 200)}`, ); - return; + assert.fail(`GET /api/billing/checkout must reject with 4xx; got ${r.status}`); } // 405 / 404 / 401 are all acceptable rejection codes here. assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`); @@ -124,7 +127,7 @@ test("billing: GET /api/billing/portal is rejected (CSRF discipline — POST onl "billing-portal-get-leak", `GET /api/billing/portal returned ${r.status} — should be 405/404 to prevent CSRF. Body: ${r.raw.slice(0, 200)}`, ); - return; + assert.fail(`GET /api/billing/portal must reject with 4xx; got ${r.status}`); } assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`); }); @@ -142,6 +145,7 @@ test("billing: POST /api/billing/checkout without auth returns 401", async () => "billing-checkout-no-auth-accepted", `POST /api/billing/checkout with no creds returned ${r.status} — must require auth`, ); + assert.fail(`POST /api/billing/checkout without auth must reject with 4xx; got ${r.status}`); } assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`); });