Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion tests/e2e-prod/harness/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,16 @@ export class ApiClient {
headers["Content-Type"] = headers["Content-Type"] ?? "application/json";
}
const t0 = performance.now();
const res = await fetch(url, { method, headers, body });
// redirect: "manual" stops fetch from transparently following 3xx
// responses. The default `"follow"` makes tests assert against
// whatever the redirect target replies — which silently defeats
// CSRF-discipline checks like "/api/billing/checkout via GET must
// be rejected" if the endpoint ever started returning 302 →
// Stripe (fetch would follow to Stripe and the test would assert
// against Stripe's response, not ours). We test API endpoints, so
// a 3xx from any of our routes is a real signal that callers must
// see, not transparently swallow.
const res = await fetch(url, { method, headers, body, redirect: "manual" });
const raw = await res.text();
const latencyMs = performance.now() - t0;
let parsed: T | null = null;
Expand Down
20 changes: 14 additions & 6 deletions tests/e2e-prod/suites/02-authz.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,25 @@ test("authz: random API key without 'e2a_' prefix returns 401", async () => {
});

test("authz: 401 body does NOT leak hint about key validity", async () => {
// Both keys are bogus. r1 has the e2a_ prefix and the right length; r2
// is unrelated garbage. A correct auth gate must return byte-identical
// 401 responses for both so an attacker can't distinguish "key shape
// is right but invalid" from "completely malformed input."
const r1 = await client.get("/api/v1/agents", { apiKey: "e2a_00000000000000000000000000000000" });
const r2 = await client.get("/api/v1/agents", { apiKey: "garbage" });
if (r1.raw === r2.raw) {
info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)");
} else {
info(
assert.equal(r1.status, 401, `r1 expected 401, got ${r1.status}`);
assert.equal(r2.status, 401, `r2 expected 401, got ${r2.status}`);
if (r1.raw !== r2.raw) {
fail(
SUITE,
"401-uniform",
`401 bodies differ between malformed and well-shaped bogus keys. Could be a weak side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`,
"401-oracle",
`401 bodies differ between malformed and well-shaped bogus keys — side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`,
);
assert.fail(
`401 bodies must be byte-identical to avoid leaking key-shape info; r1=${JSON.stringify(r1.raw.slice(0, 60))} r2=${JSON.stringify(r2.raw.slice(0, 60))}`,
);
}
info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)");
});

test("authz: bogus 'from' on /send returns 4xx (cannot impersonate)", async () => {
Expand Down
19 changes: 17 additions & 2 deletions tests/e2e-prod/suites/03-concurrency.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,15 @@ test("concurrency: parallel PUT toggles converge to a final state (no 500)", asy
assert.equal(typeof final.body?.hitl_enabled, "boolean");
});

test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/4xx, never 500", async () => {
test("concurrency: parallel DELETE of the same agent is idempotent under contention (no 5xx)", async () => {
// Two valid designs exist:
// - First-writer-wins: one 2xx, rest 403/404 (anti-enumeration).
// - Idempotent delete: all 2xx (DELETE is conceptually a state assertion).
// Both are defensible. The non-negotiable invariant is "no 5xx under
// contention" — the test name was renamed from "one succeeds, rest 4xx"
// because the previous assert (>=1 success) accepted all 4 returning
// 2xx and only emitted info(). If you want to lock in first-writer-wins
// specifically, tighten the assertion to ok.length === 1.
const slug = uniqueSlug("del");
const c = await client.post<{ email: string }>("/api/v1/agents", {
body: { slug, name: "del", agent_mode: "local" },
Expand All @@ -117,8 +125,15 @@ test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/
const fivexx = results.filter((r) => r.status >= 500);
assert.equal(fivexx.length, 0, `no 5xx under parallel delete, got: ${results.map((r) => r.status).join(",")}`);
assert.ok(ok.length >= 1, `at least one delete should succeed, got ${ok.length}: ${results.map((r) => r.status).join(",")}`);
// Final state check: a GET after all the parallel deletes must say 403
// (anti-enumeration on deleted) — confirms the agent is actually gone
// regardless of which delete "won."
const after = await client.get(`/api/v1/agents/${encodeURIComponent(email)}`);
assert.equal(after.status, 403, `after parallel delete, GET expected 403, got ${after.status}`);
if (ok.length > 1) {
info(SUITE, "delete-idempotent", `${ok.length} parallel deletes all returned 2xx — endpoint is idempotent (fine, but worth noting)`);
info(SUITE, "delete-idempotent", `${ok.length} parallel deletes returned 2xx — server treats DELETE as idempotent`);
} else {
info(SUITE, "delete-first-writer-wins", `${ok.length} parallel delete succeeded, ${results.length - ok.length} got 4xx — first-writer-wins design`);
}
});

Expand Down
39 changes: 30 additions & 9 deletions tests/e2e-prod/suites/06-reliability.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,30 +74,51 @@ test("reliability: WS to own agent opens", async () => {
});

test("reliability: WS without api_key fails to open", async () => {
let opened = false;
try {
await openWS(wsUrl(sharedAgentEmail), null, 3_000);
fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected");
const ws = await openWS(wsUrl(sharedAgentEmail), null, 3_000);
opened = true;
try { ws.close(); } catch {}
} catch {
info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected");
// expected — rejection is correct
}
if (opened) {
fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected");
assert.fail("WS without api_key opened successfully — auth gate broken");
}
info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected");
});

test("reliability: WS with wrong api_key fails to open", async () => {
let opened = false;
try {
await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000);
fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected");
const ws = await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000);
opened = true;
try { ws.close(); } catch {}
} catch {
info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected");
// expected
}
if (opened) {
fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected");
assert.fail("WS with bogus api_key opened successfully — auth gate broken");
}
info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected");
});

test("reliability: WS to non-owned agent fails to open", async () => {
let opened = false;
try {
await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000);
fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break");
const ws = await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000);
opened = true;
try { ws.close(); } catch {}
} catch {
info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected");
// expected
}
if (opened) {
fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break");
assert.fail("WS opened against an unowned agent — cross-tenant guard broken");
}
info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected");
});

test("reliability: WS reconnect cycle (open → close → open) works", async () => {
Expand Down
5 changes: 4 additions & 1 deletion tests/e2e-prod/suites/10-domains.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ test("domains: register returns 201 with DNS records + zero-counted Domain", asy
);
if (r.status !== 201) {
fail(SUITE, "register-non-201", `expected 201, got ${r.status}: ${r.raw.slice(0, 240)}`);
return;
// Hard assert — the primary register test of the domains suite must
// fail loudly if POST /domains regresses, not just record a JSON
// finding behind a green node:test dot.
assert.fail(`POST /api/v1/domains expected 201, got ${r.status}: ${r.raw.slice(0, 200)}`);
}
track("domain", domain);
assert.equal(r.body?.domain, domain, "echoed domain matches");
Expand Down
13 changes: 11 additions & 2 deletions tests/e2e-prod/suites/11-messaging.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ test("messaging: pagination roundtrip — limit=3 then follow next_token; no dup
const overlap = [...ids1].filter((id) => ids2.has(id));
if (overlap.length > 0) {
fail(SUITE, "pagination-duplicate-ids", `${overlap.length} ids appear on both pages: ${overlap.slice(0, 5).join(",")}`);
// Cleanup before throwing so we don't leak HITL-held messages.
for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } });
assert.fail(`pagination roundtrip returned ${overlap.length} duplicate id(s) — pagination is broken`);
}
// Cleanup.
for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } });
Expand Down Expand Up @@ -90,9 +93,15 @@ test("messaging: Idempotency-Key replay — same key+body returns same message_i
"idem-key-not-replayed",
`same Idempotency-Key + same body yielded different message_id: ${firstId} → ${r2.body?.message_id}. Server should replay original response, not re-queue.`,
);
} else {
info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`);
// Hard assert — Idempotency-Key semantics are a financial-stakes
// contract (double-send protection on approve). Don't paper over.
await client.post(`/api/v1/messages/${firstId}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } });
if (r2.body?.message_id) {
await client.post(`/api/v1/messages/${r2.body.message_id}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } });
}
assert.fail(`Idempotency-Key replay broken: ${firstId} !== ${r2.body?.message_id}`);
}
info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`);

// Different body, same key → 422 per spec.
const r3 = await client.post("/api/v1/send", {
Expand Down
8 changes: 6 additions & 2 deletions tests/e2e-prod/suites/13-billing-surface.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,17 @@ test("billing: GET /api/billing/health returns 200", async () => {
test("billing: GET /api/billing/checkout is rejected (CSRF discipline — POST only)", async () => {
// Under SameSite=Lax, a top-level GET navigation forwards the session cookie.
// The billing endpoints were intentionally moved to POST-only as a CSRF defense.
// Harness uses redirect:"manual" (client.ts) so any 200 or 3xx here is the
// smoking-gun leak: it means a top-level <a> click could mint a real Stripe
// checkout session for a victim's account.
const r = await client.get("/api/billing/checkout");
if (r.status === 200 || (r.status >= 300 && r.status < 400)) {
fail(
SUITE,
"billing-checkout-get-leak",
`GET /api/billing/checkout returned ${r.status} — should be 405/404 to prevent CSRF via top-level navigation. Body: ${r.raw.slice(0, 200)}`,
);
return;
assert.fail(`GET /api/billing/checkout must reject with 4xx; got ${r.status}`);
}
// 405 / 404 / 401 are all acceptable rejection codes here.
assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
Expand All @@ -124,7 +127,7 @@ test("billing: GET /api/billing/portal is rejected (CSRF discipline — POST onl
"billing-portal-get-leak",
`GET /api/billing/portal returned ${r.status} — should be 405/404 to prevent CSRF. Body: ${r.raw.slice(0, 200)}`,
);
return;
assert.fail(`GET /api/billing/portal must reject with 4xx; got ${r.status}`);
}
assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
});
Expand All @@ -142,6 +145,7 @@ test("billing: POST /api/billing/checkout without auth returns 401", async () =>
"billing-checkout-no-auth-accepted",
`POST /api/billing/checkout with no creds returned ${r.status} — must require auth`,
);
assert.fail(`POST /api/billing/checkout without auth must reject with 4xx; got ${r.status}`);
}
assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
});
Expand Down
Loading