Mnexa-AI · jiashuoz · May 28, 2026 · May 27, 2026
@@ -61,7 +61,16 @@ export class ApiClient {
       headers["Content-Type"] = headers["Content-Type"] ?? "application/json";
     }
     const t0 = performance.now();
-    const res = await fetch(url, { method, headers, body });
+    // redirect: "manual" stops fetch from transparently following 3xx
+    // responses. The default `"follow"` makes tests assert against
+    // whatever the redirect target replies — which silently defeats
+    // CSRF-discipline checks like "/api/billing/checkout via GET must
+    // be rejected" if the endpoint ever started returning 302 →
+    // Stripe (fetch would follow to Stripe and the test would assert
+    // against Stripe's response, not ours). We test API endpoints, so
+    // a 3xx from any of our routes is a real signal that callers must
+    // see, not transparently swallow.
+    const res = await fetch(url, { method, headers, body, redirect: "manual" });
     const raw = await res.text();
     const latencyMs = performance.now() - t0;
     let parsed: T | null = null;

@@ -37,17 +37,25 @@ test("authz: random API key without 'e2a_' prefix returns 401", async () => {
 });
 
 test("authz: 401 body does NOT leak hint about key validity", async () => {
+  // Both keys are bogus. r1 has the e2a_ prefix and the right length; r2
+  // is unrelated garbage. A correct auth gate must return byte-identical
+  // 401 responses for both so an attacker can't distinguish "key shape
+  // is right but invalid" from "completely malformed input."
   const r1 = await client.get("/api/v1/agents", { apiKey: "e2a_00000000000000000000000000000000" });
   const r2 = await client.get("/api/v1/agents", { apiKey: "garbage" });
-  if (r1.raw === r2.raw) {
-    info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)");
-  } else {
-    info(
+  assert.equal(r1.status, 401, `r1 expected 401, got ${r1.status}`);
+  assert.equal(r2.status, 401, `r2 expected 401, got ${r2.status}`);
+  if (r1.raw !== r2.raw) {
+    fail(
       SUITE,
-      "401-uniform",
-      `401 bodies differ between malformed and well-shaped bogus keys. Could be a weak side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`,
+      "401-oracle",
+      `401 bodies differ between malformed and well-shaped bogus keys — side-channel oracle. Bodies: "${r1.raw.slice(0, 80)}" vs "${r2.raw.slice(0, 80)}"`,
+    );
+    assert.fail(
+      `401 bodies must be byte-identical to avoid leaking key-shape info; r1=${JSON.stringify(r1.raw.slice(0, 60))} r2=${JSON.stringify(r2.raw.slice(0, 60))}`,
     );
   }
+  info(SUITE, "401-uniform", "401 bodies are identical for malformed vs bogus-but-shaped — good (no oracle)");
 });
 
 test("authz: bogus 'from' on /send returns 4xx (cannot impersonate)", async () => {

@@ -101,7 +101,15 @@ test("concurrency: parallel PUT toggles converge to a final state (no 500)", asy
   assert.equal(typeof final.body?.hitl_enabled, "boolean");
 });
 
-test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/4xx, never 500", async () => {
+test("concurrency: parallel DELETE of the same agent is idempotent under contention (no 5xx)", async () => {
+  // Two valid designs exist:
+  //   - First-writer-wins: one 2xx, rest 403/404 (anti-enumeration).
+  //   - Idempotent delete: all 2xx (DELETE is conceptually a state assertion).
+  // Both are defensible. The non-negotiable invariant is "no 5xx under
+  // contention" — the test name was renamed from "one succeeds, rest 4xx"
+  // because the previous assert (>=1 success) accepted all 4 returning
+  // 2xx and only emitted info(). If you want to lock in first-writer-wins
+  // specifically, tighten the assertion to ok.length === 1.
   const slug = uniqueSlug("del");
   const c = await client.post<{ email: string }>("/api/v1/agents", {
     body: { slug, name: "del", agent_mode: "local" },
@@ -117,8 +125,15 @@ test("concurrency: parallel DELETE of the same agent — one succeeds, rest 403/
   const fivexx = results.filter((r) => r.status >= 500);
   assert.equal(fivexx.length, 0, `no 5xx under parallel delete, got: ${results.map((r) => r.status).join(",")}`);
   assert.ok(ok.length >= 1, `at least one delete should succeed, got ${ok.length}: ${results.map((r) => r.status).join(",")}`);
+  // Final state check: a GET after all the parallel deletes must say 403
+  // (anti-enumeration on deleted) — confirms the agent is actually gone
+  // regardless of which delete "won."
+  const after = await client.get(`/api/v1/agents/${encodeURIComponent(email)}`);
+  assert.equal(after.status, 403, `after parallel delete, GET expected 403, got ${after.status}`);
   if (ok.length > 1) {
-    info(SUITE, "delete-idempotent", `${ok.length} parallel deletes all returned 2xx — endpoint is idempotent (fine, but worth noting)`);
+    info(SUITE, "delete-idempotent", `${ok.length} parallel deletes returned 2xx — server treats DELETE as idempotent`);
+  } else {
+    info(SUITE, "delete-first-writer-wins", `${ok.length} parallel delete succeeded, ${results.length - ok.length} got 4xx — first-writer-wins design`);
   }
 });
 

@@ -74,30 +74,51 @@ test("reliability: WS to own agent opens", async () => {
 });
 
 test("reliability: WS without api_key fails to open", async () => {
+  let opened = false;
   try {
-    await openWS(wsUrl(sharedAgentEmail), null, 3_000);
-    fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected");
+    const ws = await openWS(wsUrl(sharedAgentEmail), null, 3_000);
+    opened = true;
+    try { ws.close(); } catch {}
   } catch {
-    info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected");
+    // expected — rejection is correct
+  }
+  if (opened) {
+    fail(SUITE, "ws-unauth-open", "WS opened without auth — should have rejected");
+    assert.fail("WS without api_key opened successfully — auth gate broken");
   }
+  info(SUITE, "ws-unauth-rejected", "WS without api_key correctly rejected");
 });
 
 test("reliability: WS with wrong api_key fails to open", async () => {
+  let opened = false;
   try {
-    await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000);
-    fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected");
+    const ws = await openWS(wsUrl(sharedAgentEmail), "e2a_00000000000000000000000000000000", 3_000);
+    opened = true;
+    try { ws.close(); } catch {}
   } catch {
-    info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected");
+    // expected
   }
+  if (opened) {
+    fail(SUITE, "ws-badkey-open", "WS opened with bogus key — should have rejected");
+    assert.fail("WS with bogus api_key opened successfully — auth gate broken");
+  }
+  info(SUITE, "ws-badkey-rejected", "WS with bogus api_key correctly rejected");
 });
 
 test("reliability: WS to non-owned agent fails to open", async () => {
+  let opened = false;
   try {
-    await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000);
-    fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break");
+    const ws = await openWS(wsUrl("nobody@example.com"), client.env.apiKey, 3_000);
+    opened = true;
+    try { ws.close(); } catch {}
   } catch {
-    info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected");
+    // expected
+  }
+  if (opened) {
+    fail(SUITE, "ws-cross-tenant", "WS opened against non-owned agent — cross-tenant break");
+    assert.fail("WS opened against an unowned agent — cross-tenant guard broken");
   }
+  info(SUITE, "ws-cross-tenant-rejected", "WS to non-owned agent correctly rejected");
 });
 
 test("reliability: WS reconnect cycle (open → close → open) works", async () => {

@@ -28,7 +28,10 @@ test("domains: register returns 201 with DNS records + zero-counted Domain", asy
   );
   if (r.status !== 201) {
     fail(SUITE, "register-non-201", `expected 201, got ${r.status}: ${r.raw.slice(0, 240)}`);
-    return;
+    // Hard assert — the primary register test of the domains suite must
+    // fail loudly if POST /domains regresses, not just record a JSON
+    // finding behind a green node:test dot.
+    assert.fail(`POST /api/v1/domains expected 201, got ${r.status}: ${r.raw.slice(0, 200)}`);
   }
   track("domain", domain);
   assert.equal(r.body?.domain, domain, "echoed domain matches");

@@ -62,6 +62,9 @@ test("messaging: pagination roundtrip — limit=3 then follow next_token; no dup
   const overlap = [...ids1].filter((id) => ids2.has(id));
   if (overlap.length > 0) {
     fail(SUITE, "pagination-duplicate-ids", `${overlap.length} ids appear on both pages: ${overlap.slice(0, 5).join(",")}`);
+    // Cleanup before throwing so we don't leak HITL-held messages.
+    for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } });
+    assert.fail(`pagination roundtrip returned ${overlap.length} duplicate id(s) — pagination is broken`);
   }
   // Cleanup.
   for (const id of queued) await client.post(`/api/v1/messages/${id}/reject`, { body: { reason: "e2e pagination cleanup" } });
@@ -90,9 +93,15 @@ test("messaging: Idempotency-Key replay — same key+body returns same message_i
       "idem-key-not-replayed",
       `same Idempotency-Key + same body yielded different message_id: ${firstId} → ${r2.body?.message_id}. Server should replay original response, not re-queue.`,
     );
-  } else {
-    info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`);
+    // Hard assert — Idempotency-Key semantics are a financial-stakes
+    // contract (double-send protection on approve). Don't paper over.
+    await client.post(`/api/v1/messages/${firstId}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } });
+    if (r2.body?.message_id) {
+      await client.post(`/api/v1/messages/${r2.body.message_id}/reject`, { body: { reason: "e2e idem cleanup pre-fail" } });
+    }
+    assert.fail(`Idempotency-Key replay broken: ${firstId} !== ${r2.body?.message_id}`);
   }
+  info(SUITE, "idem-key-replayed", `Idempotency-Key replay correct: same key+body → same message_id ${firstId}`);
 
   // Different body, same key → 422 per spec.
   const r3 = await client.post("/api/v1/send", {

@@ -103,14 +103,17 @@ test("billing: GET /api/billing/health returns 200", async () => {
 test("billing: GET /api/billing/checkout is rejected (CSRF discipline — POST only)", async () => {
   // Under SameSite=Lax, a top-level GET navigation forwards the session cookie.
   // The billing endpoints were intentionally moved to POST-only as a CSRF defense.
+  // Harness uses redirect:"manual" (client.ts) so any 200 or 3xx here is the
+  // smoking-gun leak: it means a top-level <a> click could mint a real Stripe
+  // checkout session for a victim's account.
   const r = await client.get("/api/billing/checkout");
   if (r.status === 200 || (r.status >= 300 && r.status < 400)) {
     fail(
       SUITE,
       "billing-checkout-get-leak",
       `GET /api/billing/checkout returned ${r.status} — should be 405/404 to prevent CSRF via top-level navigation. Body: ${r.raw.slice(0, 200)}`,
     );
-    return;
+    assert.fail(`GET /api/billing/checkout must reject with 4xx; got ${r.status}`);
   }
   // 405 / 404 / 401 are all acceptable rejection codes here.
   assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
@@ -124,7 +127,7 @@ test("billing: GET /api/billing/portal is rejected (CSRF discipline — POST onl
       "billing-portal-get-leak",
       `GET /api/billing/portal returned ${r.status} — should be 405/404 to prevent CSRF. Body: ${r.raw.slice(0, 200)}`,
     );
-    return;
+    assert.fail(`GET /api/billing/portal must reject with 4xx; got ${r.status}`);
   }
   assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
 });
@@ -142,6 +145,7 @@ test("billing: POST /api/billing/checkout without auth returns 401", async () =>
       "billing-checkout-no-auth-accepted",
       `POST /api/billing/checkout with no creds returned ${r.status} — must require auth`,
     );
+    assert.fail(`POST /api/billing/checkout without auth must reject with 4xx; got ${r.status}`);
   }
   assert.ok(r.status >= 400 && r.status < 500, `expected 4xx, got ${r.status}: ${r.raw.slice(0, 200)}`);
 });