Mnexa-AI · jiashuoz · May 28, 2026 · May 27, 2026
@@ -1,4 +1,4 @@
-import { test, before, after } from "node:test";
+import { test, after } from "node:test";
 import assert from "node:assert/strict";
 import { ApiClient } from "../harness/client.ts";
 import { cleanup, track } from "../harness/cleanup.ts";

@@ -76,10 +76,14 @@ test("authz: PUT /agents/<email-i-dont-own> returns 403 or 4xx", async () => {
 
 test("authz: DELETE /agents/<email-i-dont-own> returns 403 or 4xx (no cross-tenant delete)", async () => {
   const r = await client.delete(`/api/v1/agents/${encodeURIComponent("nobody@example.com")}`);
-  assert.ok(r.status === 403 || (r.status >= 400 && r.status < 500), `expected 4xx, got ${r.status}`);
+  // Check 200/204 BEFORE the generic 4xx assert: when this check sat after
+  // assert.ok it was unreachable, because assert.ok throws on any non-4xx.
+  // Running it first calls fail() with the triage tag, then fails the test.
   if (r.status === 200 || r.status === 204) {
-    fail(SUITE, "cross-tenant-delete", "CRITICAL: deleted an agent we don't own");
+    fail(SUITE, "cross-tenant-delete", `CRITICAL: deleted an agent we don't own; got ${r.status}`);
+    assert.fail(`cross-tenant DELETE returned ${r.status} — agent we don't own was deleted`);
   }
+  assert.ok(r.status === 403 || (r.status >= 400 && r.status < 500), `expected 4xx, got ${r.status}`);
 });
 
 test("authz: GET /agents/<email>/messages of unowned agent returns 4xx", async () => {

@@ -11,7 +11,12 @@ const SUITE = "08-mcp";
 const mcp = new StdioMcpClient();
 
 before(async () => {
-  await mcp.start("node", ["/Users/joshzhang/Desktop/e2a/mcp/dist/index.js"], {
+  // Default to the repo-relative dist path so the suite works for any
+  // contributor / CI runner; override with E2A_MCP_DIST if it lives elsewhere.
+  // fileURLToPath (not URL.pathname) decodes %-escapes and Windows drive paths.
+  const mcpDist = process.env.E2A_MCP_DIST ??
+    (await import("node:url")).fileURLToPath(new URL("../../../mcp/dist/index.js", import.meta.url));
+  await mcp.start("node", [mcpDist], {
     E2A_API_KEY: apiClient.env.apiKey,
     E2A_BASE_URL: apiClient.env.apiUrl,
     E2A_AGENT_EMAIL: apiClient.env.primaryAgentEmail,

@@ -105,16 +105,12 @@ test("postfix #7: bare LF in subject is also rejected (no carriage return)", asy
   assert.equal(r.status, 400, `expected 400 for bare LF, got ${r.status}: ${r.raw.slice(0, 200)}`);
 });
 
-test("postfix #1 #2: /send 429 includes Retry-After header (probed via invalid payloads + 1 quota-hit guard)", async () => {
-  // We can't probe the send rate limit on prod without queueing real HITL
-  // notifications. Instead we verify the header CONTRACT: the docs (and
-  // OpenAPI) now say 429 carries Retry-After. We'll skip the active probe.
-  info(
-    SUITE,
-    "retry-after-probe-skipped",
-    "skipping active 60-send rate-limit probe to avoid triggering HITL notification emails — see issue #146",
-  );
-});
+// Skipped: an active probe of the /send rate limit needs roughly 60 sends,
+// each of which queues a real HITL notification email (see issue #146). The
+// /agents Retry-After test below exercises the same header contract through
+// agent creation, which does not send mail. Registered with test.skip so the
+// runner reports it as skipped rather than counting it as a pass.
+test.skip("postfix #1 #2: /send 429 includes Retry-After header (skipped — would queue real HITL notifications)", async () => {});
 
 test("postfix #1 #2: /agents 429 includes Retry-After header (active probe — does NOT send mail)", async () => {
   // Agent creation is a pure CRUD op; failing creates don't fan out to SMTP.

@@ -226,20 +226,35 @@ test("messaging: reply to bogus message ID returns 404", async () => {
 });
 
 test("messaging: reply with empty body returns 400", async () => {
-  // Find any message we own to attempt reply against; if none, skip.
-  const list = await client.get<{ messages: Array<{ id: string; direction?: string }> }>("/api/v1/messages", { query: { limit: 5 } });
-  const candidate = list.body?.messages?.find((m) => m.direction === "inbound") ?? list.body?.messages?.[0];
+  // /reply requires the target message to be inbound and to belong to the
+  // agent in the path, so pick the candidate from the agent-scoped inbound
+  // listing. Picking any message (including outbound) let /reply 404 before
+  // the empty-body check ran, so the 400 branch was never exercised.
+  // Entries with no `direction` field are accepted too — presumably the
+  // server-side direction filter already applied; TODO confirm.
+  const email = client.env.primaryAgentEmail;
+  const list = await client.get<{ messages: Array<{ id: string; direction?: string }> }>(
+    `/api/v1/agents/${encodeURIComponent(email)}/messages`,
+    { query: { limit: 5, direction: "inbound" } },
+  );
+  const candidate = list.body?.messages?.find((m) => m.direction === "inbound" || m.direction === undefined);
   if (!candidate) {
-    info(SUITE, "reply-empty-skipped", "no messages in inbox to attempt reply against");
+    info(SUITE, "reply-empty-skipped", `no inbound messages on ${email} — cannot exercise empty-body reply check`);
     return;
   }
-  const email = client.env.primaryAgentEmail;
   const r = await client.post(
     `/api/v1/agents/${encodeURIComponent(email)}/messages/${encodeURIComponent(candidate.id)}/reply`,
     { body: {} },
   );
-  // Spec: 400 missing body, OR 404 if message isn't owned by THIS agent.
-  assert.ok(r.status >= 400 && r.status < 500, `expected 4xx (400 or 404), got ${r.status}: ${r.raw.slice(0, 200)}`);
+  // The candidate came from this agent's inbound list, so 400 (missing
+  // body) is the expected response. A 404 would mean the listing returned
+  // an id that /reply cannot find; log it and return without asserting
+  // (NOTE(review): this path reports the test as passed — confirm intended).
+  if (r.status === 404) {
+    info(SUITE, "reply-empty-404-on-listed-msg", `inbound list returned ${candidate.id} but /reply 404'd — possible listing/storage skew`);
+    return;
+  }
+  assert.equal(r.status, 400, `expected 400 (empty body) on owned inbound message, got ${r.status}: ${r.raw.slice(0, 200)}`);
 });
 
 test("messaging: /messages search filters — surface what's supported", async () => {

@@ -11,7 +11,11 @@ const SUITE = "12-mcp-extended";
 const mcp = new StdioMcpClient();
 
 before(async () => {
-  await mcp.start("node", ["/Users/joshzhang/Desktop/e2a/mcp/dist/index.js"], {
+  // Default to the repo-relative dist path (override with E2A_MCP_DIST).
+  // fileURLToPath, unlike URL.pathname, decodes %-escapes and Windows drives.
+  const mcpDist = process.env.E2A_MCP_DIST ??
+    (await import("node:url")).fileURLToPath(new URL("../../../mcp/dist/index.js", import.meta.url));
+  await mcp.start("node", [mcpDist], {
     E2A_API_KEY: apiClient.env.apiKey,
     E2A_BASE_URL: apiClient.env.apiUrl,
     E2A_AGENT_EMAIL: apiClient.env.primaryAgentEmail,

@@ -74,12 +74,20 @@ test("billing: usage.agents counts roughly match the actual /agents list", async
   const agents = await client.get<{ agents: unknown[] }>("/api/v1/agents");
   const actual = agents.body?.agents?.length ?? 0;
   const reported = limits.body.usage.agents!;
-  // Allow ±1 drift for in-flight resources, but a big mismatch is a bug.
-  if (Math.abs(reported - actual) > 1) {
+  const drift = Math.abs(reported - actual);
+  // ±1 is benign timing under 1 RPS concurrent creates (the two HTTP calls
+  // are not atomic). Wider drift signals a real counter-staleness bug — the
+  // limits primitive is supposed to read live, not cache counts.
+  if (drift > 1) {
     info(
       SUITE,
       "usage-agents-drift",
-      `usage.agents=${reported} but /agents list has ${actual} — drift larger than ±1 may indicate counter is stale`,
+      `usage.agents=${reported}, /agents list=${actual}, drift=${drift} — wider than expected timing race`,
+    );
+    // Hard-fail past a meaningful threshold; anything inside it is noise.
+    assert.ok(
+      drift <= 3,
+      `usage.agents counter drift ${drift} (reported=${reported}, actual=${actual}) — counter is stale or broken`,
     );
   } else {
     info(SUITE, "usage-agents-consistent", `usage.agents=${reported}, /agents list=${actual} — within ±1`);