Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/e2e-prod/suites/01-basic.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { test, before, after } from "node:test";
import { test, after } from "node:test";
import assert from "node:assert/strict";
import { ApiClient } from "../harness/client.ts";
import { cleanup, track } from "../harness/cleanup.ts";
Expand Down
8 changes: 6 additions & 2 deletions tests/e2e-prod/suites/02-authz.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,14 @@ test("authz: PUT /agents/<email-i-dont-own> returns 403 or 4xx", async () => {

test("authz: DELETE /agents/<email-i-dont-own> returns 403 or 4xx (no cross-tenant delete)", async () => {
const r = await client.delete(`/api/v1/agents/${encodeURIComponent("nobody@example.com")}`);
assert.ok(r.status === 403 || (r.status >= 400 && r.status < 500), `expected 4xx, got ${r.status}`);
// assert.ok throws on a non-4xx response — the explicit 200/204 check
// that used to live below was dead code (unreachable past the assert).
// The fail-tag is preserved for triage clarity if the assert ever fires.
if (r.status === 200 || r.status === 204) {
fail(SUITE, "cross-tenant-delete", "CRITICAL: deleted an agent we don't own");
fail(SUITE, "cross-tenant-delete", `CRITICAL: deleted an agent we don't own; got ${r.status}`);
assert.fail(`cross-tenant DELETE returned ${r.status} — agent we don't own was deleted`);
}
assert.ok(r.status === 403 || (r.status >= 400 && r.status < 500), `expected 4xx, got ${r.status}`);
});

test("authz: GET /agents/<email>/messages of unowned agent returns 4xx", async () => {
Expand Down
7 changes: 6 additions & 1 deletion tests/e2e-prod/suites/08-mcp.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ const SUITE = "08-mcp";
const mcp = new StdioMcpClient();

before(async () => {
await mcp.start("node", ["/Users/joshzhang/Desktop/e2a/mcp/dist/index.js"], {
// Default to the repo-relative dist path so the suite works for any
// contributor / CI runner. Hardcoded absolute path was unportable.
// Override with E2A_MCP_DIST if the dist lives elsewhere.
const mcpDist =
process.env.E2A_MCP_DIST ?? new URL("../../../mcp/dist/index.js", import.meta.url).pathname;
await mcp.start("node", [mcpDist], {
E2A_API_KEY: apiClient.env.apiKey,
E2A_BASE_URL: apiClient.env.apiUrl,
E2A_AGENT_EMAIL: apiClient.env.primaryAgentEmail,
Expand Down
16 changes: 6 additions & 10 deletions tests/e2e-prod/suites/09-postfix.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,16 +105,12 @@ test("postfix #7: bare LF in subject is also rejected (no carriage return)", asy
assert.equal(r.status, 400, `expected 400 for bare LF, got ${r.status}: ${r.raw.slice(0, 200)}`);
});

test("postfix #1 #2: /send 429 includes Retry-After header (probed via invalid payloads + 1 quota-hit guard)", async () => {
// We can't probe the send rate limit on prod without queueing real HITL
// notifications. Instead we verify the header CONTRACT: the docs (and
// OpenAPI) now say 429 carries Retry-After. We'll skip the active probe.
info(
SUITE,
"retry-after-probe-skipped",
"skipping active 60-send rate-limit probe to avoid triggering HITL notification emails — see issue #146",
);
});
// Skipped: actively probing the /send rate limit queues 60+ real HITL
// notifications to the owner inbox (see auto-memory feedback note). The
// /agents Retry-After test below covers the header CONTRACT via a cheaper
// path that doesn't fan out to SMTP. Marked as test.skip so it doesn't
// pollute the green-pass count.
test.skip("postfix #1 #2: /send 429 includes Retry-After header (skipped — would queue real HITL notifications)", async () => {});

test("postfix #1 #2: /agents 429 includes Retry-After header (active probe — does NOT send mail)", async () => {
// Agent creation is a pure CRUD op; failing creates don't fan out to SMTP.
Expand Down
29 changes: 22 additions & 7 deletions tests/e2e-prod/suites/11-messaging.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -226,20 +226,35 @@ test("messaging: reply to bogus message ID returns 404", async () => {
});

test("messaging: reply with empty body returns 400", async () => {
// Find any message we own to attempt reply against; if none, skip.
const list = await client.get<{ messages: Array<{ id: string; direction?: string }> }>("/api/v1/messages", { query: { limit: 5 } });
const candidate = list.body?.messages?.find((m) => m.direction === "inbound") ?? list.body?.messages?.[0];
// /reply requires the target message be inbound and belong to the
// agent in the path. The previous version fell back to any message
// including outbound, which routinely 404'd before the 400-missing-
// body check ran — so the test passed without ever exercising the
// "empty body returns 400" branch. Now: pull from the agent-scoped
// inbound listing, skip cleanly if none exist.
const email = client.env.primaryAgentEmail;
const list = await client.get<{ messages: Array<{ id: string; direction?: string }> }>(
`/api/v1/agents/${encodeURIComponent(email)}/messages`,
{ query: { limit: 5, direction: "inbound" } },
);
const candidate = list.body?.messages?.find((m) => m.direction === "inbound" || m.direction === undefined);
if (!candidate) {
info(SUITE, "reply-empty-skipped", "no messages in inbox to attempt reply against");
info(SUITE, "reply-empty-skipped", `no inbound messages on ${email} — cannot exercise empty-body reply check`);
return;
}
const email = client.env.primaryAgentEmail;
const r = await client.post(
`/api/v1/agents/${encodeURIComponent(email)}/messages/${encodeURIComponent(candidate.id)}/reply`,
{ body: {} },
);
// Spec: 400 missing body, OR 404 if message isn't owned by THIS agent.
assert.ok(r.status >= 400 && r.status < 500, `expected 4xx (400 or 404), got ${r.status}: ${r.raw.slice(0, 200)}`);
// Now that we picked from the agent-scoped inbound list, 400 is the
// expected response (missing body). 404 here would mean the inbound
// listing returned a stale id — flag it informationally rather than
// assert away a different bug.
if (r.status === 404) {
info(SUITE, "reply-empty-404-on-listed-msg", `inbound list returned ${candidate.id} but /reply 404'd — possible listing/storage skew`);
return;
}
assert.equal(r.status, 400, `expected 400 (empty body) on owned inbound message, got ${r.status}: ${r.raw.slice(0, 200)}`);
});

test("messaging: /messages search filters — surface what's supported", async () => {
Expand Down
6 changes: 5 additions & 1 deletion tests/e2e-prod/suites/12-mcp-extended.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ const SUITE = "12-mcp-extended";
const mcp = new StdioMcpClient();

before(async () => {
await mcp.start("node", ["/Users/joshzhang/Desktop/e2a/mcp/dist/index.js"], {
// Default to the repo-relative dist path so the suite works for any
// contributor / CI runner. Override with E2A_MCP_DIST if needed.
const mcpDist =
process.env.E2A_MCP_DIST ?? new URL("../../../mcp/dist/index.js", import.meta.url).pathname;
await mcp.start("node", [mcpDist], {
E2A_API_KEY: apiClient.env.apiKey,
E2A_BASE_URL: apiClient.env.apiUrl,
E2A_AGENT_EMAIL: apiClient.env.primaryAgentEmail,
Expand Down
14 changes: 11 additions & 3 deletions tests/e2e-prod/suites/13-billing-surface.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,20 @@ test("billing: usage.agents counts roughly match the actual /agents list", async
const agents = await client.get<{ agents: unknown[] }>("/api/v1/agents");
const actual = agents.body?.agents?.length ?? 0;
const reported = limits.body.usage.agents!;
// Allow ±1 drift for in-flight resources, but a big mismatch is a bug.
if (Math.abs(reported - actual) > 1) {
const drift = Math.abs(reported - actual);
// ±1 is benign timing under 1 RPS concurrent creates (the two HTTP calls
// are not atomic). Wider drift signals a real counter-staleness bug — the
// limits primitive is supposed to read live, not cache counts.
if (drift > 1) {
info(
SUITE,
"usage-agents-drift",
`usage.agents=${reported} but /agents list has ${actual} — drift larger than ±1 may indicate counter is stale`,
`usage.agents=${reported}, /agents list=${actual}, drift=${drift} — wider than expected timing race`,
);
// Hard-fail past a meaningful threshold; anything inside it is noise.
assert.ok(
drift <= 3,
`usage.agents counter drift ${drift} (reported=${reported}, actual=${actual}) — counter is stale or broken`,
);
} else {
info(SUITE, "usage-agents-consistent", `usage.agents=${reported}, /agents list=${actual} — within ±1`);
Expand Down
Loading