Skip to content
18 changes: 12 additions & 6 deletions src/core/__tests__/trigger-auth.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
let server: ReturnType<typeof Bun.serve>;
let baseUrl: string;

beforeAll(() => {
beforeAll(async () => {
// Back up the existing mcp.yaml so we can restore it after tests
if (existsSync(mcpConfigPath)) {
originalMcpYaml = readFileSync(mcpConfigPath, "utf-8");
Expand All @@ -38,11 +38,8 @@
mkdirSync("config", { recursive: true });
writeFileSync(mcpConfigPath, YAML.stringify(mcpConfig), "utf-8");

// Start server with a random port
server = startServer({ name: "test", port: 0, role: "base" } as never, Date.now());
baseUrl = `http://localhost:${server.port}`;

// Wire trigger deps with a mock runtime
// Wire trigger deps before starting the server so the /trigger
// handler is ready on the first request.
setTriggerDeps({
runtime: {
handleMessage: async () => ({
Expand All @@ -52,6 +49,15 @@
}),
} as never,
});

// Start server after deps are wired. Use server.url (Bun guarantees
// it is populated once serve() returns) instead of manually building
// the URL from server.port, which can race in CI environments.
server = startServer({ name: "test", port: 0, role: "base" } as never, Date.now());
baseUrl = server.url.origin;

// Ensure the server is accepting connections before tests run.
await fetch(`${baseUrl}/health`);
});

afterAll(() => {
Expand All @@ -70,7 +76,7 @@
headers: { "Content-Type": "application/json" },
body: triggerBody,
});
expect(res.status).toBe(401);

Check failure on line 79 in src/core/__tests__/trigger-auth.test.ts

View workflow job for this annotation

GitHub Actions / test

error: expect(received).toBe(expected)

Expected: 401 Received: undefined at <anonymous> (/home/runner/work/phantom/phantom/src/core/__tests__/trigger-auth.test.ts:79:22)
const json = (await res.json()) as { status: string; message: string };
expect(json.message).toContain("Missing");
});
Expand All @@ -84,7 +90,7 @@
},
body: triggerBody,
});
expect(res.status).toBe(401);

Check failure on line 93 in src/core/__tests__/trigger-auth.test.ts

View workflow job for this annotation

GitHub Actions / test

error: expect(received).toBe(expected)

Expected: 401 Received: undefined at <anonymous> (/home/runner/work/phantom/phantom/src/core/__tests__/trigger-auth.test.ts:93:22)
});

test("rejects read-only token (insufficient scope)", async () => {
Expand All @@ -96,7 +102,7 @@
},
body: triggerBody,
});
expect(res.status).toBe(403);

Check failure on line 105 in src/core/__tests__/trigger-auth.test.ts

View workflow job for this annotation

GitHub Actions / test

error: expect(received).toBe(expected)

Expected: 403 Received: undefined at <anonymous> (/home/runner/work/phantom/phantom/src/core/__tests__/trigger-auth.test.ts:105:22)
const json = (await res.json()) as { status: string; message: string };
expect(json.message).toContain("operator");
});
Expand All @@ -110,7 +116,7 @@
},
body: triggerBody,
});
expect(res.status).toBe(200);

Check failure on line 119 in src/core/__tests__/trigger-auth.test.ts

View workflow job for this annotation

GitHub Actions / test

error: expect(received).toBe(expected)

Expected: 200 Received: undefined at <anonymous> (/home/runner/work/phantom/phantom/src/core/__tests__/trigger-auth.test.ts:119:22)
const json = (await res.json()) as { status: string };
expect(json.status).toBe("ok");
});
Expand All @@ -124,6 +130,6 @@
},
body: triggerBody,
});
expect(res.status).toBe(200);

Check failure on line 133 in src/core/__tests__/trigger-auth.test.ts

View workflow job for this annotation

GitHub Actions / test

error: expect(received).toBe(expected)

Expected: 200 Received: undefined at <anonymous> (/home/runner/work/phantom/phantom/src/core/__tests__/trigger-auth.test.ts:133:22)
});
});
4 changes: 2 additions & 2 deletions src/db/__tests__/migrate.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ describe("runMigrations", () => {
runMigrations(db);

const migrationCount = db.query("SELECT COUNT(*) as count FROM _migrations").get() as { count: number };
expect(migrationCount.count).toBe(12);
expect(migrationCount.count).toBe(13);
});

test("tracks applied migration indices", () => {
Expand All @@ -48,6 +48,6 @@ describe("runMigrations", () => {
.all()
.map((r) => (r as { index_num: number }).index_num);

expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]);
expect(indices).toEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
});
});
2 changes: 2 additions & 0 deletions src/db/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,6 @@ export const MIGRATIONS: string[] = [
// Appended, never inserted mid-array: existing deployments have already
// applied migrations 0–10, so the new column must land at index 11.
"ALTER TABLE loops ADD COLUMN trigger_message_ts TEXT",

"ALTER TABLE loops ADD COLUMN checkpoint_interval INTEGER",
];
26 changes: 22 additions & 4 deletions src/evolution/judges/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,19 @@ import {
type VotingStrategy,
} from "./types.ts";

/**
 * Error raised when the judge API call itself succeeds but no structured
 * output could be parsed from the response.
 *
 * The token counts and estimated cost of the call travel with the error so
 * that a caller catching it can still account for the spend.
 */
export class JudgeParseError extends Error {
	/** Input tokens reported for the call that failed to parse. */
	public readonly inputTokens: number;
	/** Output tokens reported for the call that failed to parse. */
	public readonly outputTokens: number;
	/** Estimated cost of the call, in USD. */
	public readonly costUsd: number;

	constructor(message: string, inputTokens: number, outputTokens: number, costUsd: number) {
		super(message);
		// Usage fields are assigned before `name`, in the same order as the
		// constructor parameters.
		this.inputTokens = inputTokens;
		this.outputTokens = outputTokens;
		this.costUsd = costUsd;
		this.name = "JudgeParseError";
	}
}

let _client: Anthropic | null = null;

function getClient(): Anthropic {
Expand Down Expand Up @@ -58,14 +71,19 @@ export async function callJudge<T>(options: {
});

const parsed = message.parsed_output;
if (!parsed) {
throw new Error(`Judge returned no structured output (stop_reason: ${message.stop_reason})`);
}

const inputTokens = message.usage.input_tokens;
const outputTokens = message.usage.output_tokens;
const costUsd = estimateCost(options.model, inputTokens, outputTokens);

if (!parsed) {
throw new JudgeParseError(
`Judge returned no structured output (stop_reason: ${message.stop_reason})`,
inputTokens,
outputTokens,
costUsd,
);
}

// Extract verdict and confidence from the parsed data if present
const data = parsed as Record<string, unknown>;
const verdict = (data.verdict as "pass" | "fail") ?? "pass";
Expand Down
15 changes: 13 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ async function main(): Promise<void> {
runtime.setRoleTemplate(activeRole);
}

let contextBuilder: MemoryContextBuilder | undefined;
if (memory.isReady()) {
const contextBuilder = new MemoryContextBuilder(memory, memoryConfig);
contextBuilder = new MemoryContextBuilder(memory, memoryConfig);
runtime.setMemoryContextBuilder(contextBuilder);
}

Expand Down Expand Up @@ -160,7 +161,17 @@ async function main(): Promise<void> {

let mcpServer: PhantomMcpServer | null = null;
let scheduler: Scheduler | null = null;
const loopRunner = new LoopRunner({ db, runtime });
const postLoopDeps =
evolution || memory.isReady()
? {
evolution: evolution ?? undefined,
memory: memory.isReady() ? memory : undefined,
onEvolvedConfigUpdate: evolution
? (config: ReturnType<EvolutionEngine["getConfig"]>) => runtime.setEvolvedConfig(config)
: undefined,
}
: undefined;
const loopRunner = new LoopRunner({ db, runtime, memoryContextBuilder: contextBuilder, postLoopDeps });
try {
mcpServer = new PhantomMcpServer({
config,
Expand Down
Loading
Loading