Skip to content

Commit 43eaab4

Browse files
authored
fix: improvements to agent (#62)
1 parent 752b961 commit 43eaab4

13 files changed

Lines changed: 544 additions & 204 deletions

File tree

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[submodule ".repos/effect"]
22
path = .repos/effect
3-
url = git@github.com:Effect-TS/effect-smol.git
3+
url = https://github.com/Effect-TS/effect-smol.git

.vscode/settings.json

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
11
{
2-
"typescript.tsdk": "node_modules/typescript/lib"
2+
"typescript.tsdk": "node_modules/typescript/lib",
3+
"editor.defaultFormatter": "oxc.oxc-vscode",
4+
"editor.formatOnSave": false,
5+
"editor.codeActionsOnSave": {
6+
"source.format.oxc": "always", // run formatter first
7+
"source.fixAll.oxc": "always", // run lint fixes after
8+
},
39
}

apps/cli/src/commands/add-github-action.ts

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const generateWorkflow = (packageManager: PackageManager, devCommand: string, de
4242

4343
const setupSteps = buildSetupSteps(packageManager);
4444

45-
return `name: Expect Tests
45+
return `name: Expect Browser Tests
4646
4747
on:
4848
pull_request:
@@ -57,24 +57,33 @@ jobs:
5757
pull-requests: write
5858
env:
5959
ANTHROPIC_API_KEY: \${{ secrets.ANTHROPIC_API_KEY }}
60+
# Expect uses this local app URL as the browser test target in CI.
6061
EXPECT_BASE_URL: "${devUrl}"
6162
steps:
6263
- uses: actions/checkout@v4
6364
${setupSteps}
6465
- name: Install dependencies
6566
run: ${install}
6667
68+
# Expect runs against your dev server by default, not a production build or deployed preview.
69+
# To test a preview URL instead, set EXPECT_BASE_URL to that URL. You can use
70+
# https://docs.github.com/en/actions/reference/workflows-and-actions/workflow-syntax#jobsjob_idneeds
71+
# to pass preview URLs from other jobs.
6772
- name: Start dev server
6873
run: ${devCommand} &
6974
75+
# Wait until the local app is reachable before handing control to the browser agent.
7076
- name: Wait for dev server
7177
run: npx wait-on ${devUrl} --timeout 60000
7278
79+
7380
- name: Run expect
7481
env:
82+
# Expect uses the GitHub token to comment on the pull request.
7583
GH_TOKEN: \${{ secrets.GITHUB_TOKEN }}
7684
run: ${dlx} expect-cli@latest --ci
7785
86+
# Upload browser recordings and traces from .expect so failures are debuggable after the run.
7887
- name: Upload test artifacts
7988
if: always()
8089
uses: actions/upload-artifact@v4
@@ -175,7 +184,12 @@ export const runAddGithubAction = async (options: AddGithubActionOptions = {}) =
175184
logger.success("Created .github/workflows/expect.yml");
176185
logger.break();
177186
logger.log(` Add ${highlighter.info("ANTHROPIC_API_KEY")} to your repository secrets:`);
187+
logger.break();
188+
logger.log(` You can use the ${highlighter.info("gh")} CLI to add repository secrets:`);
189+
logger.log(
190+
` ${highlighter.dim("claude setup-token")} ${highlighter.dim("# use Claude Code to generate a token, then paste it into ANTHROPIC_API_KEY")}`,
191+
);
178192
logger.log(
179-
` ${highlighter.dim("Settings → Secrets and variables → Actions → New repository secret")}`,
193+
` ${highlighter.dim("gh secret set ANTHROPIC_API_KEY")} ${highlighter.dim("# for an Anthropic API key or a token from claude setup-token")}`,
180194
);
181195
};

apps/cli/src/data/execution-atom.ts

Lines changed: 124 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import * as NodeServices from "@effect/platform-node/NodeServices";
1010
import { startReplayProxy } from "../utils/replay-proxy-server";
1111
import { toViewerRunState, pushStepState } from "../utils/push-step-state";
1212
import { extractCloseArtifacts } from "../utils/extract-close-artifacts";
13+
import { loadReplayEvents } from "../utils/load-replay-events";
1314

1415
const LIVE_VIEW_PORT_MIN = 50000;
1516
const LIVE_VIEW_PORT_RANGE = 10000;
@@ -35,6 +36,30 @@ export interface ExecutionResult {
3536

3637
export const screenshotPathsAtom = Atom.make<readonly string[]>([]);
3738

39+
const syncReplayProxy = Effect.fn("syncReplayProxy")(function* (
40+
replayUrl: string | undefined,
41+
liveViewUrl: string,
42+
replaySessionPath: string | undefined,
43+
executed: ExecutedTestPlan,
44+
) {
45+
if (!replayUrl) return;
46+
47+
const proxyBase = replayUrl.split("/replay")[0];
48+
const replayEvents = yield* loadReplayEvents({ liveViewUrl, replaySessionPath });
49+
50+
if (replayEvents && replayEvents.length > 0) {
51+
yield* Effect.tryPromise(() =>
52+
fetch(`${proxyBase}/latest.json`, {
53+
method: "POST",
54+
headers: { "Content-Type": "application/json" },
55+
body: JSON.stringify(replayEvents),
56+
}),
57+
).pipe(Effect.catchCause(() => Effect.void));
58+
}
59+
60+
yield* pushStepState(proxyBase, toViewerRunState(executed));
61+
});
62+
3863
const execute = Effect.fnUntraced(
3964
function* (input: ExecuteInput, _ctx: Atom.FnContext) {
4065
const reporter = yield* Reporter;
@@ -104,22 +129,7 @@ const execute = Effect.fnUntraced(
104129

105130
const artifacts = extractCloseArtifacts(finalExecuted.events);
106131

107-
if (replayUrl) {
108-
const proxyBase = replayUrl.split("/replay")[0];
109-
yield* Effect.tryPromise(() =>
110-
fetch(`${liveViewUrl}/latest.json`).then(async (response) => {
111-
if (!response.ok) return;
112-
const allEvents = await response.json();
113-
await fetch(`${proxyBase}/latest.json`, {
114-
method: "POST",
115-
headers: { "Content-Type": "application/json" },
116-
body: JSON.stringify(allEvents),
117-
});
118-
}),
119-
).pipe(Effect.catchCause(() => Effect.void));
120-
121-
yield* pushStepState(proxyBase, toViewerRunState(finalExecuted));
122-
}
132+
yield* syncReplayProxy(replayUrl, liveViewUrl, artifacts.replaySessionPath, finalExecuted);
123133

124134
const report = yield* reporter.report(finalExecuted);
125135

@@ -170,123 +180,109 @@ export const executeFn = cliAtomRuntime.fn<ExecuteInput>()((input, ctx) =>
170180
),
171181
);
172182

173-
export const executeAtomFn = cliAtomRuntime.fn(
174-
Effect.fnUntraced(
175-
function* (input: ExecuteInput, _ctx: Atom.FnContext) {
176-
const reporter = yield* Reporter;
177-
const executor = yield* Executor;
178-
const analytics = yield* Analytics;
179-
const git = yield* Git;
180-
181-
const runStartedAt = Date.now();
182-
183-
const liveViewPort = pickRandomPort();
184-
const liveViewUrl = `http://localhost:${liveViewPort}`;
183+
const executeAtom = Effect.fnUntraced(
184+
function* (input: ExecuteInput, _ctx: Atom.FnContext) {
185+
const reporter = yield* Reporter;
186+
const executor = yield* Executor;
187+
const analytics = yield* Analytics;
188+
const git = yield* Git;
185189

186-
let replayUrl: string | undefined;
190+
const runStartedAt = Date.now();
187191

188-
if (input.replayHost) {
189-
const proxyHandle = yield* startReplayProxy({
190-
replayHost: input.replayHost,
191-
liveViewUrl,
192-
});
193-
replayUrl = `${proxyHandle.url}/replay`;
192+
const liveViewPort = pickRandomPort();
193+
const liveViewUrl = `http://localhost:${liveViewPort}`;
194194

195-
yield* Effect.logInfo("Replay viewer available", { replayUrl });
196-
yield* Effect.sync(() => input.onReplayUrl?.(`${replayUrl}?live=true`));
197-
}
195+
let replayUrl: string | undefined;
198196

199-
const executeOptions: ExecuteOptions = {
200-
...input.options,
197+
if (input.replayHost) {
198+
const proxyHandle = yield* startReplayProxy({
199+
replayHost: input.replayHost,
201200
liveViewUrl,
202-
onConfigOptions: input.onConfigOptions,
203-
};
204-
205-
yield* analytics.capture("run:started", { plan_id: "direct" });
206-
207-
const finalExecuted = yield* executor.execute(executeOptions).pipe(
208-
Stream.tap((executed) =>
209-
Effect.gen(function* () {
210-
input.onUpdate(executed);
211-
yield* pushStepState(liveViewUrl, toViewerRunState(executed));
212-
}),
213-
),
214-
Stream.runLast,
215-
Effect.map((option) =>
216-
(option._tag === "Some"
217-
? option.value
218-
: new ExecutedTestPlan({
219-
...input.options,
220-
id: "" as never,
221-
changesFor: input.options.changesFor,
222-
currentBranch: "",
223-
diffPreview: "",
224-
fileStats: [],
225-
instruction: input.options.instruction,
226-
baseUrl: undefined as never,
227-
isHeadless: input.options.isHeadless,
228-
cookieBrowserKeys: input.options.cookieBrowserKeys,
229-
testCoverage: Option.none(),
230-
title: input.options.instruction,
231-
rationale: "Direct execution",
232-
steps: [],
233-
events: [],
234-
})
235-
)
236-
.finalizeTextBlock()
237-
.synthesizeRunFinished(),
238-
),
239-
);
240-
241-
const artifacts = extractCloseArtifacts(finalExecuted.events);
242-
243-
if (replayUrl) {
244-
const proxyBase = replayUrl.split("/replay")[0];
245-
yield* Effect.tryPromise(() =>
246-
fetch(`${liveViewUrl}/latest.json`).then(async (response) => {
247-
if (!response.ok) return;
248-
const allEvents = await response.json();
249-
await fetch(`${proxyBase}/latest.json`, {
250-
method: "POST",
251-
headers: { "Content-Type": "application/json" },
252-
body: JSON.stringify(allEvents),
253-
});
254-
}),
255-
).pipe(Effect.catchCause(() => Effect.void));
256-
257-
yield* pushStepState(proxyBase, toViewerRunState(finalExecuted));
258-
}
259-
260-
const report = yield* reporter.report(finalExecuted);
261-
262-
const passedCount = report.steps.filter(
263-
(step) => report.stepStatuses.get(step.id)?.status === "passed",
264-
).length;
265-
const failedCount = report.steps.filter(
266-
(step) => report.stepStatuses.get(step.id)?.status === "failed",
267-
).length;
268-
269-
yield* analytics.capture("run:completed", {
270-
plan_id: finalExecuted.id ?? "direct",
271-
passed: passedCount,
272-
failed: failedCount,
273-
step_count: finalExecuted.steps.length,
274-
file_count: 0,
275-
duration_ms: Date.now() - runStartedAt,
276201
});
202+
replayUrl = `${proxyHandle.url}/replay`;
277203

278-
if (report.status === "passed") {
279-
yield* git.saveTestedFingerprint();
280-
}
281-
282-
return {
283-
executedPlan: finalExecuted,
284-
report,
285-
replayUrl: replayUrl ?? artifacts.localReplayUrl,
286-
localReplayUrl: artifacts.localReplayUrl,
287-
videoUrl: artifacts.videoUrl,
288-
} satisfies ExecutionResult;
289-
},
290-
Effect.annotateLogs({ fn: "executeAtomFn" }),
291-
),
204+
yield* Effect.logInfo("Replay viewer available", { replayUrl });
205+
yield* Effect.sync(() => input.onReplayUrl?.(`${replayUrl}?live=true`));
206+
}
207+
208+
const executeOptions: ExecuteOptions = {
209+
...input.options,
210+
liveViewUrl,
211+
onConfigOptions: input.onConfigOptions,
212+
};
213+
214+
yield* analytics.capture("run:started", { plan_id: "direct" });
215+
216+
const finalExecuted = yield* executor.execute(executeOptions).pipe(
217+
Stream.tap((executed) =>
218+
Effect.gen(function* () {
219+
input.onUpdate(executed);
220+
yield* pushStepState(liveViewUrl, toViewerRunState(executed));
221+
}),
222+
),
223+
Stream.runLast,
224+
Effect.map((option) =>
225+
(option._tag === "Some"
226+
? option.value
227+
: new ExecutedTestPlan({
228+
...input.options,
229+
id: "" as never,
230+
changesFor: input.options.changesFor,
231+
currentBranch: "",
232+
diffPreview: "",
233+
fileStats: [],
234+
instruction: input.options.instruction,
235+
baseUrl: undefined as never,
236+
isHeadless: input.options.isHeadless,
237+
cookieBrowserKeys: input.options.cookieBrowserKeys,
238+
testCoverage: Option.none(),
239+
title: input.options.instruction,
240+
rationale: "Direct execution",
241+
steps: [],
242+
events: [],
243+
})
244+
)
245+
.finalizeTextBlock()
246+
.synthesizeRunFinished(),
247+
),
248+
);
249+
250+
const artifacts = extractCloseArtifacts(finalExecuted.events);
251+
252+
yield* syncReplayProxy(replayUrl, liveViewUrl, artifacts.replaySessionPath, finalExecuted);
253+
254+
const report = yield* reporter.report(finalExecuted);
255+
256+
const passedCount = report.steps.filter(
257+
(step) => report.stepStatuses.get(step.id)?.status === "passed",
258+
).length;
259+
const failedCount = report.steps.filter(
260+
(step) => report.stepStatuses.get(step.id)?.status === "failed",
261+
).length;
262+
263+
yield* analytics.capture("run:completed", {
264+
plan_id: finalExecuted.id ?? "direct",
265+
passed: passedCount,
266+
failed: failedCount,
267+
step_count: finalExecuted.steps.length,
268+
file_count: 0,
269+
duration_ms: Date.now() - runStartedAt,
270+
});
271+
272+
if (report.status === "passed") {
273+
yield* git.saveTestedFingerprint();
274+
}
275+
276+
return {
277+
executedPlan: finalExecuted,
278+
report,
279+
replayUrl: replayUrl ?? artifacts.localReplayUrl,
280+
localReplayUrl: artifacts.localReplayUrl,
281+
videoUrl: artifacts.videoUrl,
282+
} satisfies ExecutionResult;
283+
},
284+
Effect.annotateLogs({ fn: "executeAtomFn" }),
285+
Effect.provide(NodeServices.layer),
292286
);
287+
288+
export const executeAtomFn = cliAtomRuntime.fn(executeAtom);

0 commit comments

Comments
 (0)