Skip to content

Commit 5042d40

Browse files
author
SentienceDEV
committed
verification payment step_end in agent runtime
1 parent e672b07 commit 5042d40

File tree

3 files changed

+199
-16
lines changed

3 files changed

+199
-16
lines changed

src/agent-runtime.ts

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import { Page } from 'playwright';
4444
import { Snapshot } from './types';
4545
import { AssertContext, Predicate } from './verification';
4646
import { Tracer } from './tracing/tracer';
47+
import { TraceEventBuilder } from './utils/trace-event-builder';
4748
import { LLMProvider } from './llm-provider';
4849
import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts';
4950
import {
@@ -338,6 +339,8 @@ export class AgentRuntime {
338339
stepIndex: number = 0;
339340
/** Most recent snapshot (for assertion context) */
340341
lastSnapshot: Snapshot | null = null;
342+
private stepPreSnapshot: Snapshot | null = null;
343+
private stepPreUrl: string | null = null;
341344
/** Best-effort download records (Playwright downloads) */
342345
private downloads: Array<Record<string, any>> = [];
343346

@@ -347,6 +350,8 @@ export class AgentRuntime {
347350

348351
/** Assertions accumulated during current step */
349352
private assertionsThisStep: AssertionRecord[] = [];
353+
private stepGoal: string | null = null;
354+
private lastAction: string | null = null;
350355
/** Task completion tracking */
351356
private taskDone: boolean = false;
352357
private taskDoneLabel: string | null = null;
@@ -532,6 +537,10 @@ export class AgentRuntime {
532537
async snapshot(options?: Record<string, any>): Promise<Snapshot> {
533538
const { _skipCaptchaHandling, ...snapshotOptions } = options || {};
534539
this.lastSnapshot = await this.browser.snapshot(this.page, snapshotOptions);
540+
if (this.lastSnapshot && !this.stepPreSnapshot) {
541+
this.stepPreSnapshot = this.lastSnapshot;
542+
this.stepPreUrl = this.lastSnapshot.url;
543+
}
535544
if (!_skipCaptchaHandling) {
536545
await this.handleCaptchaIfNeeded(this.lastSnapshot, 'gateway');
537546
}
@@ -713,6 +722,7 @@ export class AgentRuntime {
713722
* Record an action in the artifact timeline and capture a frame if enabled.
714723
*/
715724
async recordAction(action: string, url?: string): Promise<void> {
725+
this.lastAction = action;
716726
if (!this.artifactBuffer) {
717727
return;
718728
}
@@ -722,6 +732,84 @@ export class AgentRuntime {
722732
}
723733
}
724734

735+
/**
 * Assemble the `step_end` payload for the current step and hand it to the tracer.
 *
 * Anything not supplied through `opts` is derived from runtime state:
 * - "pre" state comes from the first snapshot captured during the step
 *   (`stepPreSnapshot` / `stepPreUrl`), falling back to the latest snapshot
 *   and then to the page URL;
 * - "post" state comes from the current page URL and the latest snapshot;
 * - the verification verdict defaults to `requiredAssertionsPassed()`;
 * - the action name defaults to the last recorded action, else `'unknown'`.
 *
 * @param opts - Optional overrides for the exec/verify sections of the event.
 * @returns The payload built by `TraceEventBuilder.buildRuntimeStepEndData`,
 *          which is the same object that is passed to `tracer.emit`.
 */
emitStepEnd(opts: {
  action?: string;
  success?: boolean;
  error?: string;
  outcome?: string;
  durationMs?: number;
  attempt?: number;
  verifyPassed?: boolean;
  verifySignals?: Record<string, any>;
  postUrl?: string;
  postSnapshotDigest?: string;
}): any {
  // Digest helper shared by the pre/post sides; a missing snapshot yields no digest.
  const digestOf = (snap: Snapshot | null) =>
    snap ? TraceEventBuilder.buildSnapshotDigest(snap) : undefined;

  const stepGoal = this.stepGoal || '';

  // "Before" side: the step's first snapshot wins, otherwise the most recent one.
  const before = this.stepPreSnapshot || this.lastSnapshot;
  const urlBefore = this.stepPreUrl || before?.url || this.page?.url?.() || '';

  // "After" side: an explicit override wins, then the live page URL, then the
  // latest snapshot's URL, and finally the "before" URL.
  const urlAfter = opts.postUrl || this.page?.url?.() || this.lastSnapshot?.url || urlBefore;

  const digestBefore = digestOf(before);
  const digestAfter = opts.postSnapshotDigest || digestOf(this.lastSnapshot);

  // Only report a URL change when both URLs are known.
  const navigated =
    Boolean(urlBefore) && Boolean(urlAfter) && String(urlBefore) !== String(urlAfter);

  const stepAssertions = this.getAssertionsForStepEnd();

  // Copy the caller's signals so the caller's object is never mutated,
  // then fill in whatever the caller left unset.
  const mergedSignals: Record<string, any> = { ...opts.verifySignals };
  if (mergedSignals.url_changed === undefined) {
    mergedSignals.url_changed = navigated;
  }
  if (opts.error && mergedSignals.error === undefined) {
    mergedSignals.error = opts.error;
  }
  if (stepAssertions.task_done !== undefined) {
    mergedSignals.task_done = stepAssertions.task_done;
  }
  if (stepAssertions.task_done_label) {
    mergedSignals.task_done_label = stepAssertions.task_done_label;
  }

  // An explicit verdict from the caller takes precedence over the assertion state.
  const verified =
    opts.verifyPassed === undefined ? this.requiredAssertionsPassed() : opts.verifyPassed;

  const execData = {
    success: opts.success === undefined ? verified : opts.success,
    action: opts.action || this.lastAction || 'unknown',
    outcome: opts.outcome || '',
    duration_ms: opts.durationMs,
    error: opts.error,
  };

  const verifyData = {
    passed: Boolean(verified),
    signals: mergedSignals,
  };

  const payload = TraceEventBuilder.buildRuntimeStepEndData({
    stepId: this.stepId || '',
    stepIndex: this.stepIndex,
    goal: stepGoal,
    attempt: opts.attempt ?? 0,
    preUrl: urlBefore,
    postUrl: urlAfter,
    preSnapshotDigest: digestBefore,
    postSnapshotDigest: digestAfter,
    execData,
    verifyData,
    assertions: stepAssertions.assertions,
    taskDone: stepAssertions.task_done,
    taskDoneLabel: stepAssertions.task_done_label,
  });

  this.tracer.emit('step_end', payload, this.stepId || undefined);
  return payload;
}
812+
725813
private async captureArtifactFrame(): Promise<void> {
726814
if (!this.artifactBuffer) {
727815
return;
@@ -797,6 +885,10 @@ export class AgentRuntime {
797885
beginStep(goal: string, stepIndex?: number): string {
798886
// Clear previous step state
799887
this.assertionsThisStep = [];
888+
this.stepPreSnapshot = null;
889+
this.stepPreUrl = null;
890+
this.stepGoal = goal;
891+
this.lastAction = null;
800892

801893
// Update step index
802894
if (stepIndex !== undefined) {

src/runtime-agent.ts

Lines changed: 39 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -78,26 +78,49 @@ export class RuntimeAgent {
7878
const { taskGoal, step } = opts;
7979
this.runtime.beginStep(step.goal);
8080

81-
const snap = await this.snapshotWithRamp(step);
81+
let ok = false;
82+
let emitted = false;
83+
try {
84+
const snap = await this.snapshotWithRamp(step);
8285

83-
if (await this.shouldShortCircuitToVision(step, snap)) {
84-
return await this.visionExecutorAttempt({ taskGoal, step, snap });
85-
}
86+
if (await this.shouldShortCircuitToVision(step, snap)) {
87+
ok = await this.visionExecutorAttempt({ taskGoal, step, snap });
88+
return ok;
89+
}
8690

87-
// 1) Structured executor attempt.
88-
const action = await this.proposeStructuredAction({ taskGoal, step, snap });
89-
await this.executeAction(action, snap);
90-
const ok = await this.applyVerifications(step);
91-
if (ok) return true;
91+
// 1) Structured executor attempt.
92+
const action = await this.proposeStructuredAction({ taskGoal, step, snap });
93+
await this.executeAction(action, snap);
94+
ok = await this.applyVerifications(step);
95+
if (ok) return true;
96+
97+
// 2) Optional vision executor fallback (bounded).
98+
const enabled = step.visionExecutorEnabled ?? true;
99+
const maxAttempts = step.maxVisionExecutorAttempts ?? 1;
100+
if (enabled && maxAttempts > 0) {
101+
ok = await this.visionExecutorAttempt({ taskGoal, step, snap });
102+
return ok;
103+
}
92104

93-
// 2) Optional vision executor fallback (bounded).
94-
const enabled = step.visionExecutorEnabled ?? true;
95-
const maxAttempts = step.maxVisionExecutorAttempts ?? 1;
96-
if (enabled && maxAttempts > 0) {
97-
return await this.visionExecutorAttempt({ taskGoal, step, snap });
105+
return false;
106+
} catch (error: any) {
107+
this.runtime.emitStepEnd({
108+
success: false,
109+
verifyPassed: false,
110+
error: String(error?.message ?? error),
111+
outcome: 'exception',
112+
});
113+
emitted = true;
114+
throw error;
115+
} finally {
116+
if (!emitted) {
117+
this.runtime.emitStepEnd({
118+
success: ok,
119+
verifyPassed: ok,
120+
outcome: ok ? 'ok' : 'verification_failed',
121+
});
122+
}
98123
}
99-
100-
return false;
101124
}
102125

103126
private async snapshotWithRamp(step: RuntimeStep): Promise<Snapshot> {

src/utils/trace-event-builder.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,74 @@ export class TraceEventBuilder {
248248
};
249249
}
250250

251+
/**
 * Build the `step_end` event payload emitted by AgentRuntime's verification loop.
 *
 * The returned `verify.signals` is a shallow copy of `verifyData.signals`,
 * extended with:
 * - `assertions`, when a non-empty list is supplied;
 * - `task_done`, when `taskDone` is a boolean;
 * - `task_done_label`, when `taskDoneLabel` is non-empty.
 *
 * The `llm` section is always an empty object.
 *
 * @param params - Step identity, pre/post page state, and exec/verify results.
 * @returns The assembled event data with schema version `v: 1`.
 */
static buildRuntimeStepEndData(params: {
  stepId: string;
  stepIndex: number;
  goal: string;
  attempt: number;
  preUrl: string;
  postUrl: string;
  preSnapshotDigest?: string;
  postSnapshotDigest?: string;
  execData: TraceEventData['exec'];
  verifyData: TraceEventData['verify'];
  assertions?: TraceEventData['verify']['signals']['assertions'];
  taskDone?: boolean;
  taskDoneLabel?: string;
}): TraceEventData {
  const { verifyData, assertions, taskDone, taskDoneLabel } = params;

  // Work on a copy so the caller's signals object is left untouched.
  const mergedSignals = { ...(verifyData.signals || {}) } as Record<string, any>;

  if ((assertions?.length ?? 0) > 0) {
    mergedSignals.assertions = assertions;
  }
  if (typeof taskDone === 'boolean') {
    mergedSignals.task_done = taskDone;
  }
  if (taskDoneLabel) {
    mergedSignals.task_done_label = taskDoneLabel;
  }

  const pre = {
    url: params.preUrl,
    snapshot_digest: params.preSnapshotDigest,
  };
  const post = {
    url: params.postUrl,
    snapshot_digest: params.postSnapshotDigest,
  };

  return {
    v: 1,
    step_id: params.stepId,
    step_index: params.stepIndex,
    goal: params.goal,
    attempt: params.attempt,
    pre,
    llm: {},
    exec: params.execData,
    post,
    verify: {
      passed: verifyData.passed,
      signals: mergedSignals,
    },
  };
}
318+
251319
/**
252320
* Build snapshot event data
253321
*

0 commit comments

Comments
 (0)