Skip to content

Commit 27ac735

Browse files
authored
Merge pull request #131 from SentienceAPI/p0
P0: add baseline safety net testing + assert failure artifacts
2 parents 824bc70 + 59b5dd6 commit 27ac735

File tree

7 files changed

+566
-3
lines changed

7 files changed

+566
-3
lines changed

.github/workflows/test.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,13 @@ jobs:
6363
echo "Warning: src/extension not found, tests may fail"
6464
fi
6565
66-
- name: Run tests
66+
- name: Phase 0 regression safety net
67+
run: |
68+
npm test -- agent-runtime-assertions.test.ts
69+
env:
70+
CI: true
71+
72+
- name: Run full test suite
6773
run: |
6874
npm test
6975
env:

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,31 @@ if (runtime.assertDone(exists("text~'Example'"), 'task_complete')) {
8383
console.log(`Task done: ${runtime.isTaskDone}`);
8484
```
8585

86+
### Failure Artifact Buffer (Phase 1)
87+
88+
Capture a short ring buffer of screenshots and persist them when a required assertion fails.
89+
90+
```typescript
91+
runtime.enableFailureArtifacts({ bufferSeconds: 15, captureOnAction: true, fps: 0 });
92+
93+
// After each action, record it (best-effort).
94+
await runtime.recordAction('CLICK');
95+
```
96+
97+
### Redaction callback (Phase 3)
98+
99+
Provide a user-defined callback to redact snapshots and decide whether to persist frames. The SDK does not implement image/video redaction.
100+
101+
```typescript
102+
import { RedactionContext, RedactionResult } from 'sentienceapi';
103+
104+
const redact = (_ctx: RedactionContext): RedactionResult => {
105+
return { dropFrames: true };
106+
};
107+
108+
runtime.enableFailureArtifacts({ onBeforePersist: redact });
109+
```
110+
86111
**See examples:** [`examples/asserts/`](examples/asserts/)
87112

88113
## 🚀 Quick Start: Choose Your Abstraction Level

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/agent-runtime.ts

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ import { Snapshot } from './types';
4444
import { AssertContext, Predicate } from './verification';
4545
import { Tracer } from './tracing/tracer';
4646
import { LLMProvider } from './llm-provider';
47+
import { FailureArtifactBuffer, FailureArtifactsOptions } from './failure-artifacts';
4748

4849
// Define a minimal browser interface to avoid circular dependencies
4950
interface BrowserLike {
@@ -185,6 +186,11 @@ export class AssertionHandle {
185186
},
186187
true
187188
);
189+
if (this.required && !passed) {
190+
(this.runtime as any).persistFailureArtifacts(
191+
`assert_eventually_failed:${this.label}`
192+
);
193+
}
188194
return passed;
189195
} catch {
190196
// fall through to snapshot_exhausted
@@ -216,6 +222,9 @@ export class AssertionHandle {
216222
},
217223
true
218224
);
225+
if (this.required) {
226+
(this.runtime as any).persistFailureArtifacts(`assert_eventually_failed:${this.label}`);
227+
}
219228
return false;
220229
}
221230

@@ -233,6 +242,11 @@ export class AssertionHandle {
233242
},
234243
true
235244
);
245+
if (this.required) {
246+
(this.runtime as any).persistFailureArtifacts(
247+
`assert_eventually_timeout:${this.label}`
248+
);
249+
}
236250
return false;
237251
}
238252

@@ -272,6 +286,9 @@ export class AssertionHandle {
272286
{ eventually: true, attempt, final: true, timeout: true },
273287
true
274288
);
289+
if (this.required) {
290+
(this.runtime as any).persistFailureArtifacts(`assert_eventually_timeout:${this.label}`);
291+
}
275292
return false;
276293
}
277294

@@ -306,6 +323,10 @@ export class AgentRuntime {
306323
/** Most recent snapshot (for assertion context) */
307324
lastSnapshot: Snapshot | null = null;
308325

326+
/** Failure artifact buffer (Phase 1) */
327+
private artifactBuffer: FailureArtifactBuffer | null = null;
328+
private artifactTimer: NodeJS.Timeout | null = null;
329+
309330
/** Assertions accumulated during current step */
310331
private assertionsThisStep: AssertionRecord[] = [];
311332
/** Task completion tracking */
@@ -432,6 +453,105 @@ export class AgentRuntime {
432453
return this.lastSnapshot;
433454
}
434455

456+
/**
 * Enable failure artifact buffer (Phase 1).
 *
 * Creates a fresh FailureArtifactBuffer keyed by the tracer's run id. When the
 * buffer's resolved `fps` option is positive, a background interval captures
 * frames at roughly that rate (period floored to whole milliseconds, min 1 ms).
 *
 * Calling this again replaces the buffer. Any interval started by an earlier
 * call is cleared first so it does not keep firing next to the new one.
 *
 * @param options - Buffer configuration forwarded to FailureArtifactBuffer.
 */
enableFailureArtifacts(options: FailureArtifactsOptions = {}): void {
  // Stop a timer left over from a previous enable call; otherwise its handle
  // would be overwritten below and the interval could never be cleared.
  this.disableFailureArtifacts();

  this.artifactBuffer = new FailureArtifactBuffer(this.tracer.getRunId(), options);
  const fps = this.artifactBuffer.getOptions().fps;
  if (fps && fps > 0) {
    const intervalMs = Math.max(1, Math.floor(1000 / fps));
    this.artifactTimer = setInterval(() => {
      this.captureArtifactFrame().catch(() => {
        // best-effort: a failed background capture must never surface to the caller
      });
    }, intervalMs);
  }
}
471+
472+
/**
473+
* Disable failure artifact buffer and stop background capture.
474+
*/
475+
disableFailureArtifacts(): void {
476+
if (this.artifactTimer) {
477+
clearInterval(this.artifactTimer);
478+
this.artifactTimer = null;
479+
}
480+
}
481+
482+
/**
 * Append an action to the artifact timeline and, when the buffer's
 * `captureOnAction` option is set, capture a frame right after it.
 *
 * Does nothing when failure artifacts have not been enabled.
 *
 * @param action - Action label to record (e.g. 'CLICK').
 * @param url - Optional URL passed through to the buffer's step record.
 */
async recordAction(action: string, url?: string): Promise<void> {
  const buffer = this.artifactBuffer;
  if (!buffer) {
    return;
  }

  buffer.recordStep(action, this.stepId, this.stepIndex, url);

  const { captureOnAction } = buffer.getOptions();
  if (captureOnAction) {
    await this.captureArtifactFrame();
  }
}
494+
495+
/**
 * Take a JPEG screenshot of the page and add it to the artifact buffer.
 *
 * Best-effort: any error from the screenshot or from the buffer is swallowed.
 * Does nothing when failure artifacts have not been enabled.
 */
private async captureArtifactFrame(): Promise<void> {
  if (!this.artifactBuffer) {
    return;
  }

  try {
    const frame = await this.page.screenshot({ type: 'jpeg', quality: 80 });
    await this.artifactBuffer.addFrame(frame, 'jpeg');
  } catch {
    // best-effort: a missed frame is acceptable, a thrown error is not
  }
}
506+
507+
/**
 * Finalize artifact buffer at end of run.
 *
 * - On success: persists the buffer only when `persistMode` is 'always', then
 *   cleans the buffer up.
 * - On failure: persists through the failure path with reason
 *   'finalize_failure'.
 *
 * In every case the background capture interval is stopped afterwards, even if
 * persisting throws. The run is over, so a live timer would only keep
 * capturing into a cleaned-up buffer and keep the process alive.
 *
 * Does nothing when failure artifacts have not been enabled.
 *
 * @param success - Whether the run completed successfully.
 */
async finalizeRun(success: boolean): Promise<void> {
  if (!this.artifactBuffer) {
    return;
  }

  try {
    if (!success) {
      await this.persistFailureArtifacts('finalize_failure');
      return;
    }

    if (this.artifactBuffer.getOptions().persistMode === 'always') {
      await this.artifactBuffer.persist(
        'success',
        'success',
        this.lastSnapshot ?? undefined,
        this.lastSnapshot?.diagnostics,
        this.artifactMetadata()
      );
    }
    await this.artifactBuffer.cleanup();
  } finally {
    // End of run: never leave the capture interval running.
    this.disableFailureArtifacts();
  }
}
529+
530+
/**
 * Persist the buffered artifacts with status 'failure', then clean the buffer up.
 *
 * The last snapshot (if any), its diagnostics and the runtime metadata are
 * passed along. When `persistMode` is 'onFail', background capture is stopped
 * afterwards. Does nothing when failure artifacts have not been enabled.
 *
 * @param reason - Short reason tag stored with the persisted artifacts.
 */
private async persistFailureArtifacts(reason: string): Promise<void> {
  if (!this.artifactBuffer) {
    return;
  }

  const snapshot = this.lastSnapshot ?? undefined;
  const diagnostics = this.lastSnapshot?.diagnostics;
  const metadata = this.artifactMetadata();

  await this.artifactBuffer.persist(reason, 'failure', snapshot, diagnostics, metadata);
  await this.artifactBuffer.cleanup();

  const { persistMode } = this.artifactBuffer.getOptions();
  if (persistMode === 'onFail') {
    this.disableFailureArtifacts();
  }
}
546+
547+
/**
 * Build the metadata object stored alongside persisted artifacts.
 *
 * The URL comes from the last snapshot when available, otherwise from the
 * page's `url()` accessor if the page exposes one.
 */
private artifactMetadata(): Record<string, any> {
  return {
    backend: 'playwright',
    url: this.lastSnapshot?.url ?? this.page?.url?.(),
  };
}
554+
435555
/**
436556
* Begin a new step in the verification loop.
437557
*
@@ -476,6 +596,11 @@ export class AgentRuntime {
476596
assert(predicate: Predicate, label: string, required: boolean = false): boolean {
  const result = predicate(this.ctx());
  this._recordOutcome(result, label, required, null, true);

  const requiredAssertionFailed = required && !result.passed;
  if (requiredAssertionFailed) {
    // Fire-and-forget: persisting artifacts must not block or fail the assertion.
    void this.persistFailureArtifacts(`assert_failed:${label}`).catch(() => {
      // best-effort
    });
  }

  return result.passed;
}
481606

0 commit comments

Comments
 (0)