diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..7608f4c
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,61 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+jobs:
+  unit:
+    name: Unit tests
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+      # ffmpeg is needed for boundary-frame extraction and shader pre-render.
+      - run: sudo apt-get update && sudo apt-get install -y ffmpeg
+      - run: npm install
+      - run: npx playwright install --with-deps chromium
+      - run: npm run build
+      - run: npm test
+
+  cross-browser:
+    name: ci-smoke (${{ matrix.browser }})
+    needs: unit
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        browser: [chromium, firefox, webkit]
+    env:
+      ARGO_BROWSER: ${{ matrix.browser }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+      - run: sudo apt-get update && sudo apt-get install -y ffmpeg
+      - run: npm install
+      # chromium is also installed even when recording on webkit/firefox —
+      # shader-render uses headless chromium for WebGL regardless of the
+      # recording browser.
+      - run: npx playwright install --with-deps chromium "$ARGO_BROWSER"
+      - run: npm run build
+
+      - name: Run pipeline
+        run: npx tsx bin/argo.js pipeline ci-smoke --config demos/ci-smoke.config.mjs --browser "$ARGO_BROWSER"
+
+      - name: Verify output
+        run: |
+          # python3 -m pip is more robust than bare `pip` across runner images.
+          python3 -m pip install --quiet pillow
+          python3 scripts/verify-ci-smoke.py videos/ci-smoke.mp4
+
+      - name: Upload mp4 artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ci-smoke-${{ matrix.browser }}
+          path: videos/ci-smoke.mp4
diff --git a/demos/blocks-showcase.demo.ts b/demos/blocks-showcase.demo.ts
index 28b0f6a..7150c38 100644
--- a/demos/blocks-showcase.demo.ts
+++ b/demos/blocks-showcase.demo.ts
@@ -15,6 +15,8 @@ test('blocks-showcase', async ({ page, narration }) => {
`);
await page.waitForTimeout(500);
+ await narration.startRecording(page);
+
for (const scene of ['intro', 'x-post', 'macos', 'ytlt', 'chart', 'spotify', 'closing']) {
narration.mark(scene);
await showOverlay(page, scene, narration.durationFor(scene, { maxMs: 6000 }));
diff --git a/demos/ci-smoke.config.mjs b/demos/ci-smoke.config.mjs
new file mode 100644
index 0000000..160804d
--- /dev/null
+++ b/demos/ci-smoke.config.mjs
@@ -0,0 +1,26 @@
+import { defineConfig } from '@argo-video/cli';
+
+// CI smoke config — exercises the cross-browser robustness fixes:
+// * captureMode: 'jpeg-stitch' auto-downgrades to 'webm' on non-chromium
+// * deviceScaleFactor: 2 auto-clamps to 1 on non-chromium
+// * shader transition exercises setsar=1 normalization on webkit
+// Silent demo (no text in scenes manifest) — TTS is skipped, video-only export.
+export default defineConfig({
+ // Demo uses page.setContent — baseURL is unused but required by config schema.
+ baseURL: 'about:blank',
+ demosDir: 'demos',
+ outputDir: 'videos',
+ video: {
+ width: 1920,
+ height: 1080,
+ fps: 30,
+ deviceScaleFactor: 2,
+ captureMode: 'jpeg-stitch',
+ },
+ export: {
+ preset: 'ultrafast',
+ crf: 28,
+ encoder: 'cpu',
+ transition: { type: 'shader', shader: 'crosswarp', durationMs: 600 },
+ },
+});
diff --git a/demos/ci-smoke.demo.ts b/demos/ci-smoke.demo.ts
new file mode 100644
index 0000000..c43251e
--- /dev/null
+++ b/demos/ci-smoke.demo.ts
@@ -0,0 +1,34 @@
+import { test } from '@argo-video/cli';
+
+test('ci-smoke', async ({ page, narration }) => {
+  test.setTimeout(60_000);
+
+  // Three full-viewport scenes with saturated dark backgrounds — the verify
+  // script samples quadrant pixels and rejects near-gray (padding) output.
+  await page.setContent(`
+    <style>body{margin:0}.scene{display:none;width:100vw;height:100vh;place-items:center;color:#fff;font:700 96px sans-serif}</style>
+    <div id="s1" class="scene" style="display:grid;background:#1e3a8a">Scene 1</div>
+    <div id="s2" class="scene" style="background:#7c2d12">Scene 2</div>
+    <div id="s3" class="scene" style="background:#14532d">Scene 3</div>
+  `);
+  await page.waitForTimeout(300);
+
+  await narration.startRecording(page);
+
+  narration.mark('one');
+  await page.waitForTimeout(2500);
+
+  await page.evaluate(() => {
+    (document.getElementById('s1') as HTMLElement).style.display = 'none';
+    (document.getElementById('s2') as HTMLElement).style.display = 'grid';
+  });
+  narration.mark('two');
+  await page.waitForTimeout(2500);
+
+  await page.evaluate(() => {
+    (document.getElementById('s2') as HTMLElement).style.display = 'none';
+    (document.getElementById('s3') as HTMLElement).style.display = 'grid';
+  });
+  narration.mark('three');
+  await page.waitForTimeout(2500);
+});
diff --git a/demos/ci-smoke.scenes.json b/demos/ci-smoke.scenes.json
new file mode 100644
index 0000000..92dcd72
--- /dev/null
+++ b/demos/ci-smoke.scenes.json
@@ -0,0 +1,5 @@
+[
+ { "scene": "one" },
+ { "scene": "two" },
+ { "scene": "three" }
+]
diff --git a/scripts/verify-ci-smoke.py b/scripts/verify-ci-smoke.py
new file mode 100755
index 0000000..26f4033
--- /dev/null
+++ b/scripts/verify-ci-smoke.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Verify ci-smoke output mp4. Asserts:
+ - output exists and is valid mp4
+ - dimensions are 1920x1080 (catches dsf-clamp regressions)
+ - duration is in expected window
+ - midpoint frame's bottom-right quadrant is not near-gray
+ (catches frame-in-frame regressions where a non-chromium browser
+ rendered the page at 1x into a 2x screencast canvas)
+
+Usage: python3 scripts/verify-ci-smoke.py videos/ci-smoke.mp4
+"""
+
+import subprocess
+import sys
+from pathlib import Path
+
+EXPECTED_W, EXPECTED_H = 1920, 1080
+MIN_DURATION_S, MAX_DURATION_S = 5, 20
+
+
+def ffprobe(*args: str) -> str:
+ return subprocess.check_output(['ffprobe', '-v', 'error', *args]).decode().strip()
+
+
+def main() -> None:
+ if len(sys.argv) != 2:
+ sys.exit('Usage: verify-ci-smoke.py ')
+ mp4 = Path(sys.argv[1])
+ if not mp4.exists():
+ sys.exit(f'missing output: {mp4}')
+
+ dims_csv = ffprobe('-select_streams', 'v:0', '-show_entries', 'stream=width,height', '-of', 'csv=p=0', str(mp4))
+ w, h = (int(x) for x in dims_csv.split(','))
+ if (w, h) != (EXPECTED_W, EXPECTED_H):
+ sys.exit(f'dimensions {w}x{h} != {EXPECTED_W}x{EXPECTED_H}')
+
+ duration = float(ffprobe('-show_entries', 'format=duration', '-of', 'csv=p=0', str(mp4)))
+ if not MIN_DURATION_S <= duration <= MAX_DURATION_S:
+ sys.exit(f'duration {duration:.1f}s outside [{MIN_DURATION_S}, {MAX_DURATION_S}]')
+
+ sample_path = Path('/tmp/ci-smoke-sample.png')
+ subprocess.check_call(
+ ['ffmpeg', '-y', '-ss', f'{duration / 2:.2f}', '-i', str(mp4), '-frames:v', '1', str(sample_path)],
+ stderr=subprocess.DEVNULL,
+ )
+
+ from PIL import Image
+ img = Image.open(sample_path).convert('RGB')
+ samples = {
+ 'top-left': img.getpixel((w // 4, h // 4)),
+ 'top-right': img.getpixel((3 * w // 4, h // 4)),
+ 'bottom-left': img.getpixel((w // 4, 3 * h // 4)),
+ 'bottom-right': img.getpixel((3 * w // 4, 3 * h // 4)),
+ 'center': img.getpixel((w // 2, h // 2)),
+ }
+ print('pixel samples:')
+ for name, rgb in samples.items():
+ print(f' {name:13s}: {rgb}')
+
+ # Bottom-right should be inside the rendered scene background, not gray padding.
+ # The three scenes use saturated dark colors (#1e3a8a, #7c2d12, #14532d) — the
+ # max channel deviation from grey is at least ~70. Padding gray pixels stay
+ # within ~10 of (128,128,128).
+ br = samples['bottom-right']
+ gray_distance = sum(abs(c - 128) for c in br)
+ if gray_distance < 30:
+ sys.exit(f'bottom-right {br} too close to gray (distance={gray_distance}) — frame-in-frame regression?')
+
+ print(f'OK: {w}x{h}, {duration:.1f}s, br_gray_distance={gray_distance}')
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/cdp-screencast.ts b/src/cdp-screencast.ts
new file mode 100644
index 0000000..52043a1
--- /dev/null
+++ b/src/cdp-screencast.ts
@@ -0,0 +1,244 @@
+import { spawn } from 'node:child_process';
+import { mkdirSync, rmSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+
+/**
+ * Structural subset of `BrowserContext.newCDPSession()` and the resulting
+ * `CDPSession` we depend on. Avoids pulling `@playwright/test` types into
+ * this module's runtime dependencies.
+ */
+interface CdpPage {
+  context(): { newCDPSession(page: CdpPage): Promise<CdpSession> };
+}
+
+interface CdpSession {
+  on(event: 'Page.screencastFrame', listener: (payload: ScreencastFramePayload) => void): unknown;
+  off(event: 'Page.screencastFrame', listener: (payload: ScreencastFramePayload) => void): unknown;
+  send(method: 'Page.startScreencast', params: StartScreencastParams): Promise<void>;
+  send(method: 'Page.stopScreencast'): Promise<void>;
+  send(method: 'Page.screencastFrameAck', params: { sessionId: number }): Promise<void>;
+  detach(): Promise<void>;
+}
+
+interface ScreencastFramePayload {
+  /** Base64-encoded JPEG. */
+  data: string;
+  metadata: {
+    /** Frame swap timestamp in seconds since epoch (Network.TimeSinceEpoch). Optional in CDP, populated in chromium. */
+    timestamp?: number;
+    deviceWidth: number;
+    deviceHeight: number;
+  };
+  sessionId: number;
+}
+
+interface StartScreencastParams {
+  format?: 'jpeg' | 'png';
+  quality?: number;
+  maxWidth?: number;
+  maxHeight?: number;
+  everyNthFrame?: number;
+}
+
+export interface CdpScreencastOptions {
+  /** Final stitched mp4 output path. */
+  outputPath: string;
+  /** Capture size — passed to CDP as maxWidth/maxHeight. */
+  size: { width: number; height: number };
+  /** JPEG quality 0-100. */
+  quality: number;
+  /** Output framerate hint for ffmpeg encode. Concat demuxer drives PTS from per-file durations; this is just metadata. */
+  fps: number;
+  /** Optional: write every captured JPEG to this path for live preview thumbnails. */
+  liveFramePath?: string;
+  /** Optional: per-scene thumb directory. Tag a scene via `setPendingThumb()`. */
+  thumbsDir?: string;
+}
+
+export interface CdpScreencastHandle {
+  /** Tag the next captured JPEG as the thumbnail for this scene. */
+  setPendingThumb(scene: string): void;
+  /**
+   * Stop the screencast, flush remaining frames, run ffmpeg with concat
+   * demuxer to produce `outputPath`, return when ffmpeg has exited.
+   */
+  stop(): Promise<void>;
+  /** Number of frames CDP delivered (excludes any synthesized padding). */
+  frameCount(): number;
+}
+
+/**
+ * Build concat-demuxer lines from an ordered list of frames with paint
+ * timestamps. Exported for tests.
+ *
+ * The concat demuxer reads:
+ *   file 'frame-0000000.jpg'
+ *   duration 0.0333
+ *   file 'frame-0000001.jpg'
+ *   duration 0.0286
+ *   ...
+ *   file 'frame-NNNNNNN.jpg'   ← final entry repeated, see below
+ *
+ * Each `duration` applies to the file that PRECEDES it. The final file's
+ * duration is given by the trailing entry — and concat requires the last
+ * file line to be repeated AFTER its duration so the duration is applied
+ * (otherwise ffmpeg uses the file's intrinsic duration, which for a JPEG
+ * is "1 frame at 25fps" = 40ms).
+ */
+export function buildConcatLines(
+  frames: ReadonlyArray<{ filename: string; paintTimestampSec: number }>,
+  fallbackFps: number,
+): string[] {
+  if (frames.length === 0) return [];
+  const lines: string[] = [];
+  for (let i = 0; i < frames.length; i++) {
+    lines.push(`file '${frames[i].filename}'`);
+    let duration: number;
+    if (i + 1 < frames.length) {
+      duration = frames[i + 1].paintTimestampSec - frames[i].paintTimestampSec;
+    } else {
+      // Last frame — use the average of the prior frame intervals if we
+      // have them, else fall back to 1/fps. Keeps the trailing hold sane.
+      duration = frames.length > 1
+        ? (frames[frames.length - 1].paintTimestampSec - frames[0].paintTimestampSec) / (frames.length - 1)
+        : 1 / fallbackFps;
+    }
+    // Clamp to a sane range — clock weirdness or first-frame zero deltas
+    // would otherwise break ffmpeg's PTS math.
+    if (!Number.isFinite(duration) || duration <= 0) duration = 1 / fallbackFps;
+    if (duration > 60) duration = 60;
+    lines.push(`duration ${duration.toFixed(6)}`);
+  }
+  // Repeat the last file line so concat applies its duration. Without this
+  // the trailing duration is silently ignored.
+  lines.push(`file '${frames[frames.length - 1].filename}'`);
+  return lines;
+}
+
+/**
+ * Begin a CDP-direct screencast. Bypasses Playwright's `page.screencast.start`
+ * so we own the `Page.screencastFrame` events end-to-end. Each event carries
+ * `metadata.timestamp` — the actual paint time on Chrome's clock — which lets
+ * us timestamp frames by paint, not by Node-side arrival wallclock. This is
+ * the only way to keep visuals aligned with audio when CDP transport can't
+ * sustain wallclock pace at high resolutions.
+ *
+ * Frames are written to disk under `<outputDir>/cdp-frames/`, then stitched
+ * into the final mp4 via ffmpeg's concat demuxer with per-frame durations
+ * derived from paint-timestamp deltas. Disk buffering trades ~5MB/s of I/O
+ * for VFR output that doesn't depend on wallclock at all.
+ *
+ * Caller MUST NOT also call `page.screencast.start()` — Chromium accepts only
+ * one screencast subscriber per target; the second supersedes the first.
+ * `page.screencast.showActions()` is independent and stays usable.
+ */
+export async function startCdpScreencast(
+  page: CdpPage,
+  options: CdpScreencastOptions,
+): Promise<CdpScreencastHandle> {
+  const framesDir = join(dirname(options.outputPath), 'cdp-frames');
+  rmSync(framesDir, { recursive: true, force: true });
+  mkdirSync(framesDir, { recursive: true });
+
+  const cdp = await page.context().newCDPSession(page);
+  const frames: { filename: string; paintTimestampSec: number }[] = [];
+  let pendingThumb: string | null = null;
+
+  const handler = (payload: ScreencastFramePayload): void => {
+    const { data, metadata, sessionId } = payload;
+    // Ack first so Chrome's next paint isn't held by our I/O.
+    cdp.send('Page.screencastFrameAck', { sessionId }).catch(() => {
+      // Detach races stop() — silent.
+    });
+
+    let buffer: Buffer;
+    try {
+      buffer = Buffer.from(data, 'base64');
+    } catch {
+      return;
+    }
+
+    // metadata.timestamp is optional in CDP. When absent, fall back to
+    // Date.now() — better than dropping the frame, and chromium populates
+    // it in practice.
+    const tsSec = typeof metadata.timestamp === 'number'
+      ? metadata.timestamp
+      : Date.now() / 1000;
+
+    const filename = `frame-${String(frames.length).padStart(7, '0')}.jpg`;
+    try {
+      writeFileSync(join(framesDir, filename), buffer);
+    } catch (err) {
+      console.warn(`Warning: cdp-screencast: failed to write ${filename}: ${(err as Error).message}`);
+      return;
+    }
+    frames.push({ filename, paintTimestampSec: tsSec });
+
+    if (options.liveFramePath) {
+      try { writeFileSync(options.liveFramePath, buffer); } catch { /* best-effort */ }
+    }
+    if (pendingThumb && options.thumbsDir) {
+      const scene = pendingThumb;
+      pendingThumb = null;
+      try { writeFileSync(join(options.thumbsDir, `${scene}.jpg`), buffer); } catch { /* best-effort */ }
+    }
+  };
+
+  cdp.on('Page.screencastFrame', handler);
+  await cdp.send('Page.startScreencast', {
+    format: 'jpeg',
+    quality: options.quality,
+    maxWidth: options.size.width,
+    maxHeight: options.size.height,
+    everyNthFrame: 1,
+  });
+
+  return {
+    frameCount() { return frames.length; },
+    setPendingThumb(scene) { pendingThumb = scene; },
+    async stop() {
+      cdp.off('Page.screencastFrame', handler);
+      try { await cdp.send('Page.stopScreencast'); } catch { /* page may have closed */ }
+      try { await cdp.detach(); } catch { /* best-effort */ }
+
+      if (frames.length === 0) {
+        console.warn('Warning: cdp-screencast: no frames captured — output mp4 not created.');
+        return;
+      }
+
+      const concatPath = join(framesDir, 'concat.txt');
+      const lines = buildConcatLines(frames, options.fps);
+      writeFileSync(concatPath, lines.join('\n') + '\n', 'utf-8');
+
+      await new Promise<void>((resolve, reject) => {
+        // -vf fps=N forces CFR output. Without it, the concat-demuxer produces
+        // VFR with avg_frame_rate well below the declared r_frame_rate, which
+        // confuses downstream filter graphs (e.g. shader-splice trims): the
+        // final mp4 ends up several seconds longer than the source.
+        const proc = spawn('ffmpeg', [
+          '-y',
+          '-f', 'concat',
+          '-safe', '0',
+          '-i', concatPath,
+          '-vf', `fps=${options.fps}`,
+          '-fps_mode', 'cfr',
+          '-c:v', 'libx264',
+          '-preset', 'ultrafast',
+          '-crf', '12',
+          '-pix_fmt', 'yuv420p',
+          options.outputPath,
+        ], { stdio: ['ignore', 'ignore', 'pipe'] });
+        let stderr = '';
+        proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
+        proc.on('error', reject);
+        proc.on('exit', (code) => {
+          if (code === 0) resolve();
+          else reject(new Error(`cdp-screencast: ffmpeg concat exited ${code}: ${stderr.slice(-2000)}`));
+        });
+      });
+
+      // Free disk — JPEGs are only useful for the encode pass.
+      rmSync(framesDir, { recursive: true, force: true });
+    },
+  };
+}
diff --git a/src/narration.ts b/src/narration.ts
index e2d9ee7..f34fac2 100644
--- a/src/narration.ts
+++ b/src/narration.ts
@@ -5,6 +5,7 @@ import { dirname, join } from 'node:path';
import type { Writable } from 'node:stream';
import { schedulePlacements, type Placement } from './tts/align.js';
import type { CameraMove } from './camera-move.js';
+import { startCdpScreencast, type CdpScreencastHandle } from './cdp-screencast.js';
/**
* Subset of Playwright's Page we depend on — typed structurally so we don't
@@ -29,6 +30,7 @@ interface ScreencastPage {
evaluate(pageFunction: () => R | Promise): Promise;
on(event: 'framenavigated', listener: (frame: { parentFrame: () => unknown }) => void): unknown;
off(event: 'framenavigated', listener: (frame: { parentFrame: () => unknown }) => void): unknown;
+ context(): unknown;
}
export interface StartRecordingOptions {
@@ -69,6 +71,7 @@ export class NarrationTimeline {
private _pendingThumbScene: string | null = null;
private _recordingPage: ScreencastPage | null = null;
private _navListener: ((frame: { parentFrame: () => unknown }) => void) | null = null;
+ private _cdpHandle: CdpScreencastHandle | null = null;
constructor(sceneDurations?: Record) {
if (sceneDurations) {
@@ -126,6 +129,33 @@ export class NarrationTimeline {
const thumbsDir = process.env.ARGO_SCENE_THUMBS === '0' ? '' : (process.env.ARGO_THUMBS_DIR || '');
const streamOut = process.env.ARGO_STREAM_OUT || '';
const streamFps = Number(process.env.ARGO_FPS) || 30;
+ // CDP-direct screencast: bypass Playwright's onFrame wrapper to access
+ // metadata.timestamp (paint time). Only meaningful with stream-encode
+ // (record.ts only sets the env var for chromium + captureMode: jpeg-stitch).
+ const useCdpDirect = process.env.ARGO_USE_CDP_DIRECT === '1';
+
+ // Honor ARGO_JPEG_QUALITY for stream-encode mode; caller `options.quality` wins.
+ const envQuality = Number(process.env.ARGO_JPEG_QUALITY);
+ const quality = options.quality
+ ?? (Number.isFinite(envQuality) && envQuality > 0 ? envQuality : undefined);
+
+ if (useCdpDirect && streamOut) {
+ // Chromium accepts only one screencast subscriber per target — calling
+ // page.screencast.start would supersede our CDP subscription. Skip it
+ // entirely. showActions still works because it's a separate Playwright
+ // feature that just sets a flag for instrumentation hooks.
+ const captureSize = size ?? { width: 1920, height: 1080 };
+ const handle = await startCdpScreencast(page as Parameters[0], {
+ outputPath: streamOut,
+ size: captureSize,
+ quality: quality ?? 80,
+ fps: streamFps,
+ liveFramePath: liveFramePath || undefined,
+ thumbsDir: thumbsDir || undefined,
+ });
+ this._cdpHandle = handle;
+ this._screencastStop = async () => { await handle.stop(); };
+ } else {
// Set up the ffmpeg child for stream-encode mode.
let ffmpegProc: ChildProcessByStdio | null = null;
@@ -215,11 +245,6 @@ export class NarrationTimeline {
};
}
- // Honor ARGO_JPEG_QUALITY for stream-encode mode; caller `options.quality` wins.
- const envQuality = Number(process.env.ARGO_JPEG_QUALITY);
- const quality = options.quality
- ?? (Number.isFinite(envQuality) && envQuality > 0 ? envQuality : undefined);
-
// Playwright's screencast still demands a `path` for its WebM writer even
// when we ignore that output entirely (stream-encode mode). Pass the path
// through; for jpeg-stitch users it's a discardable temp file.
@@ -281,6 +306,8 @@ export class NarrationTimeline {
};
}
+ } // close legacy `else` branch — showActions + timeline anchor below run for both paths.
+
// Optional auto-annotation of every Playwright interaction.
const showActionsEnv = process.env.ARGO_SHOW_ACTIONS;
if (showActionsEnv) {
@@ -369,6 +396,7 @@ export class NarrationTimeline {
// Tell the next onFrame callback to also persist this scene's JPEG —
// best-effort, gated on ARGO_THUMBS_DIR being set + screencast being live.
this._pendingThumbScene = scene;
+ if (this._cdpHandle) this._cdpHandle.setPendingThumb(scene);
// Force CDP to emit a fresh frame so this scene's visual state is in the
// recording at mark-time. Without this, an idle page (no recent paint)
diff --git a/src/record.ts b/src/record.ts
index 6dcbcb0..f110bf6 100644
--- a/src/record.ts
+++ b/src/record.ts
@@ -107,7 +107,39 @@ export async function record(demoName: string, options: RecordOptions): Promise<
// jpeg-stitch (stream-encode) produces an H.264 mp4 directly — JPEG frames
// are piped to ffmpeg child in narration.startRecording, bypassing
// Playwright's hardcoded VP8 encoder. Default mode keeps Playwright's WebM.
- const useJpegStitch = options.captureMode === 'jpeg-stitch';
+ //
+ // jpeg-stitch is chromium-only in practice: webkit/firefox screencast
+ // onFrame delivery is far below 30fps (firefox typically ~3fps), so
+ // 80%+ of frames get synthesized as duplicates and the page-side test
+ // hits its timeout while the CDP transport drains. Auto-fall back to
+ // webm with a loud warning rather than letting the recording hang.
+ const browserName = options.browser ?? 'chromium';
+ let useJpegStitch = options.captureMode === 'jpeg-stitch';
+ if (useJpegStitch && browserName !== 'chromium') {
+ console.warn(
+ `Warning: captureMode: 'jpeg-stitch' is chromium-only — ` +
+ `${browserName}'s screencast cannot sustain the JPEG framerate. ` +
+ `Falling back to captureMode: 'webm' for this run.`,
+ );
+ useJpegStitch = false;
+ }
+
+ // deviceScaleFactor > 1 only works on chromium (via --force-device-scale-factor).
+ // webkit/firefox keep the page at 1x but the screencast still captures at the
+ // 2x/3x viewport size — the page renders into the upper-left of the frame and
+ // the rest is empty gray pixels (a "frame within a frame" once the export's
+ // frame effect wraps it). Clamp to 1 with a warning rather than producing
+ // unusable output.
+ const requestedDsf = normalizeDeviceScaleFactor(options.deviceScaleFactor);
+ if (requestedDsf > 1 && browserName !== 'chromium') {
+ console.warn(
+ `Warning: deviceScaleFactor: ${requestedDsf} is chromium-only — ` +
+ `${browserName} renders the page at 1x while the screencast captures at ` +
+ `${requestedDsf}x, leaving the right and bottom of every frame empty. ` +
+ `Clamping to 1 for this run.`,
+ );
+ options = { ...options, deviceScaleFactor: 1 };
+ }
const videoExt = useJpegStitch ? '.mp4' : '.webm';
const videoPath = path.join(argoDir, `video${videoExt}`);
// Playwright's screencast.start still requires a `path` even when we ignore
@@ -214,6 +246,15 @@ export async function record(demoName: string, options: RecordOptions): Promise<
ARGO_STREAM_OUT: streamOutPath,
ARGO_FPS: String(options.video.fps ?? 30),
ARGO_JPEG_QUALITY: jpegQuality,
+ // CDP-direct screencast: chromium-only path that uses paint timestamps
+ // (CDP metadata.timestamp) instead of arrival wallclock for frame
+ // numbering. Sidesteps the throughput-induced visual lag that the
+ // hotfix only mitigates. Disable via ARGO_CDP_DIRECT=0 to fall back
+ // to the legacy onFrame + image2pipe path (e.g. for debugging).
+ ARGO_USE_CDP_DIRECT:
+ process.env.ARGO_CDP_DIRECT === '0'
+ ? ''
+ : (useJpegStitch && browserName === 'chromium' ? '1' : ''),
BASE_URL: options.baseURL,
ARGO_ASSET_URL: assetServer?.url ?? '',
ARGO_AUTO_BACKGROUND: options.autoBackground ? '1' : '',
diff --git a/src/transitions/shader-splice.ts b/src/transitions/shader-splice.ts
index b37f6bf..0fd12d1 100644
--- a/src/transitions/shader-splice.ts
+++ b/src/transitions/shader-splice.ts
@@ -99,9 +99,11 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice
const sceneEnd = b.boundarySec - dHalf;
const transitionEnd = b.boundarySec + dHalf;
+ // setsar=1 normalizes SAR so concat doesn't fail when source and PNG
+ // sequence disagree (webkit screencast emits SAR 108:109; PNG is 0:1).
const vSceneLabel = `ssv${activeBoundaries}`;
parts.push(
- `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${sceneEnd.toFixed(3)},setpts=PTS-STARTPTS[${vSceneLabel}]`,
+ `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${sceneEnd.toFixed(3)},setpts=PTS-STARTPTS,setsar=1[${vSceneLabel}]`,
);
videoLabels.push(`[${vSceneLabel}]`);
@@ -114,7 +116,7 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice
}
const vTransLabel = `stv${activeBoundaries}`;
- parts.push(`[${b.extraInputIndex}:v]setpts=PTS-STARTPTS[${vTransLabel}]`);
+ parts.push(`[${b.extraInputIndex}:v]setpts=PTS-STARTPTS,setsar=1[${vTransLabel}]`);
videoLabels.push(`[${vTransLabel}]`);
if (audioInputLabel) {
@@ -132,7 +134,7 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice
// Final scene segment
const vLastLabel = `ssv${activeBoundaries}`;
parts.push(
- `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${totalDurationSec.toFixed(3)},setpts=PTS-STARTPTS[${vLastLabel}]`,
+ `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${totalDurationSec.toFixed(3)},setpts=PTS-STARTPTS,setsar=1[${vLastLabel}]`,
);
videoLabels.push(`[${vLastLabel}]`);
if (audioInputLabel) {
diff --git a/tests/cdp-screencast.test.ts b/tests/cdp-screencast.test.ts
new file mode 100644
index 0000000..470c669
--- /dev/null
+++ b/tests/cdp-screencast.test.ts
@@ -0,0 +1,69 @@
+import { describe, it, expect } from 'vitest';
+import { buildConcatLines } from '../src/cdp-screencast.js';
+
+describe('buildConcatLines', () => {
+  it('returns empty for empty input', () => {
+    expect(buildConcatLines([], 30)).toEqual([]);
+  });
+
+  it('emits file + duration pairs derived from timestamp deltas', () => {
+    const lines = buildConcatLines(
+      [
+        { filename: 'frame-0000000.jpg', paintTimestampSec: 100.000 },
+        { filename: 'frame-0000001.jpg', paintTimestampSec: 100.033 },
+        { filename: 'frame-0000002.jpg', paintTimestampSec: 100.075 },
+      ],
+      30,
+    );
+    // 3 frames → 3 file lines + 3 duration lines + 1 trailing file line repeat
+    expect(lines).toEqual([
+      "file 'frame-0000000.jpg'",
+      'duration 0.033000',
+      "file 'frame-0000001.jpg'",
+      'duration 0.042000',
+      "file 'frame-0000002.jpg'",
+      // last frame: avg of (33ms, 42ms) = 37.5ms — falls back to overall avg
+      'duration 0.037500',
+      // concat demuxer requires the last file line repeated for its duration
+      // to actually apply (otherwise ffmpeg uses the JPEG's intrinsic duration).
+      "file 'frame-0000002.jpg'",
+    ]);
+  });
+
+  it('falls back to 1/fps for the single-frame case', () => {
+    const lines = buildConcatLines(
+      [{ filename: 'frame-0000000.jpg', paintTimestampSec: 100.0 }],
+      30,
+    );
+    expect(lines).toEqual([
+      "file 'frame-0000000.jpg'",
+      `duration ${(1 / 30).toFixed(6)}`,
+      "file 'frame-0000000.jpg'",
+    ]);
+  });
+
+  it('falls back to 1/fps when a delta is non-positive (clock weirdness)', () => {
+    const lines = buildConcatLines(
+      [
+        { filename: 'a.jpg', paintTimestampSec: 100.0 },
+        { filename: 'b.jpg', paintTimestampSec: 100.0 }, // identical timestamps
+      ],
+      30,
+    );
+    // First duration is 0 → clamped to 1/30; last duration is the avg → also 0 → clamped.
+    expect(lines).toContain(`duration ${(1 / 30).toFixed(6)}`);
+    expect(lines.some((l) => l === 'duration 0.000000')).toBe(false);
+  });
+
+  it('clamps absurdly long deltas (clock jump) to 60s', () => {
+    const lines = buildConcatLines(
+      [
+        { filename: 'a.jpg', paintTimestampSec: 100.0 },
+        { filename: 'b.jpg', paintTimestampSec: 1_000_000.0 },
+      ],
+      30,
+    );
+    // First delta is enormous; should clamp to 60.
+    expect(lines).toContain('duration 60.000000');
+  });
+});
diff --git a/tests/record.test.ts b/tests/record.test.ts
index 69eee70..2fcc2cc 100644
--- a/tests/record.test.ts
+++ b/tests/record.test.ts
@@ -190,14 +190,14 @@ describe('record', () => {
demosDir: 'custom-demos',
baseURL: 'http://localhost:4321',
video: { width: 1280, height: 720 },
- browser: 'webkit',
+ browser: 'chromium',
deviceScaleFactor: 1.6,
});
const configPath = join(tempDir, '.argo', 'demo', 'playwright.record.config.mjs');
const config = readFileSync(configPath, 'utf-8');
- expect(config).toContain("browserName: \"webkit\"");
+ expect(config).toContain("browserName: \"chromium\"");
expect(config).toContain('deviceScaleFactor: 2');
expect(execFileMock).toHaveBeenCalledWith(
'npx',
@@ -212,6 +212,36 @@ describe('record', () => {
);
});
+  it('clamps deviceScaleFactor to 1 on non-chromium browsers', async () => {
+    // webkit/firefox don't honor --force-device-scale-factor — page renders at 1x
+    // while screencast captures at the 2x viewport, leaving 75% of frames empty.
+    mockSubprocessSuccess();
+
+    await record('demo', {
+      demosDir: 'custom-demos',
+      baseURL: 'http://localhost:4321',
+      video: { width: 1280, height: 720 },
+      browser: 'webkit',
+      deviceScaleFactor: 2,
+    });
+
+    const configPath = join(tempDir, '.argo', 'demo', 'playwright.record.config.mjs');
+    const config = readFileSync(configPath, 'utf-8');
+
+    expect(config).toContain('deviceScaleFactor: 1');
+    expect(execFileMock).toHaveBeenCalledWith(
+      'npx',
+      expect.any(Array),
+      expect.objectContaining({
+        env: expect.objectContaining({
+          ARGO_SCREENCAST_WIDTH: '1280',
+          ARGO_SCREENCAST_HEIGHT: '720',
+        }),
+      }),
+      expect.any(Function),
+    );
+  });
+
it('includes isMobile, hasTouch, and contextOptions in generated config', async () => {
mockSubprocessSuccess();
diff --git a/tests/transitions/shader-splice.test.ts b/tests/transitions/shader-splice.test.ts
index eb4432e..d02821e 100644
--- a/tests/transitions/shader-splice.test.ts
+++ b/tests/transitions/shader-splice.test.ts
@@ -124,6 +124,24 @@ describe('buildShaderSpliceFilter', () => {
expect(result.filterComplex).toMatch(/concat=n=3/);
});
+  it('normalizes SAR=1 on every concat input to avoid mismatch', () => {
+    // webkit screencast emits SAR 108:109; PNG sequence is 0:1. Without
+    // setsar=1 on each input, ffmpeg concat fails with "parameters do not
+    // match". Every video segment (scene + transition) must include setsar=1.
+    const result = buildShaderSpliceFilter({
+      totalDurationSec: 6.0,
+      boundaries: [{ boundarySec: 3.0, durationMs: 800, extraInputIndex: 2 }],
+      videoInputLabel: '[0:v]',
+      audioInputLabel: '[1:a]',
+      fps: 30,
+    });
+    // Scene segments (trim+setpts+setsar)
+    expect(result.filterComplex).toMatch(/trim=0\.000:2\.600,setpts=PTS-STARTPTS,setsar=1/);
+    expect(result.filterComplex).toMatch(/trim=3\.400:6\.000,setpts=PTS-STARTPTS,setsar=1/);
+    // PNG transition segment also normalized
+    expect(result.filterComplex).toMatch(/\[2:v\]setpts=PTS-STARTPTS,setsar=1/);
+  });
+
it('clamps dHalf when boundary is near video end', () => {
// boundarySec=2.85, requested halfDur=0.4 — only 0.15 available after
const result = buildShaderSpliceFilter({