diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..7608f4c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,61 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +jobs: + unit: + name: Unit tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + # ffmpeg is needed for boundary-frame extraction and shader pre-render. + - run: sudo apt-get update && sudo apt-get install -y ffmpeg + - run: npm install + - run: npx playwright install --with-deps chromium + - run: npm run build + - run: npm test + + cross-browser: + name: ci-smoke (${{ matrix.browser }}) + needs: unit + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + browser: [chromium, firefox, webkit] + env: + ARGO_BROWSER: ${{ matrix.browser }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version: 24 + - run: sudo apt-get update && sudo apt-get install -y ffmpeg + - run: npm install + # chromium is also installed even when recording on webkit/firefox — + # shader-render uses headless chromium for WebGL regardless of the + # recording browser. 
+ - run: npx playwright install --with-deps chromium "$ARGO_BROWSER" + - run: npm run build + + - name: Run pipeline + run: npx tsx bin/argo.js pipeline ci-smoke --config demos/ci-smoke.config.mjs --browser "$ARGO_BROWSER" + + - name: Verify output + run: | + pip install --quiet pillow + python3 scripts/verify-ci-smoke.py videos/ci-smoke.mp4 + + - name: Upload mp4 artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: ci-smoke-${{ matrix.browser }} + path: videos/ci-smoke.mp4 + if-no-files-found: warn diff --git a/demos/blocks-showcase.demo.ts b/demos/blocks-showcase.demo.ts index 28b0f6a..7150c38 100644 --- a/demos/blocks-showcase.demo.ts +++ b/demos/blocks-showcase.demo.ts @@ -15,6 +15,8 @@ test('blocks-showcase', async ({ page, narration }) => { `); await page.waitForTimeout(500); + await narration.startRecording(page); + for (const scene of ['intro', 'x-post', 'macos', 'ytlt', 'chart', 'spotify', 'closing']) { narration.mark(scene); await showOverlay(page, scene, narration.durationFor(scene, { maxMs: 6000 })); diff --git a/demos/ci-smoke.config.mjs b/demos/ci-smoke.config.mjs new file mode 100644 index 0000000..160804d --- /dev/null +++ b/demos/ci-smoke.config.mjs @@ -0,0 +1,26 @@ +import { defineConfig } from '@argo-video/cli'; + +// CI smoke config — exercises the cross-browser robustness fixes: +// * captureMode: 'jpeg-stitch' auto-downgrades to 'webm' on non-chromium +// * deviceScaleFactor: 2 auto-clamps to 1 on non-chromium +// * shader transition exercises setsar=1 normalization on webkit +// Silent demo (no text in scenes manifest) — TTS is skipped, video-only export. +export default defineConfig({ + // Demo uses page.setContent — baseURL is unused but required by config schema. 
+ baseURL: 'about:blank', + demosDir: 'demos', + outputDir: 'videos', + video: { + width: 1920, + height: 1080, + fps: 30, + deviceScaleFactor: 2, + captureMode: 'jpeg-stitch', + }, + export: { + preset: 'ultrafast', + crf: 28, + encoder: 'cpu', + transition: { type: 'shader', shader: 'crosswarp', durationMs: 600 }, + }, +}); diff --git a/demos/ci-smoke.demo.ts b/demos/ci-smoke.demo.ts new file mode 100644 index 0000000..c43251e --- /dev/null +++ b/demos/ci-smoke.demo.ts @@ -0,0 +1,34 @@ +import { test } from '@argo-video/cli'; + +test('ci-smoke', async ({ page, narration }) => { + test.setTimeout(60_000); + + await page.setContent(` + + +
Scene 1
+ + + + `); + await page.waitForTimeout(300); + + await narration.startRecording(page); + + narration.mark('one'); + await page.waitForTimeout(2500); + + await page.evaluate(() => { + (document.getElementById('s1') as HTMLElement).style.display = 'none'; + (document.getElementById('s2') as HTMLElement).style.display = 'grid'; + }); + narration.mark('two'); + await page.waitForTimeout(2500); + + await page.evaluate(() => { + (document.getElementById('s2') as HTMLElement).style.display = 'none'; + (document.getElementById('s3') as HTMLElement).style.display = 'grid'; + }); + narration.mark('three'); + await page.waitForTimeout(2500); +}); diff --git a/demos/ci-smoke.scenes.json b/demos/ci-smoke.scenes.json new file mode 100644 index 0000000..92dcd72 --- /dev/null +++ b/demos/ci-smoke.scenes.json @@ -0,0 +1,5 @@ +[ + { "scene": "one" }, + { "scene": "two" }, + { "scene": "three" } +] diff --git a/scripts/verify-ci-smoke.py b/scripts/verify-ci-smoke.py new file mode 100755 index 0000000..26f4033 --- /dev/null +++ b/scripts/verify-ci-smoke.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Verify ci-smoke output mp4. 
Asserts: + - output exists and is valid mp4 + - dimensions are 1920x1080 (catches dsf-clamp regressions) + - duration is in expected window + - midpoint frame's bottom-right quadrant is not near-gray + (catches frame-in-frame regressions where a non-chromium browser + rendered the page at 1x into a 2x screencast canvas) + +Usage: python3 scripts/verify-ci-smoke.py videos/ci-smoke.mp4 +""" + +import subprocess +import sys +from pathlib import Path + +EXPECTED_W, EXPECTED_H = 1920, 1080 +MIN_DURATION_S, MAX_DURATION_S = 5, 20 + + +def ffprobe(*args: str) -> str: + return subprocess.check_output(['ffprobe', '-v', 'error', *args]).decode().strip() + + +def main() -> None: + if len(sys.argv) != 2: + sys.exit('Usage: verify-ci-smoke.py ') + mp4 = Path(sys.argv[1]) + if not mp4.exists(): + sys.exit(f'missing output: {mp4}') + + dims_csv = ffprobe('-select_streams', 'v:0', '-show_entries', 'stream=width,height', '-of', 'csv=p=0', str(mp4)) + w, h = (int(x) for x in dims_csv.split(',')) + if (w, h) != (EXPECTED_W, EXPECTED_H): + sys.exit(f'dimensions {w}x{h} != {EXPECTED_W}x{EXPECTED_H}') + + duration = float(ffprobe('-show_entries', 'format=duration', '-of', 'csv=p=0', str(mp4))) + if not MIN_DURATION_S <= duration <= MAX_DURATION_S: + sys.exit(f'duration {duration:.1f}s outside [{MIN_DURATION_S}, {MAX_DURATION_S}]') + + sample_path = Path('/tmp/ci-smoke-sample.png') + subprocess.check_call( + ['ffmpeg', '-y', '-ss', f'{duration / 2:.2f}', '-i', str(mp4), '-frames:v', '1', str(sample_path)], + stderr=subprocess.DEVNULL, + ) + + from PIL import Image + img = Image.open(sample_path).convert('RGB') + samples = { + 'top-left': img.getpixel((w // 4, h // 4)), + 'top-right': img.getpixel((3 * w // 4, h // 4)), + 'bottom-left': img.getpixel((w // 4, 3 * h // 4)), + 'bottom-right': img.getpixel((3 * w // 4, 3 * h // 4)), + 'center': img.getpixel((w // 2, h // 2)), + } + print('pixel samples:') + for name, rgb in samples.items(): + print(f' {name:13s}: {rgb}') + + # Bottom-right 
should be inside the rendered scene background, not gray padding. + # The three scenes use saturated dark colors (#1e3a8a, #7c2d12, #14532d) — the + # max channel deviation from grey is at least ~70. Padding gray pixels stay + # within ~10 of (128,128,128). + br = samples['bottom-right'] + gray_distance = sum(abs(c - 128) for c in br) + if gray_distance < 30: + sys.exit(f'bottom-right {br} too close to gray (distance={gray_distance}) — frame-in-frame regression?') + + print(f'OK: {w}x{h}, {duration:.1f}s, br_gray_distance={gray_distance}') + + +if __name__ == '__main__': + main() diff --git a/src/cdp-screencast.ts b/src/cdp-screencast.ts new file mode 100644 index 0000000..52043a1 --- /dev/null +++ b/src/cdp-screencast.ts @@ -0,0 +1,244 @@ +import { spawn } from 'node:child_process'; +import { mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { dirname, join } from 'node:path'; + +/** + * Structural subset of `BrowserContext.newCDPSession()` and the resulting + * `CDPSession` we depend on. Avoids pulling `@playwright/test` types into + * this module's runtime dependencies. + */ +interface CdpPage { + context(): { newCDPSession(page: CdpPage): Promise }; +} + +interface CdpSession { + on(event: 'Page.screencastFrame', listener: (payload: ScreencastFramePayload) => void): unknown; + off(event: 'Page.screencastFrame', listener: (payload: ScreencastFramePayload) => void): unknown; + send(method: 'Page.startScreencast', params: StartScreencastParams): Promise; + send(method: 'Page.stopScreencast'): Promise; + send(method: 'Page.screencastFrameAck', params: { sessionId: number }): Promise; + detach(): Promise; +} + +interface ScreencastFramePayload { + /** Base64-encoded JPEG. */ + data: string; + metadata: { + /** Frame swap timestamp in seconds since epoch (Network.TimeSinceEpoch). Optional in CDP, populated in chromium. 
*/ + timestamp?: number; + deviceWidth: number; + deviceHeight: number; + }; + sessionId: number; +} + +interface StartScreencastParams { + format?: 'jpeg' | 'png'; + quality?: number; + maxWidth?: number; + maxHeight?: number; + everyNthFrame?: number; +} + +export interface CdpScreencastOptions { + /** Final stitched mp4 output path. */ + outputPath: string; + /** Capture size — passed to CDP as maxWidth/maxHeight. */ + size: { width: number; height: number }; + /** JPEG quality 0-100. */ + quality: number; + /** Output framerate hint for ffmpeg encode. Concat demuxer drives PTS from per-file durations; this is just metadata. */ + fps: number; + /** Optional: write every captured JPEG to this path for live preview thumbnails. */ + liveFramePath?: string; + /** Optional: per-scene thumb directory. Tag a scene via `setPendingThumb()`. */ + thumbsDir?: string; +} + +export interface CdpScreencastHandle { + /** Tag the next captured JPEG as the thumbnail for this scene. */ + setPendingThumb(scene: string): void; + /** + * Stop the screencast, flush remaining frames, run ffmpeg with concat + * demuxer to produce `outputPath`, return when ffmpeg has exited. + */ + stop(): Promise; + /** Number of frames CDP delivered (excludes any synthesized padding). */ + frameCount(): number; +} + +/** + * Build concat-demuxer lines from an ordered list of frames with paint + * timestamps. Exported for tests. + * + * The concat demuxer reads: + * file 'frame-0000000.jpg' + * duration 0.0333 + * file 'frame-0000001.jpg' + * duration 0.0286 + * ... + * file 'frame-NNNNNNN.jpg' ← final entry repeated, see below + * + * Each `duration` applies to the file that PRECEDES it. The final file's + * duration is given by the trailing entry — and concat requires the last + * file line to be repeated AFTER its duration so the duration is applied + * (otherwise ffmpeg uses the file's intrinsic duration, which for a JPEG + * is "1 frame at 25fps" = 40ms). 
/**
 * Build the lines of an ffmpeg concat-demuxer script from an ordered list of
 * frames stamped with their paint time. Exported for tests.
 *
 * Output shape (one `file` + `duration` pair per frame, then the last `file`
 * line repeated so the demuxer actually applies the final duration):
 *
 *   file 'frame-0000000.jpg'
 *   duration 0.033000
 *   ...
 *   file 'frame-NNNNNNN.jpg'
 *   duration 0.037500
 *   file 'frame-NNNNNNN.jpg'
 *
 * Duration rules:
 *  - frame i (not last): `frames[i + 1].paintTimestampSec - frames[i].paintTimestampSec`
 *  - last frame: mean interval over the whole capture, or the fallback when
 *    there is only one frame
 *  - a non-finite delta, or one below 1 microsecond (zero, negative, or so small
 *    that `toFixed(6)` would print `0.000000`), is replaced by the fallback
 *  - deltas above 60 s are clamped to 60 s
 *
 * @param frames      Frames in capture order. `filename` is written verbatim
 *                    inside single quotes; embedded single quotes are escaped
 *                    with the concat demuxer's `'\''` sequence.
 * @param fallbackFps Frame rate used to derive the fallback duration
 *                    (`1 / fallbackFps`). A non-finite or non-positive value
 *                    falls back to 30 fps instead of leaking `NaN` or negative
 *                    durations into the script.
 * @returns The script lines without trailing newline; `[]` for no frames.
 */
export function buildConcatLines(
  frames: ReadonlyArray<{ filename: string; paintTimestampSec: number }>,
  fallbackFps: number,
): string[] {
  if (frames.length === 0) return [];

  // Durations are printed with 6 decimals, so anything below 1 microsecond would be
  // emitted as (or indistinguishable from) zero and break ffmpeg's PTS math.
  const minDurationSec = 1e-6;
  const maxDurationSec = 60;

  const fallbackDurationSec = Number.isFinite(fallbackFps) && fallbackFps > 0
    ? Math.min(maxDurationSec, Math.max(minDurationSec, 1 / fallbackFps))
    : 1 / 30;

  const sanitize = (rawSec: number): number => {
    if (!Number.isFinite(rawSec) || rawSec < minDurationSec) return fallbackDurationSec;
    return Math.min(rawSec, maxDurationSec);
  };

  // Inside single quotes the concat demuxer treats everything literally, so a
  // quote has to be written as: close quote, escaped quote, reopen quote.
  const fileLine = (filename: string): string =>
    `file '${filename.replace(/'/g, "'\\''")}'`;

  const first = frames[0];
  const last = frames[frames.length - 1];
  // Trailing hold: mean frame interval of the capture keeps the last frame sane.
  const trailingHoldSec = frames.length > 1
    ? (last.paintTimestampSec - first.paintTimestampSec) / (frames.length - 1)
    : fallbackDurationSec;

  const lines: string[] = [];
  frames.forEach((frame, i) => {
    const next = frames[i + 1];
    const rawSec = next !== undefined
      ? next.paintTimestampSec - frame.paintTimestampSec
      : trailingHoldSec;
    lines.push(fileLine(frame.filename), `duration ${sanitize(rawSec).toFixed(6)}`);
  });

  // Repeat the last file line so concat applies its duration. Without this
  // the trailing duration is silently ignored.
  lines.push(fileLine(last.filename));
  return lines;
}

/**
 * Begin a CDP-direct screencast. Bypasses Playwright's `page.screencast.start`
 * so we own the `Page.screencastFrame` events end-to-end. Each event carries
 * `metadata.timestamp` — the actual paint time on Chrome's clock — which lets
 * us timestamp frames by paint, not by Node-side arrival wallclock. This is
 * the only way to keep visuals aligned with audio when CDP transport can't
 * sustain wallclock pace at high resolutions.
 *
 * Frames are written to disk under `<dirname(outputPath)>/cdp-frames/`, then stitched
 * into the final mp4 via ffmpeg's concat demuxer with per-frame durations
 * derived from paint-timestamp deltas. Disk buffering trades ~5MB/s of I/O
 * for VFR output that doesn't depend on wallclock at all.
 */
+ * + * Caller MUST NOT also call `page.screencast.start()` — Chromium accepts only + * one screencast subscriber per target; the second supersedes the first. + * `page.screencast.showActions()` is independent and stays usable. + */ +export async function startCdpScreencast( + page: CdpPage, + options: CdpScreencastOptions, +): Promise { + const framesDir = join(dirname(options.outputPath), 'cdp-frames'); + rmSync(framesDir, { recursive: true, force: true }); + mkdirSync(framesDir, { recursive: true }); + + const cdp = await page.context().newCDPSession(page); + const frames: { filename: string; paintTimestampSec: number }[] = []; + let pendingThumb: string | null = null; + + const handler = (payload: ScreencastFramePayload): void => { + const { data, metadata, sessionId } = payload; + // Ack first so Chrome's next paint isn't held by our I/O. + cdp.send('Page.screencastFrameAck', { sessionId }).catch(() => { + // Detach races stop() — silent. + }); + + let buffer: Buffer; + try { + buffer = Buffer.from(data, 'base64'); + } catch { + return; + } + + // metadata.timestamp is optional in CDP. When absent, fall back to + // Date.now() — better than dropping the frame, and chromium populates + // it in practice. + const tsSec = typeof metadata.timestamp === 'number' + ? 
metadata.timestamp + : Date.now() / 1000; + + const filename = `frame-${String(frames.length).padStart(7, '0')}.jpg`; + try { + writeFileSync(join(framesDir, filename), buffer); + } catch (err) { + console.warn(`Warning: cdp-screencast: failed to write ${filename}: ${(err as Error).message}`); + return; + } + frames.push({ filename, paintTimestampSec: tsSec }); + + if (options.liveFramePath) { + try { writeFileSync(options.liveFramePath, buffer); } catch { /* best-effort */ } + } + if (pendingThumb && options.thumbsDir) { + const scene = pendingThumb; + pendingThumb = null; + try { writeFileSync(join(options.thumbsDir, `${scene}.jpg`), buffer); } catch { /* best-effort */ } + } + }; + + cdp.on('Page.screencastFrame', handler); + await cdp.send('Page.startScreencast', { + format: 'jpeg', + quality: options.quality, + maxWidth: options.size.width, + maxHeight: options.size.height, + everyNthFrame: 1, + }); + + return { + frameCount() { return frames.length; }, + setPendingThumb(scene) { pendingThumb = scene; }, + async stop() { + cdp.off('Page.screencastFrame', handler); + try { await cdp.send('Page.stopScreencast'); } catch { /* page may have closed */ } + try { await cdp.detach(); } catch { /* best-effort */ } + + if (frames.length === 0) { + console.warn('Warning: cdp-screencast: no frames captured — output mp4 not created.'); + return; + } + + const concatPath = join(framesDir, 'concat.txt'); + const lines = buildConcatLines(frames, options.fps); + writeFileSync(concatPath, lines.join('\n') + '\n', 'utf-8'); + + await new Promise((resolve, reject) => { + // -vf fps=N forces CFR output. Without it, the concat-demuxer produces + // VFR with avg_frame_rate well below the declared r_frame_rate, which + // confuses downstream filter graphs (e.g. shader-splice trims): the + // final mp4 ends up several seconds longer than the source. 
+ const proc = spawn('ffmpeg', [ + '-y', + '-f', 'concat', + '-safe', '0', + '-i', concatPath, + '-vf', `fps=${options.fps}`, + '-fps_mode', 'cfr', + '-c:v', 'libx264', + '-preset', 'ultrafast', + '-crf', '12', + '-pix_fmt', 'yuv420p', + options.outputPath, + ], { stdio: ['ignore', 'ignore', 'pipe'] }); + let stderr = ''; + proc.stderr.on('data', (chunk) => { stderr += chunk.toString(); }); + proc.on('error', reject); + proc.on('exit', (code) => { + if (code === 0) resolve(); + else reject(new Error(`cdp-screencast: ffmpeg concat exited ${code}: ${stderr.slice(-2000)}`)); + }); + }); + + // Free disk — JPEGs are only useful for the encode pass. + rmSync(framesDir, { recursive: true, force: true }); + }, + }; +} diff --git a/src/narration.ts b/src/narration.ts index e2d9ee7..f34fac2 100644 --- a/src/narration.ts +++ b/src/narration.ts @@ -5,6 +5,7 @@ import { dirname, join } from 'node:path'; import type { Writable } from 'node:stream'; import { schedulePlacements, type Placement } from './tts/align.js'; import type { CameraMove } from './camera-move.js'; +import { startCdpScreencast, type CdpScreencastHandle } from './cdp-screencast.js'; /** * Subset of Playwright's Page we depend on — typed structurally so we don't @@ -29,6 +30,7 @@ interface ScreencastPage { evaluate(pageFunction: () => R | Promise): Promise; on(event: 'framenavigated', listener: (frame: { parentFrame: () => unknown }) => void): unknown; off(event: 'framenavigated', listener: (frame: { parentFrame: () => unknown }) => void): unknown; + context(): unknown; } export interface StartRecordingOptions { @@ -69,6 +71,7 @@ export class NarrationTimeline { private _pendingThumbScene: string | null = null; private _recordingPage: ScreencastPage | null = null; private _navListener: ((frame: { parentFrame: () => unknown }) => void) | null = null; + private _cdpHandle: CdpScreencastHandle | null = null; constructor(sceneDurations?: Record) { if (sceneDurations) { @@ -126,6 +129,33 @@ export class 
NarrationTimeline { const thumbsDir = process.env.ARGO_SCENE_THUMBS === '0' ? '' : (process.env.ARGO_THUMBS_DIR || ''); const streamOut = process.env.ARGO_STREAM_OUT || ''; const streamFps = Number(process.env.ARGO_FPS) || 30; + // CDP-direct screencast: bypass Playwright's onFrame wrapper to access + // metadata.timestamp (paint time). Only meaningful with stream-encode + // (record.ts only sets the env var for chromium + captureMode: jpeg-stitch). + const useCdpDirect = process.env.ARGO_USE_CDP_DIRECT === '1'; + + // Honor ARGO_JPEG_QUALITY for stream-encode mode; caller `options.quality` wins. + const envQuality = Number(process.env.ARGO_JPEG_QUALITY); + const quality = options.quality + ?? (Number.isFinite(envQuality) && envQuality > 0 ? envQuality : undefined); + + if (useCdpDirect && streamOut) { + // Chromium accepts only one screencast subscriber per target — calling + // page.screencast.start would supersede our CDP subscription. Skip it + // entirely. showActions still works because it's a separate Playwright + // feature that just sets a flag for instrumentation hooks. + const captureSize = size ?? { width: 1920, height: 1080 }; + const handle = await startCdpScreencast(page as Parameters[0], { + outputPath: streamOut, + size: captureSize, + quality: quality ?? 80, + fps: streamFps, + liveFramePath: liveFramePath || undefined, + thumbsDir: thumbsDir || undefined, + }); + this._cdpHandle = handle; + this._screencastStop = async () => { await handle.stop(); }; + } else { // Set up the ffmpeg child for stream-encode mode. let ffmpegProc: ChildProcessByStdio | null = null; @@ -215,11 +245,6 @@ export class NarrationTimeline { }; } - // Honor ARGO_JPEG_QUALITY for stream-encode mode; caller `options.quality` wins. - const envQuality = Number(process.env.ARGO_JPEG_QUALITY); - const quality = options.quality - ?? (Number.isFinite(envQuality) && envQuality > 0 ? 
envQuality : undefined); - // Playwright's screencast still demands a `path` for its WebM writer even // when we ignore that output entirely (stream-encode mode). Pass the path // through; for jpeg-stitch users it's a discardable temp file. @@ -281,6 +306,8 @@ export class NarrationTimeline { }; } + } // close legacy `else` branch — showActions + timeline anchor below run for both paths. + // Optional auto-annotation of every Playwright interaction. const showActionsEnv = process.env.ARGO_SHOW_ACTIONS; if (showActionsEnv) { @@ -369,6 +396,7 @@ export class NarrationTimeline { // Tell the next onFrame callback to also persist this scene's JPEG — // best-effort, gated on ARGO_THUMBS_DIR being set + screencast being live. this._pendingThumbScene = scene; + if (this._cdpHandle) this._cdpHandle.setPendingThumb(scene); // Force CDP to emit a fresh frame so this scene's visual state is in the // recording at mark-time. Without this, an idle page (no recent paint) diff --git a/src/record.ts b/src/record.ts index 6dcbcb0..f110bf6 100644 --- a/src/record.ts +++ b/src/record.ts @@ -107,7 +107,39 @@ export async function record(demoName: string, options: RecordOptions): Promise< // jpeg-stitch (stream-encode) produces an H.264 mp4 directly — JPEG frames // are piped to ffmpeg child in narration.startRecording, bypassing // Playwright's hardcoded VP8 encoder. Default mode keeps Playwright's WebM. - const useJpegStitch = options.captureMode === 'jpeg-stitch'; + // + // jpeg-stitch is chromium-only in practice: webkit/firefox screencast + // onFrame delivery is far below 30fps (firefox typically ~3fps), so + // 80%+ of frames get synthesized as duplicates and the page-side test + // hits its timeout while the CDP transport drains. Auto-fall back to + // webm with a loud warning rather than letting the recording hang. + const browserName = options.browser ?? 
'chromium'; + let useJpegStitch = options.captureMode === 'jpeg-stitch'; + if (useJpegStitch && browserName !== 'chromium') { + console.warn( + `Warning: captureMode: 'jpeg-stitch' is chromium-only — ` + + `${browserName}'s screencast cannot sustain the JPEG framerate. ` + + `Falling back to captureMode: 'webm' for this run.`, + ); + useJpegStitch = false; + } + + // deviceScaleFactor > 1 only works on chromium (via --force-device-scale-factor). + // webkit/firefox keep the page at 1x but the screencast still captures at the + // 2x/3x viewport size — the page renders into the upper-left of the frame and + // the rest is empty gray pixels (a "frame within a frame" once the export's + // frame effect wraps it). Clamp to 1 with a warning rather than producing + // unusable output. + const requestedDsf = normalizeDeviceScaleFactor(options.deviceScaleFactor); + if (requestedDsf > 1 && browserName !== 'chromium') { + console.warn( + `Warning: deviceScaleFactor: ${requestedDsf} is chromium-only — ` + + `${browserName} renders the page at 1x while the screencast captures at ` + + `${requestedDsf}x, leaving the right and bottom of every frame empty. ` + + `Clamping to 1 for this run.`, + ); + options = { ...options, deviceScaleFactor: 1 }; + } const videoExt = useJpegStitch ? '.mp4' : '.webm'; const videoPath = path.join(argoDir, `video${videoExt}`); // Playwright's screencast.start still requires a `path` even when we ignore @@ -214,6 +246,15 @@ export async function record(demoName: string, options: RecordOptions): Promise< ARGO_STREAM_OUT: streamOutPath, ARGO_FPS: String(options.video.fps ?? 30), ARGO_JPEG_QUALITY: jpegQuality, + // CDP-direct screencast: chromium-only path that uses paint timestamps + // (CDP metadata.timestamp) instead of arrival wallclock for frame + // numbering. Sidesteps the throughput-induced visual lag that the + // hotfix only mitigates. Disable via ARGO_CDP_DIRECT=0 to fall back + // to the legacy onFrame + image2pipe path (e.g. 
for debugging). + ARGO_USE_CDP_DIRECT: + process.env.ARGO_CDP_DIRECT === '0' + ? '' + : (useJpegStitch && browserName === 'chromium' ? '1' : ''), BASE_URL: options.baseURL, ARGO_ASSET_URL: assetServer?.url ?? '', ARGO_AUTO_BACKGROUND: options.autoBackground ? '1' : '', diff --git a/src/transitions/shader-splice.ts b/src/transitions/shader-splice.ts index b37f6bf..0fd12d1 100644 --- a/src/transitions/shader-splice.ts +++ b/src/transitions/shader-splice.ts @@ -99,9 +99,11 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice const sceneEnd = b.boundarySec - dHalf; const transitionEnd = b.boundarySec + dHalf; + // setsar=1 normalizes SAR so concat doesn't fail when source and PNG + // sequence disagree (webkit screencast emits SAR 108:109; PNG is 0:1). const vSceneLabel = `ssv${activeBoundaries}`; parts.push( - `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${sceneEnd.toFixed(3)},setpts=PTS-STARTPTS[${vSceneLabel}]`, + `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${sceneEnd.toFixed(3)},setpts=PTS-STARTPTS,setsar=1[${vSceneLabel}]`, ); videoLabels.push(`[${vSceneLabel}]`); @@ -114,7 +116,7 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice } const vTransLabel = `stv${activeBoundaries}`; - parts.push(`[${b.extraInputIndex}:v]setpts=PTS-STARTPTS[${vTransLabel}]`); + parts.push(`[${b.extraInputIndex}:v]setpts=PTS-STARTPTS,setsar=1[${vTransLabel}]`); videoLabels.push(`[${vTransLabel}]`); if (audioInputLabel) { @@ -132,7 +134,7 @@ export function buildShaderSpliceFilter(opts: ShaderSpliceOptions): ShaderSplice // Final scene segment const vLastLabel = `ssv${activeBoundaries}`; parts.push( - `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${totalDurationSec.toFixed(3)},setpts=PTS-STARTPTS[${vLastLabel}]`, + `[${vSplitLabels[videoSegmentIdx++]}]trim=${cursorSec.toFixed(3)}:${totalDurationSec.toFixed(3)},setpts=PTS-STARTPTS,setsar=1[${vLastLabel}]`, ); 
videoLabels.push(`[${vLastLabel}]`); if (audioInputLabel) { diff --git a/tests/cdp-screencast.test.ts b/tests/cdp-screencast.test.ts new file mode 100644 index 0000000..470c669 --- /dev/null +++ b/tests/cdp-screencast.test.ts @@ -0,0 +1,69 @@ +import { describe, it, expect } from 'vitest'; +import { buildConcatLines } from '../src/cdp-screencast.js'; + +describe('buildConcatLines', () => { + it('returns empty for empty input', () => { + expect(buildConcatLines([], 30)).toEqual([]); + }); + + it('emits file + duration pairs derived from timestamp deltas', () => { + const lines = buildConcatLines( + [ + { filename: 'frame-0000000.jpg', paintTimestampSec: 100.000 }, + { filename: 'frame-0000001.jpg', paintTimestampSec: 100.033 }, + { filename: 'frame-0000002.jpg', paintTimestampSec: 100.075 }, + ], + 30, + ); + // 3 frames → 3 file lines + 3 duration lines + 1 trailing file line repeat + expect(lines).toEqual([ + "file 'frame-0000000.jpg'", + 'duration 0.033000', + "file 'frame-0000001.jpg'", + 'duration 0.042000', + "file 'frame-0000002.jpg'", + // last frame: avg of (33ms, 42ms) = 37.5ms — falls back to overall avg + 'duration 0.037500', + // concat demuxer requires the last file line repeated for its duration + // to actually apply (otherwise ffmpeg uses the JPEG's intrinsic duration). 
+ "file 'frame-0000002.jpg'", + ]); + }); + + it('falls back to 1/fps for the single-frame case', () => { + const lines = buildConcatLines( + [{ filename: 'frame-0000000.jpg', paintTimestampSec: 100.0 }], + 30, + ); + expect(lines).toEqual([ + "file 'frame-0000000.jpg'", + `duration ${(1 / 30).toFixed(6)}`, + "file 'frame-0000000.jpg'", + ]); + }); + + it('falls back to 1/fps when a delta is non-positive (clock weirdness)', () => { + const lines = buildConcatLines( + [ + { filename: 'a.jpg', paintTimestampSec: 100.0 }, + { filename: 'b.jpg', paintTimestampSec: 100.0 }, // identical timestamps + ], + 30, + ); + // First duration is 0 → clamped to 1/30; last duration is the avg → also 0 → clamped. + expect(lines).toContain(`duration ${(1 / 30).toFixed(6)}`); + expect(lines.some((l) => l === 'duration 0.000000')).toBe(false); + }); + + it('clamps absurdly long deltas (clock jump) to 60s', () => { + const lines = buildConcatLines( + [ + { filename: 'a.jpg', paintTimestampSec: 100.0 }, + { filename: 'b.jpg', paintTimestampSec: 1_000_000.0 }, + ], + 30, + ); + // First delta is enormous; should clamp to 60. 
+ expect(lines).toContain('duration 60.000000'); + }); +}); diff --git a/tests/record.test.ts b/tests/record.test.ts index 69eee70..2fcc2cc 100644 --- a/tests/record.test.ts +++ b/tests/record.test.ts @@ -190,14 +190,14 @@ describe('record', () => { demosDir: 'custom-demos', baseURL: 'http://localhost:4321', video: { width: 1280, height: 720 }, - browser: 'webkit', + browser: 'chromium', deviceScaleFactor: 1.6, }); const configPath = join(tempDir, '.argo', 'demo', 'playwright.record.config.mjs'); const config = readFileSync(configPath, 'utf-8'); - expect(config).toContain("browserName: \"webkit\""); + expect(config).toContain("browserName: \"chromium\""); expect(config).toContain('deviceScaleFactor: 2'); expect(execFileMock).toHaveBeenCalledWith( 'npx', @@ -212,6 +212,36 @@ describe('record', () => { ); }); + it('clamps deviceScaleFactor to 1 on non-chromium browsers', async () => { + // webkit/firefox don't honor --force-device-scale-factor — page renders at 1x + // while screencast captures at the 2x viewport, leaving 75% of frames empty. 
+ mockSubprocessSuccess(); + + await record('demo', { + demosDir: 'custom-demos', + baseURL: 'http://localhost:4321', + video: { width: 1280, height: 720 }, + browser: 'webkit', + deviceScaleFactor: 2, + }); + + const configPath = join(tempDir, '.argo', 'demo', 'playwright.record.config.mjs'); + const config = readFileSync(configPath, 'utf-8'); + + expect(config).toContain('deviceScaleFactor: 1'); + expect(execFileMock).toHaveBeenCalledWith( + 'npx', + expect.any(Array), + expect.objectContaining({ + env: expect.objectContaining({ + ARGO_SCREENCAST_WIDTH: '1280', + ARGO_SCREENCAST_HEIGHT: '720', + }), + }), + expect.any(Function), + ); + }); + it('includes isMobile, hasTouch, and contextOptions in generated config', async () => { mockSubprocessSuccess(); diff --git a/tests/transitions/shader-splice.test.ts b/tests/transitions/shader-splice.test.ts index eb4432e..d02821e 100644 --- a/tests/transitions/shader-splice.test.ts +++ b/tests/transitions/shader-splice.test.ts @@ -124,6 +124,24 @@ describe('buildShaderSpliceFilter', () => { expect(result.filterComplex).toMatch(/concat=n=3/); }); + it('normalizes SAR=1 on every concat input to avoid mismatch', () => { + // webkit screencast emits SAR 108:109; PNG sequence is 0:1. Without + // setsar=1 on each input, ffmpeg concat fails with "parameters do not + // match". Every video segment (scene + transition) must include setsar=1. 
+ const result = buildShaderSpliceFilter({ + totalDurationSec: 6.0, + boundaries: [{ boundarySec: 3.0, durationMs: 800, extraInputIndex: 2 }], + videoInputLabel: '[0:v]', + audioInputLabel: '[1:a]', + fps: 30, + }); + // Scene segments (trim+setpts+setsar) + expect(result.filterComplex).toMatch(/trim=0\.000:2\.600,setpts=PTS-STARTPTS,setsar=1/); + expect(result.filterComplex).toMatch(/trim=3\.400:6\.000,setpts=PTS-STARTPTS,setsar=1/); + // PNG transition segment also normalized + expect(result.filterComplex).toMatch(/\[2:v\]setpts=PTS-STARTPTS,setsar=1/); + }); + it('clamps dHalf when boundary is near video end', () => { // boundarySec=2.85, requested halfDur=0.4 — only 0.15 available after const result = buildShaderSpliceFilter({