Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion demos/showcase.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export default defineConfig({
baseURL: 'http://127.0.0.1:8976',
demosDir: 'demos',
outputDir: 'videos',
tts: { defaultVoice: 'af_heart', defaultSpeed: 1.0 },
tts: { defaultVoice: 'af_heart', defaultSpeed: 1.0, transcribe: true },
video: {
width: 1920,
height: 1080,
Expand Down
70 changes: 41 additions & 29 deletions demos/showcase.demo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ async function enterScene(

test('showcase', async ({ page, narration }) => {
test.setTimeout(300_000);
// Pause until `word` is next spoken in `scene`. The delay comes from
// `narration.atWord(scene, word)`; when that yields null/undefined (no
// transcript, or the word is not in it) pause for the fallback `fb` ms.
const waitForWord = (scene: string, word: string, fb: number) => {
  const delayMs = narration.atWord(scene, word) ?? fb;
  return page.waitForTimeout(delayMs);
};

await page.goto('/showcase.html');
trackCursor(page, narration);
cursorHighlight(page, { color: '#60a5fa', radius: 18 });
Expand Down Expand Up @@ -65,16 +70,27 @@ test('showcase', async ({ page, narration }) => {

await enterScene(page, narration, '#voiceover', 'voiceover');
await withOverlay(page, 'voiceover', async () => {
const totalMs = narration.durationFor('voiceover', { maxMs: 10000 }) - 400;
const beat = Math.floor(totalMs / 8);
await dimAround(page, '#engine-kokoro', { duration: beat, wait: true });
await dimAround(page, '#engine-transformers', { duration: beat, wait: true });
await dimAround(page, '#engine-mlx', { duration: beat, wait: true });
await dimAround(page, '#engine-openai', { duration: beat, wait: true });
await dimAround(page, '#engine-elevenlabs', { duration: beat, wait: true });
await dimAround(page, '#engine-gemini', { duration: beat, wait: true });
await dimAround(page, '#engine-sarvam', { duration: beat, wait: true });
await focusRing(page, '#voiceover-config', { color: '#22d3ee', duration: beat, wait: true });
// Anchor words are Whisper transcript spellings, not manifest text —
// Kokoro speaks "Kokoro" as "cochro" and "OpenAI" as "opening eye".
const dim = 900;
await waitForWord('voiceover', 'cochro', 3000);
dimAround(page, '#engine-kokoro', { duration: dim });
await waitForWord('voiceover', 'hugging', 1300);
dimAround(page, '#engine-transformers', { duration: dim });
await waitForWord('voiceover', 'opening', 1900);
dimAround(page, '#engine-openai', { duration: dim });
await waitForWord('voiceover', '11', 1100);
dimAround(page, '#engine-elevenlabs', { duration: dim });
// Gemini and Sarvam aren't named in narration — fill the gap.
await page.waitForTimeout(700);
dimAround(page, '#engine-gemini', { duration: dim });
await page.waitForTimeout(800);
dimAround(page, '#engine-sarvam', { duration: dim });
await waitForWord('voiceover', 'MLX', 1200);
dimAround(page, '#engine-mlx', { duration: dim });
await waitForWord('voiceover', 'Audio', 700);
focusRing(page, '#voiceover-config', { color: '#22d3ee', duration: 1200 });
await page.waitForTimeout(1000);
await resetCamera(page);
});

Expand All @@ -93,26 +109,22 @@ test('showcase', async ({ page, narration }) => {
});

await enterScene(page, narration, '#camera-effects', 'camera');
// Total scene time = durationFor. Divide evenly across 7 effects.
// Each beat includes the effect duration + a small gap.
const totalCameraMs = narration.durationFor('camera', { maxMs: 10000 });
const cameraGap = 150;
const cameraBeat = Math.floor((totalCameraMs - 400) / 7) - cameraGap;
spotlight(page, '#effect-spotlight', { duration: cameraBeat, padding: 10 });
await page.waitForTimeout(cameraBeat + cameraGap);
focusRing(page, '#effect-focus-ring', { color: '#fb7185', duration: cameraBeat });
await page.waitForTimeout(cameraBeat + cameraGap);
dimAround(page, '#effect-dim-around', { duration: cameraBeat });
await page.waitForTimeout(cameraBeat + cameraGap);
focusRing(page, '#effect-cursor', { color: '#60a5fa', duration: cameraBeat });
await page.waitForTimeout(cameraBeat + cameraGap);
await waitForWord('camera', 'Spotlight', 3000);
spotlight(page, '#effect-spotlight', { duration: 1400, padding: 10 });
await waitForWord('camera', 'focus', 1500);
focusRing(page, '#effect-focus-ring', { color: '#fb7185', duration: 1200 });
await waitForWord('camera', 'dim', 1100);
dimAround(page, '#effect-dim-around', { duration: 1100 });
await waitForWord('camera', 'highlight', 1100);
focusRing(page, '#effect-cursor', { color: '#60a5fa', duration: 1200 });
await waitForWord('camera', 'zoom', 1200);
// Post-export zoom on the zoomTo card itself — meta!
zoomTo(page, '#effect-zoom', { narration, scale: 1.5, duration: cameraBeat, fadeIn: 300, holdMs: cameraBeat - 600 });
await page.waitForTimeout(cameraBeat + cameraGap);
focusRing(page, '#effect-motion-blur', { color: '#a78bfa', duration: cameraBeat });
await page.waitForTimeout(cameraBeat + cameraGap);
showConfetti(page, { spread: 'rain', duration: cameraBeat, pieces: 130 });
await page.waitForTimeout(cameraBeat + cameraGap);
zoomTo(page, '#effect-zoom', { narration, scale: 1.5, duration: 1500, fadeIn: 300, holdMs: 900 });
await waitForWord('camera', 'motion', 1500);
focusRing(page, '#effect-motion-blur', { color: '#a78bfa', duration: 1100 });
await waitForWord('camera', 'confetti', 1500);
showConfetti(page, { spread: 'rain', duration: 1500, pieces: 130 });
await page.waitForTimeout(1200);
await resetCamera(page);

await enterScene(page, narration, '#export-stack', 'export');
Expand Down
126 changes: 2 additions & 124 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
"gsap": "^3.15.0",
"kokoro-js": "^1.2.1"
},
"overrides": {
"kokoro-js": {
"@huggingface/transformers": "$@huggingface/transformers"
}
},
"optionalDependencies": {
"@elevenlabs/elevenlabs-js": "^2.0.0",
"@google/genai": "^1.0.0",
Expand Down
16 changes: 16 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,26 @@ export type { TTSEngine };

// ---- Types ----

/**
 * Setting for the optional Whisper-based word-level transcription pass.
 *
 * - `true` runs the pass with the default settings.
 * - An object runs the pass and overrides the model and/or the language.
 */
export type TranscribeOption =
  | {
      /**
       * HuggingFace Hub model id.
       * Defaults to `onnx-community/whisper-base.en`.
       */
      model?: string;
      /**
       * Hint for the spoken language (e.g., 'en', 'fr'); the language is
       * auto-detected when this is left out.
       * NOTE(review): the default model id ends in `.en` — confirm that
       * auto-detection is meaningful with the default model.
       */
      language?: string;
    }
  | boolean;

/** Text-to-speech settings.
 * NOTE(review): presumably the shape of the `tts` key passed to
 * `defineConfig` — confirm against the config loader. */
export interface TTSConfig {
/** Voice identifier to use by default (the showcase config passes `'af_heart'`). */
defaultVoice: string;
/** Default speech speed (the showcase config passes `1.0`). */
defaultSpeed: number;
/** Optional TTS engine selection; see `TTSEngine` for the accepted values. */
engine?: TTSEngine;
/** Run Whisper STT over each generated TTS clip to produce word-level
* timestamps. Off by default in v0.38.0 — opt in to enable
* `narration.wordTiming(scene)` and the `narration.transcript.json`
* public artifact.
* NOTE(review): the showcase demo reads timings through
* `narration.atWord(scene, word)` — confirm whether that helper should be
* named here alongside (or instead of) `wordTiming`. */
transcribe?: TranscribeOption;
}

export type BrowserEngine = 'chromium' | 'webkit' | 'firefox';
Expand Down
Loading
Loading