From ababb98913a8c573bba89fa54a261ea229a42ef0 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Mon, 1 Dec 2025 07:55:43 +0000
Subject: [PATCH 1/2] Refactor rendering pipeline and add hardware encoder
 support

Co-authored-by: richiemcilroy1
---
 crates/enc-ffmpeg/src/video/h264.rs           | 116 +++++++----
 crates/export/src/lib.rs                      |   1 -
 crates/export/src/mp4.rs                      |   4 +-
 crates/rendering/src/decoder/avassetreader.rs |  59 ++++--
 crates/rendering/src/decoder/ffmpeg.rs        |  62 +++---
 crates/rendering/src/decoder/mod.rs           |   9 +
 crates/rendering/src/frame_pipeline.rs        |  99 +++++-----
 crates/rendering/src/lib.rs                   | 184 ++++++++++++------
 8 files changed, 342 insertions(+), 192 deletions(-)

diff --git a/crates/enc-ffmpeg/src/video/h264.rs b/crates/enc-ffmpeg/src/video/h264.rs
index a338229cb7..b06b326230 100644
--- a/crates/enc-ffmpeg/src/video/h264.rs
+++ b/crates/enc-ffmpeg/src/video/h264.rs
@@ -271,49 +271,87 @@ fn get_codec_and_options(
     config: &VideoInfo,
     preset: H264Preset,
 ) -> Option<(Codec, Dictionary<'_>)> {
-    let encoder_name = {
-        // if cfg!(target_os = "macos") {
-        //     "libx264"
-        //     // looks terrible rn :(
-        //     // "h264_videotoolbox"
-        // } else {
-        //     "libx264"
-        // }
-
-        "libx264"
+    #[cfg(target_os = "macos")]
+    let hw_encoder_name = "h264_videotoolbox";
+
+    #[cfg(not(target_os = "macos"))]
+    let hw_encoder_name = "h264_nvenc";
+
+    let try_hw_first = matches!(preset, H264Preset::Ultrafast);
+
+    let encoder_names: &[&str] = if try_hw_first {
+        &[hw_encoder_name, "libx264"]
+    } else {
+        &["libx264"]
     };
 
-    if let Some(codec) = encoder::find_by_name(encoder_name) {
-        let mut options = Dictionary::new();
-
-        if encoder_name == "h264_videotoolbox" {
-            options.set("realtime", "true");
-        } else if encoder_name == "libx264" {
-            let keyframe_interval_secs = 2;
-            let keyframe_interval = keyframe_interval_secs * config.frame_rate.numerator();
-            let keyframe_interval_str = keyframe_interval.to_string();
-
-            options.set(
-                "preset",
-                match preset {
-                    H264Preset::Slow => "slow",
-                    H264Preset::Medium => "medium",
-                    H264Preset::Ultrafast => "ultrafast",
-                },
-            );
-            if let H264Preset::Ultrafast = preset {
-                options.set("tune", "zerolatency");
+    for encoder_name in encoder_names {
+        if let Some(codec) = encoder::find_by_name(encoder_name) {
+            let mut options = Dictionary::new();
+
+            if *encoder_name == "h264_videotoolbox" {
+                options.set("realtime", "false");
+                options.set("allow_sw", "0");
+                options.set("prio_speed", "1");
+                options.set("profile", "high");
+                options.set("level", "5.1");
+                debug!("Using VideoToolbox hardware encoder");
+                return Some((codec, options));
+            } else if *encoder_name == "h264_nvenc" {
+                options.set("preset", "p1");
+                options.set("tune", "ll");
+                options.set("rc", "vbr");
+                options.set("multipass", "disabled");
+                options.set("bf", "0");
+                debug!("Using NVENC hardware encoder");
+                return Some((codec, options));
+            } else if *encoder_name == "h264_qsv" {
+                options.set("preset", "veryfast");
+                options.set("look_ahead", "0");
+                debug!("Using Intel QuickSync hardware encoder");
+                return Some((codec, options));
+            } else if *encoder_name == "libx264" {
+                let keyframe_interval_secs = 2;
+                let keyframe_interval = keyframe_interval_secs * config.frame_rate.numerator();
+                let keyframe_interval_str = keyframe_interval.to_string();
+                let thread_count = thread::available_parallelism()
+                    .map(|v| v.get())
+                    .unwrap_or(4);
+
+                options.set(
+                    "preset",
+                    match preset {
+                        H264Preset::Slow => "slow",
+                        H264Preset::Medium => "medium",
+                        H264Preset::Ultrafast => "ultrafast",
+                    },
+                );
+                if let H264Preset::Ultrafast = preset {
+                    options.set("tune", "zerolatency");
+                    options.set("bf", "0");
+                    options.set("refs", "1");
+                    options.set("rc-lookahead", "0");
+                    options.set("aq-mode", "0");
+                    options.set("sc_threshold", "0");
+                }
+                options.set("vsync", "1");
+                options.set("g", &keyframe_interval_str);
+                options.set("keyint_min", &keyframe_interval_str);
+                options.set("threads", &thread_count.to_string());
+                options.set("sliced-threads", "1");
+
+                debug!(
+                    "Using libx264 software encoder with {} threads",
+                    thread_count
+                );
+                return Some((codec, options));
+            } else if *encoder_name == "h264_mf" {
+                options.set("hw_encoding", "true");
+                options.set("scenario", "4");
+                options.set("quality", "1");
+                return Some((codec, options));
             }
-            options.set("vsync", "1");
-            options.set("g", &keyframe_interval_str);
-            options.set("keyint_min", &keyframe_interval_str);
-        } else if encoder_name == "h264_mf" {
-            options.set("hw_encoding", "true");
-            options.set("scenario", "4");
-            options.set("quality", "1");
         }
-
-        return Some((codec, options));
     }
 
     None

diff --git a/crates/export/src/lib.rs b/crates/export/src/lib.rs
index 2e80be962e..4b187f7c90 100644
--- a/crates/export/src/lib.rs
+++ b/crates/export/src/lib.rs
@@ -5,7 +5,6 @@ use cap_editor::SegmentMedia;
 use cap_project::{ProjectConfiguration, RecordingMeta, StudioRecordingMeta};
 use cap_rendering::{ProjectRecordingsMeta, RenderVideoConstants};
 use std::{path::PathBuf, sync::Arc};
-use tracing::error;
 
 #[derive(thiserror::Error, Debug)]
 pub enum ExportError {

diff --git a/crates/export/src/mp4.rs b/crates/export/src/mp4.rs
index 00276ea432..9bbce01db0 100644
--- a/crates/export/src/mp4.rs
+++ b/crates/export/src/mp4.rs
@@ -49,8 +49,8 @@ impl Mp4ExportSettings {
         info!("Exporting mp4 with settings: {:?}", &self);
         info!("Expected to render {} frames", base.total_frames(self.fps));
 
-        let (tx_image_data, mut video_rx) = tokio::sync::mpsc::channel::<(RenderedFrame, u32)>(8);
-        let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(8);
+        let (tx_image_data, mut video_rx) = tokio::sync::mpsc::channel::<(RenderedFrame, u32)>(32);
+        let (frame_tx, frame_rx) = std::sync::mpsc::sync_channel::(16);
 
         let fps = self.fps;

diff --git a/crates/rendering/src/decoder/avassetreader.rs b/crates/rendering/src/decoder/avassetreader.rs
index f4ad42eb77..3759d83176 100644
--- a/crates/rendering/src/decoder/avassetreader.rs
+++ b/crates/rendering/src/decoder/avassetreader.rs
@@ -16,7 +16,7 @@ use tokio::{runtime::Handle as TokioHandle, sync::oneshot};
 use crate::DecodedFrame;
 
 use super::frame_converter::{FrameConverter, copy_rgba_plane};
-use super::{FRAME_CACHE_SIZE, VideoDecoderMessage, pts_to_frame};
+use super::{FRAME_CACHE_SIZE, PREFETCH_LOOKAHEAD, VideoDecoderMessage, pts_to_frame};
 
 #[derive(Clone)]
 struct ProcessedFrame {
@@ -227,6 +227,44 @@ impl AVAssetReaderDecoder {
 
         while let Ok(r) = rx.recv() {
             match r {
+                VideoDecoderMessage::PrefetchFrames(start_time_secs, prefetch_fps) => {
+                    let start_frame = (start_time_secs * prefetch_fps as f32).floor() as u32;
+                    let end_frame = start_frame + PREFETCH_LOOKAHEAD as u32;
+
+                    for frame in &mut frames {
+                        let Ok(frame) = frame else { continue };
+
+                        let current_frame = pts_to_frame(
+                            frame.pts().value,
+                            Rational::new(1, frame.pts().scale),
+                            fps,
+                        );
+
+                        if let Some(image_buf) = frame.image_buf() {
+                            if current_frame >= start_frame
+                                && current_frame <= end_frame
+                                && !cache.contains_key(&current_frame)
+                            {
+                                if cache.len() >= FRAME_CACHE_SIZE {
+                                    if let Some(&oldest) = cache.keys().next() {
+                                        cache.remove(&oldest);
+                                    }
+                                }
+                                cache.insert(
+                                    current_frame,
+                                    CachedFrame::Raw {
+                                        image_buf: image_buf.retained(),
+                                        number: current_frame,
+                                    },
+                                );
+                            }
+                        }
+
+                        if current_frame >= end_frame {
+                            break;
+                        }
+                    }
+                }
                 VideoDecoderMessage::GetFrame(requested_time, sender) => {
                     let requested_frame = (requested_time * fps as f32).floor() as u32;
 
@@ -297,8 +335,6 @@ impl AVAssetReaderDecoder {
 
                             this.is_done = false;
 
-                            // Handles frame skips.
-                            // We use the cache instead of last_sent_frame as newer non-matching frames could have been decoded.
                             if let Some(most_recent_prev_frame) =
                                 cache.iter_mut().rev().find(|v| *v.0 < requested_frame)
                                 && let Some(sender) = sender.take()
@@ -314,7 +350,6 @@ impl AVAssetReaderDecoder {
                                 && let Some(sender) = sender.take()
                             {
                                 let data = cache_frame.process(&mut processor);
-                                // info!("sending frame {requested_frame}");
 
                                 (sender)(data);
 
@@ -344,23 +379,13 @@ impl AVAssetReaderDecoder {
                         }
 
                         if current_frame > requested_frame && sender.is_some() {
-                            // not inlining this is important so that last_sent_frame is dropped before the sender is invoked
                             let last_sent_frame = last_sent_frame.borrow().clone();
                             if let Some((sender, last_sent_frame)) =
                                 last_sent_frame.and_then(|l| Some((sender.take()?, l)))
                             {
-                                // info!(
-                                //     "sending previous frame {} for {requested_frame}",
-                                //     last_sent_frame.0
-                                // );
-
                                 (sender)(last_sent_frame);
                             } else if let Some(sender) = sender.take() {
-                                // info!(
-                                //     "sending forward frame {current_frame} for {requested_frame}",
-                                // );
-
                                 (sender)(cache_frame.process(&mut processor));
                             }
                         }
@@ -374,14 +399,8 @@ impl AVAssetReaderDecoder {
 
                     this.is_done = true;
 
-                    // not inlining this is important so that last_sent_frame is dropped before the sender is invoked
                     let last_sent_frame = last_sent_frame.borrow().clone();
                     if let Some((sender, last_sent_frame)) = sender.take().zip(last_sent_frame) {
-                        // info!(
-                        //     "sending hail mary frame {} for {requested_frame}",
-                        //     last_sent_frame.0
-                        // );
-
                         (sender)(last_sent_frame);
                     }
                 }

diff --git a/crates/rendering/src/decoder/ffmpeg.rs b/crates/rendering/src/decoder/ffmpeg.rs
index 5a25eca6fc..e5fad35042 100644
--- a/crates/rendering/src/decoder/ffmpeg.rs
+++ b/crates/rendering/src/decoder/ffmpeg.rs
@@ -11,7 +11,10 @@ use tokio::sync::oneshot;
 
 use crate::DecodedFrame;
 
-use super::{FRAME_CACHE_SIZE, VideoDecoderMessage, frame_converter::FrameConverter, pts_to_frame};
+use super::{
+    FRAME_CACHE_SIZE, PREFETCH_LOOKAHEAD, VideoDecoderMessage, frame_converter::FrameConverter,
+    pts_to_frame,
+};
 
 #[derive(Clone)]
 struct ProcessedFrame {
@@ -97,10 +100,44 @@ impl FfmpegDecoder {
 
         while let Ok(r) = rx.recv() {
             match r {
+                VideoDecoderMessage::PrefetchFrames(start_time_secs, prefetch_fps) => {
+                    let start_frame = (start_time_secs * prefetch_fps as f32).floor() as u32;
+                    let end_frame = start_frame + PREFETCH_LOOKAHEAD as u32;
+
+                    for target_frame in start_frame..end_frame {
+                        if cache.contains_key(&target_frame) {
+                            continue;
+                        }
+
+                        for frame in &mut frames {
+                            let Ok(frame) = frame else { continue };
+
+                            let current_frame =
+                                pts_to_frame(frame.pts().unwrap() - start_time, time_base, fps);
+
+                            if current_frame >= start_frame && current_frame <= end_frame {
+                                if cache.len() >= FRAME_CACHE_SIZE {
+                                    if let Some(&oldest) = cache.keys().next() {
+                                        cache.remove(&oldest);
+                                    }
+                                }
+                                cache.insert(
+                                    current_frame,
+                                    CachedFrame::Raw {
+                                        frame,
+                                        number: current_frame,
+                                    },
+                                );
+                            }
+
+                            if current_frame >= end_frame {
+                                break;
+                            }
+                        }
+                    }
+                }
                 VideoDecoderMessage::GetFrame(requested_time, sender) => {
                     let requested_frame = (requested_time * fps as f32).floor() as u32;
 
-                    // sender.send(black_frame.clone()).ok();
-                    // continue;
 
                     let mut sender = if let Some(cached) = cache.get_mut(&requested_frame) {
                         let data = cached.process(&mut converter);
@@ -162,8 +199,6 @@ impl FfmpegDecoder {
                             number: current_frame,
                         };
 
-                        // Handles frame skips.
-                        // We use the cache instead of last_sent_frame as newer non-matching frames could have been decoded.
                         if let Some(most_recent_prev_frame) =
                             cache.iter_mut().rev().find(|v| *v.0 < requested_frame)
                             && let Some(sender) = sender.take()
@@ -179,7 +214,6 @@ impl FfmpegDecoder {
                             && let Some(sender) = sender.take()
                         {
                             let data = cache_frame.process(&mut converter);
-                            // info!("sending frame {requested_frame}");
 
                             (sender)(data);
 
@@ -212,23 +246,13 @@ impl FfmpegDecoder {
                         };
 
                         if current_frame > requested_frame && sender.is_some() {
-                            // not inlining this is important so that last_sent_frame is dropped before the sender is invoked
                             let last_sent_frame = last_sent_frame.borrow().clone();
                             if let Some((sender, last_sent_frame)) =
                                 last_sent_frame.and_then(|l| Some((sender.take()?, l)))
                             {
-                                // info!(
-                                //     "sending previous frame {} for {requested_frame}",
-                                //     last_sent_frame.0
-                                // );
-
                                 (sender)(last_sent_frame);
                             } else if let Some(sender) = sender.take() {
-                                // info!(
-                                //     "sending forward frame {current_frame} for {requested_frame}",
-                                // );
-
                                 (sender)(cache_frame.process(&mut converter));
                             }
                         }
@@ -240,15 +264,9 @@ impl FfmpegDecoder {
                         }
                     }
 
-                    // not inlining this is important so that last_sent_frame is dropped before the sender is invoked
                     let last_sent_frame = last_sent_frame.borrow().clone();
                     if let Some((sender, last_sent_frame)) = sender.take().zip(last_sent_frame) {
-                        // info!(
-                        //     "sending hail mary frame {} for {requested_frame}",
-                        //     last_sent_frame.0
-                        // );
-
                         (sender)(last_sent_frame);
                     }
                 }

diff --git a/crates/rendering/src/decoder/mod.rs b/crates/rendering/src/decoder/mod.rs
index 8875f63246..805f120865 100644
--- a/crates/rendering/src/decoder/mod.rs
+++ b/crates/rendering/src/decoder/mod.rs
@@ -40,6 +40,7 @@ impl DecodedFrame {
 
 pub enum VideoDecoderMessage {
     GetFrame(f32, tokio::sync::oneshot::Sender<DecodedFrame>),
+    PrefetchFrames(f32, u32),
 }
 
 pub fn pts_to_frame(pts: i64, time_base: Rational, fps: u32) -> u32 {
@@ -48,6 +49,7 @@ pub fn pts_to_frame(pts: i64, time_base: Rational, fps: u32) -> u32 {
 }
 
 pub const FRAME_CACHE_SIZE: usize = 100;
+pub const PREFETCH_LOOKAHEAD: usize = 8;
 
 #[derive(Clone)]
 pub struct AsyncVideoDecoderHandle {
@@ -64,6 +66,13 @@ impl AsyncVideoDecoderHandle {
         rx.await.ok()
     }
 
+    pub fn prefetch(&self, time: f32, fps: u32) {
+        let _ = self.sender.send(VideoDecoderMessage::PrefetchFrames(
+            self.get_time(time),
+            fps,
+        ));
+    }
+
     pub fn get_time(&self, time: f32) -> f32 {
         time + self.offset as f32
     }

diff --git a/crates/rendering/src/frame_pipeline.rs b/crates/rendering/src/frame_pipeline.rs
index 2c17424e23..360ecc2fc3 100644
--- a/crates/rendering/src/frame_pipeline.rs
+++ b/crates/rendering/src/frame_pipeline.rs
@@ -2,34 +2,6 @@ use wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
 
 use crate::{ProjectUniforms, RenderSession, RenderingError};
 
-// pub struct FramePipelineState<'a> {
-//     pub constants: &'a RenderVideoConstants,
-//     pub uniforms: &'a ProjectUniforms,
-//     pub texture: &'a wgpu::Texture,
-//     pub texture_view: wgpu::TextureView,
-// }
-
-// impl<'a> FramePipelineState<'a> {
-//     pub fn new(
-//         constants: &'a RenderVideoConstants,
-//         uniforms: &'a ProjectUniforms,
-//         texture: &'a wgpu::Texture,
-//     ) -> Self {
-//         let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
-
-//         Self {
-//             constants,
-//             uniforms,
-//             texture,
-//             texture_view,
-//         }
-//     }
-// }
-
 #[derive(Clone)]
 pub struct RenderedFrame {
     pub data: Vec<u8>,
@@ -38,36 +10,29 @@ pub struct RenderedFrame {
     pub padded_bytes_per_row: u32,
 }
 
-// impl FramePipelineEncoder {
-//     pub fn new(state: &FramePipelineState) -> Self {
-//         Self {
-//             encoder: state.constants.device.create_command_encoder(
-//                 &(wgpu::CommandEncoderDescriptor {
-//                     label: Some("Render Encoder"),
-//                 }),
-//             ),
-//         }
-//     }
-// }
-
 pub fn padded_bytes_per_row(output_size: (u32, u32)) -> u32 {
-    // Calculate the aligned bytes per row
     let align = COPY_BYTES_PER_ROW_ALIGNMENT;
     let unpadded_bytes_per_row = output_size.0 * 4;
     let padding = (align - (unpadded_bytes_per_row % align)) % align;
     let padded_bytes_per_row = unpadded_bytes_per_row + padding;
 
-    // Ensure the padded_bytes_per_row is a multiple of 4 (32 bits)
     (padded_bytes_per_row + 3) & !3
 }
 
-pub async fn finish_encoder(
+pub struct PendingReadback {
+    pub width: u32,
+    pub height: u32,
+    pub padded_bytes_per_row: u32,
+    pub receiver: tokio::sync::oneshot::Receiver<Result<(), wgpu::BufferAsyncError>>,
+}
+
+pub fn submit_frame_for_readback(
     session: &mut RenderSession,
     device: &wgpu::Device,
     queue: &wgpu::Queue,
     uniforms: &ProjectUniforms,
     encoder: wgpu::CommandEncoder,
-) -> Result<RenderedFrame, RenderingError> {
+) -> PendingReadback {
     let padded_bytes_per_row = padded_bytes_per_row(uniforms.output_size);
 
     queue.submit(std::iter::once(encoder.finish()));
@@ -82,13 +47,13 @@ pub async fn finish_encoder(
     session.ensure_readback_buffers(device, output_buffer_size);
     let output_buffer = session.current_readback_buffer();
 
-    let mut encoder = device.create_command_encoder(
+    let mut copy_encoder = device.create_command_encoder(
         &(wgpu::CommandEncoderDescriptor {
             label: Some("Copy Encoder"),
         }),
     );
 
-    encoder.copy_texture_to_buffer(
+    copy_encoder.copy_texture_to_buffer(
         wgpu::TexelCopyTextureInfo {
             texture: session.current_texture(),
             mip_level: 0,
@@ -106,7 +71,7 @@ pub async fn finish_encoder(
         output_texture_size,
     );
 
-    queue.submit(std::iter::once(encoder.finish()));
+    queue.submit(std::iter::once(copy_encoder.finish()));
 
     let buffer_slice = output_buffer.slice(..);
     let (tx, rx) = tokio::sync::oneshot::channel();
@@ -114,23 +79,51 @@ pub async fn finish_encoder(
         let _ = tx.send(result);
     });
 
+    session.swap_readback_buffers();
+
+    PendingReadback {
+        width: uniforms.output_size.0,
+        height: uniforms.output_size.1,
+        padded_bytes_per_row,
+        receiver: rx,
+    }
+}
+
+pub async fn collect_readback(
+    session: &RenderSession,
+    device: &wgpu::Device,
+    pending: PendingReadback,
+) -> Result<RenderedFrame, RenderingError> {
     device.poll(wgpu::PollType::Wait)?;
 
-    rx.await
+    pending
+        .receiver
+        .await
         .map_err(|_| RenderingError::BufferMapWaitingFailed)??;
 
+    let output_buffer = session.previous_readback_buffer();
+
+    let buffer_slice = output_buffer.slice(..);
     let data = buffer_slice.get_mapped_range();
     let data_vec = data.to_vec();
     drop(data);
     output_buffer.unmap();
 
-    session.swap_readback_buffers();
-
     Ok(RenderedFrame {
         data: data_vec,
-        padded_bytes_per_row,
-        width: uniforms.output_size.0,
-        height: uniforms.output_size.1,
+        padded_bytes_per_row: pending.padded_bytes_per_row,
+        width: pending.width,
+        height: pending.height,
     })
 }
+
+pub async fn finish_encoder(
+    session: &mut RenderSession,
+    device: &wgpu::Device,
+    queue: &wgpu::Queue,
+    uniforms: &ProjectUniforms,
+    encoder: wgpu::CommandEncoder,
+) -> Result<RenderedFrame, RenderingError> {
+    let pending = submit_frame_for_readback(session, device, queue, uniforms, encoder);
+    collect_readback(session, device, pending).await
+}

diff --git a/crates/rendering/src/lib.rs b/crates/rendering/src/lib.rs
index b9cd8175b0..f30bd3782f 100644
--- a/crates/rendering/src/lib.rs
+++ b/crates/rendering/src/lib.rs
@@ -1,8 +1,7 @@
 use anyhow::Result;
 use cap_project::{
     AspectRatio, CameraShape, CameraXPosition, CameraYPosition, ClipOffsets, CornerStyle, Crop,
-    CursorEvents, MaskKind, MaskSegment, ProjectConfiguration, RecordingMeta, StudioRecordingMeta,
-    XY,
+    CursorEvents, MaskKind, ProjectConfiguration, RecordingMeta, StudioRecordingMeta, XY,
 };
 use composite_frame::CompositeVideoFrameUniforms;
 use core::f64;
@@ -20,7 +19,6 @@ use spring_mass_damper::SpringMassDamperSimulationConfig;
 use std::{collections::HashMap, sync::Arc};
 use std::{path::PathBuf, time::Instant};
 use tokio::sync::mpsc;
-use tracing::error;
 
 mod composite_frame;
 mod coord;
@@ -36,7 +34,9 @@ mod zoom;
 
 pub use coord::*;
 pub use decoder::DecodedFrame;
-pub use frame_pipeline::RenderedFrame;
+pub use frame_pipeline::{
+    PendingReadback, RenderedFrame, collect_readback, submit_frame_for_readback,
+};
 pub use project_recordings::{ProjectRecordingsMeta, SegmentRecordings};
 
 use mask::interpolate_masks;
@@ -229,6 +229,13 @@ pub struct RenderSegment {
     pub decoders: RecordingSegmentDecoders,
 }
 
+const PIPELINE_DEPTH: usize = 3;
+
+struct PipelinedFrame {
+    pending: PendingReadback,
+    frame_number: u32,
+}
+
 #[allow(clippy::too_many_arguments)]
 pub async fn render_video_to_channel(
     constants: &RenderVideoConstants,
@@ -250,74 +257,127 @@ pub async fn render_video_to_channel(
     let total_frames = (fps as f64 * duration).ceil() as u32;
 
     let mut frame_number = 0;
-
-    let mut frame_renderer = FrameRenderer::new(constants);
-
     let mut layers = RendererLayers::new(&constants.device, &constants.queue);
+    let mut session: Option<RenderSession> = None;
+
+    let mut pending_frames: std::collections::VecDeque<PipelinedFrame> =
+        std::collections::VecDeque::with_capacity(PIPELINE_DEPTH);
 
     loop {
-        if frame_number >= total_frames {
-            break;
-        }
+        while pending_frames.len() < PIPELINE_DEPTH && frame_number < total_frames {
+            let Some((segment_time, segment)) =
+                project.get_segment_time(frame_number as f64 / fps as f64)
+            else {
+                break;
+            };
 
-        let Some((segment_time, segment)) =
-            project.get_segment_time(frame_number as f64 / fps as f64)
-        else {
-            break;
-        };
+            let clip_config = project
+                .clips
+                .iter()
+                .find(|v| v.index == segment.recording_clip);
 
-        let clip_config = project
-            .clips
-            .iter()
-            .find(|v| v.index == segment.recording_clip);
+            let current_frame_number = frame_number;
+            frame_number += 1;
 
-        let frame_number = {
-            let prev = frame_number;
-            std::mem::replace(&mut frame_number, prev + 1)
-        };
+            let render_segment = &render_segments[segment.recording_clip as usize];
+
+            if let Some(segment_frames) = render_segment
+                .decoders
+                .get_frames(
+                    segment_time as f32,
+                    !project.camera.hide,
+                    clip_config.map(|v| v.offsets).unwrap_or_default(),
+                )
+                .await
+            {
+                let uniforms = ProjectUniforms::new(
+                    constants,
+                    project,
+                    current_frame_number,
+                    fps,
+                    resolution_base,
+                    &render_segment.cursor,
+                    &segment_frames,
+                );
 
-        let render_segment = &render_segments[segment.recording_clip as usize];
+                if uniforms.output_size.0 == 0 || uniforms.output_size.1 == 0 {
+                    continue;
+                }
 
-        if let Some(segment_frames) = render_segment
-            .decoders
-            .get_frames(
-                segment_time as f32,
-                !project.camera.hide,
-                clip_config.map(|v| v.offsets).unwrap_or_default(),
-            )
-            .await
-        {
-            let uniforms = ProjectUniforms::new(
-                constants,
-                project,
-                frame_number,
-                fps,
-                resolution_base,
-                &render_segment.cursor,
-                &segment_frames,
-            );
+                let render_session = session.get_or_insert_with(|| {
+                    RenderSession::new(
+                        &constants.device,
+                        uniforms.output_size.0,
+                        uniforms.output_size.1,
+                    )
+                });
 
-            let frame = frame_renderer
-                .render(
-                    segment_frames,
-                    uniforms,
-                    &render_segment.cursor,
-                    &mut layers,
-                )
-                .await?;
+                render_session.update_texture_size(
+                    &constants.device,
+                    uniforms.output_size.0,
+                    uniforms.output_size.1,
+                );
 
-            if frame.width == 0 || frame.height == 0 {
-                continue;
+                layers
+                    .prepare(
+                        constants,
+                        &uniforms,
+                        &segment_frames,
+                        &render_segment.cursor,
+                    )
+                    .await?;
+
+                let mut encoder = constants.device.create_command_encoder(
+                    &(wgpu::CommandEncoderDescriptor {
+                        label: Some("Render Encoder"),
+                    }),
+                );
+
+                layers.render(
+                    &constants.device,
+                    &constants.queue,
+                    &mut encoder,
+                    render_session,
+                    &uniforms,
+                );
+
+                let pending = submit_frame_for_readback(
+                    render_session,
+                    &constants.device,
+                    &constants.queue,
+                    &uniforms,
+                    encoder,
+                );
+
+                pending_frames.push_back(PipelinedFrame {
+                    pending,
+                    frame_number: current_frame_number,
+                });
             }
+        }
+
+        if pending_frames.is_empty() {
+            break;
+        }
 
-            sender.send((frame, frame_number)).await?;
+        let pipelined = pending_frames.pop_front().unwrap();
+
+        if let Some(render_session) = &session {
+            let frame =
+                collect_readback(render_session, &constants.device, pipelined.pending).await?;
+
+            if frame.width > 0 && frame.height > 0 {
+                sender.send((frame, pipelined.frame_number)).await?;
+            }
         }
     }
 
     let total_time = start_time.elapsed();
     println!(
-        "Render complete. Processed {frame_number} frames in {:?} seconds",
-        total_time.as_secs_f32()
+        "Render complete. Processed {} frames in {:.2?} ({:.1} fps)",
+        frame_number,
+        total_time,
+        frame_number as f32 / total_time.as_secs_f32()
     );
 
     Ok(())
@@ -1847,6 +1907,20 @@ impl RenderSession {
         }
     }
 
+    pub(crate) fn previous_readback_buffer(&self) -> &wgpu::Buffer {
+        if self.current_readback_is_left {
+            self.readback_buffers
+                .1
+                .as_ref()
+                .expect("readback buffer should be initialised")
+        } else {
+            self.readback_buffers
+                .0
+                .as_ref()
+                .expect("readback buffer should be initialised")
+        }
+    }
+
     pub(crate) fn swap_readback_buffers(&mut self) {
         self.current_readback_is_left = !self.current_readback_is_left;
     }

From 46dbda2f3e02158a4e8f799ce5863d76979fa70b Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Mon, 1 Dec 2025 07:58:44 +0000
Subject: [PATCH 2/2] Refactor: Improve H264 encoder selection and
 configuration

This commit refactors the H264 encoder selection logic to be more
organized and platform-agnostic. It introduces a new function
`get_hw_encoder_names` to retrieve available hardware encoders based on
the target operating system. The encoder configuration is also updated
to use a match statement for better readability and maintainability.
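
For reviewers, the resulting probe order (the first name that
`encoder::find_by_name` resolves wins; the lists are read straight from
`get_hw_encoder_names` plus the libx264 fallback in the diff below):

    macOS:   h264_videotoolbox -> libx264
    Windows: h264_nvenc -> h264_amf -> h264_qsv -> h264_mf -> libx264
    Linux:   h264_nvenc -> h264_vaapi -> h264_qsv -> libx264

Hardware encoders are only tried for H264Preset::Ultrafast; the other
presets go straight to libx264. Note that find_by_name only confirms
the encoder is compiled into the linked FFmpeg; opening the codec can
still fail at runtime on a machine without the matching GPU.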
Co-authored-by: richiemcilroy1
---
 crates/enc-ffmpeg/src/video/h264.rs | 182 +++++++++++++++++-----------
 1 file changed, 114 insertions(+), 68 deletions(-)

diff --git a/crates/enc-ffmpeg/src/video/h264.rs b/crates/enc-ffmpeg/src/video/h264.rs
index b06b326230..cd08206560 100644
--- a/crates/enc-ffmpeg/src/video/h264.rs
+++ b/crates/enc-ffmpeg/src/video/h264.rs
@@ -267,89 +267,135 @@ impl H264Encoder {
     }
 }
 
+fn get_hw_encoder_names() -> &'static [&'static str] {
+    #[cfg(target_os = "macos")]
+    {
+        &["h264_videotoolbox"]
+    }
+
+    #[cfg(target_os = "windows")]
+    {
+        &["h264_nvenc", "h264_amf", "h264_qsv", "h264_mf"]
+    }
+
+    #[cfg(target_os = "linux")]
+    {
+        &["h264_nvenc", "h264_vaapi", "h264_qsv"]
+    }
+
+    #[cfg(not(any(target_os = "macos", target_os = "windows", target_os = "linux")))]
+    {
+        &[]
+    }
+}
+
 fn get_codec_and_options(
     config: &VideoInfo,
     preset: H264Preset,
 ) -> Option<(Codec, Dictionary<'_>)> {
-    #[cfg(target_os = "macos")]
-    let hw_encoder_name = "h264_videotoolbox";
-
-    #[cfg(not(target_os = "macos"))]
-    let hw_encoder_name = "h264_nvenc";
-
     let try_hw_first = matches!(preset, H264Preset::Ultrafast);
 
-    let encoder_names: &[&str] = if try_hw_first {
-        &[hw_encoder_name, "libx264"]
+    let hw_encoders = get_hw_encoder_names();
+    let sw_encoder = "libx264";
+
+    let encoder_names: Vec<&str> = if try_hw_first {
+        hw_encoders
+            .iter()
+            .copied()
+            .chain(std::iter::once(sw_encoder))
+            .collect()
     } else {
-        &["libx264"]
+        vec![sw_encoder]
     };
 
     for encoder_name in encoder_names {
         if let Some(codec) = encoder::find_by_name(encoder_name) {
             let mut options = Dictionary::new();
 
-            if *encoder_name == "h264_videotoolbox" {
-                options.set("realtime", "false");
-                options.set("allow_sw", "0");
-                options.set("prio_speed", "1");
-                options.set("profile", "high");
-                options.set("level", "5.1");
-                debug!("Using VideoToolbox hardware encoder");
-                return Some((codec, options));
-            } else if *encoder_name == "h264_nvenc" {
-                options.set("preset", "p1");
-                options.set("tune", "ll");
-                options.set("rc", "vbr");
-                options.set("multipass", "disabled");
-                options.set("bf", "0");
-                debug!("Using NVENC hardware encoder");
-                return Some((codec, options));
-            } else if *encoder_name == "h264_qsv" {
-                options.set("preset", "veryfast");
-                options.set("look_ahead", "0");
-                debug!("Using Intel QuickSync hardware encoder");
-                return Some((codec, options));
-            } else if *encoder_name == "libx264" {
-                let keyframe_interval_secs = 2;
-                let keyframe_interval = keyframe_interval_secs * config.frame_rate.numerator();
-                let keyframe_interval_str = keyframe_interval.to_string();
-                let thread_count = thread::available_parallelism()
-                    .map(|v| v.get())
-                    .unwrap_or(4);
-
-                options.set(
-                    "preset",
-                    match preset {
-                        H264Preset::Slow => "slow",
-                        H264Preset::Medium => "medium",
-                        H264Preset::Ultrafast => "ultrafast",
-                    },
-                );
-                if let H264Preset::Ultrafast = preset {
-                    options.set("tune", "zerolatency");
-                    options.set("bf", "0");
-                    options.set("refs", "1");
-                    options.set("rc-lookahead", "0");
-                    options.set("aq-mode", "0");
-                    options.set("sc_threshold", "0");
-                }
-                options.set("vsync", "1");
-                options.set("g", &keyframe_interval_str);
-                options.set("keyint_min", &keyframe_interval_str);
-                options.set("threads", &thread_count.to_string());
-                options.set("sliced-threads", "1");
-
-                debug!(
-                    "Using libx264 software encoder with {} threads",
-                    thread_count
-                );
-                return Some((codec, options));
-            } else if *encoder_name == "h264_mf" {
-                options.set("hw_encoding", "true");
-                options.set("scenario", "4");
-                options.set("quality", "1");
-                return Some((codec, options));
+            match encoder_name {
+                "h264_videotoolbox" => {
+                    options.set("realtime", "false");
+                    options.set("allow_sw", "0");
+                    options.set("prio_speed", "1");
+                    options.set("profile", "high");
+                    options.set("level", "5.1");
+                    debug!("Using VideoToolbox hardware encoder");
+                    return Some((codec, options));
+                }
+                "h264_nvenc" => {
+                    options.set("preset", "p1");
+                    options.set("tune", "ll");
+                    options.set("rc", "vbr");
+                    options.set("multipass", "disabled");
+                    options.set("bf", "0");
+                    debug!("Using NVIDIA NVENC hardware encoder");
+                    return Some((codec, options));
+                }
+                "h264_amf" => {
+                    options.set("usage", "ultralowlatency");
+                    options.set("quality", "speed");
+                    options.set("rc", "vbr_latency");
+                    options.set("enforce_hrd", "0");
+                    debug!("Using AMD AMF hardware encoder");
+                    return Some((codec, options));
+                }
+                "h264_qsv" => {
+                    options.set("preset", "veryfast");
+                    options.set("look_ahead", "0");
+                    options.set("async_depth", "4");
+                    debug!("Using Intel QuickSync hardware encoder");
+                    return Some((codec, options));
+                }
+                "h264_vaapi" => {
+                    options.set("rc_mode", "VBR");
+                    options.set("async_depth", "4");
+                    debug!("Using VA-API hardware encoder (Linux)");
+                    return Some((codec, options));
+                }
+                "h264_mf" => {
+                    options.set("hw_encoding", "true");
+                    options.set("scenario", "4");
+                    options.set("quality", "1");
+                    debug!("Using MediaFoundation hardware encoder");
+                    return Some((codec, options));
+                }
+                "libx264" => {
+                    let keyframe_interval_secs = 2;
+                    let keyframe_interval =
+                        keyframe_interval_secs * config.frame_rate.numerator();
+                    let keyframe_interval_str = keyframe_interval.to_string();
+                    let thread_count = thread::available_parallelism()
+                        .map(|v| v.get())
+                        .unwrap_or(4);
+
+                    options.set(
+                        "preset",
+                        match preset {
+                            H264Preset::Slow => "slow",
+                            H264Preset::Medium => "medium",
+                            H264Preset::Ultrafast => "ultrafast",
+                        },
+                    );
+                    if let H264Preset::Ultrafast = preset {
+                        options.set("tune", "zerolatency");
+                        options.set("bf", "0");
+                        options.set("refs", "1");
+                        options.set("rc-lookahead", "0");
+                        options.set("aq-mode", "0");
+                        options.set("sc_threshold", "0");
+                    }
+                    options.set("vsync", "1");
+                    options.set("g", &keyframe_interval_str);
+                    options.set("keyint_min", &keyframe_interval_str);
+                    options.set("threads", &thread_count.to_string());
+                    options.set("sliced-threads", "1");
+
+                    debug!(
+                        "Using libx264 software encoder with {} threads",
+                        thread_count
+                    );
+                    return Some((codec, options));
+                }
+                _ => continue,
             }
         }
    }