Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion crates/euca-agent/src/routes/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ pub async fn profile(State(world): State<SharedWorld>) -> Json<serde_json::Value
let sections: Vec<serde_json::Value> = profiler
.frame_summary()
.iter()
.map(|(name, us)| {
.map(|(name, us, kind)| {
serde_json::json!({
"name": name,
"us": (*us * 10.0).round() / 10.0,
"kind": format!("{kind:?}"),
})
})
.collect();
Expand Down
2 changes: 1 addition & 1 deletion crates/euca-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ mod time;
pub use app::App;
pub use platform::performance_core_count;
pub use plugin::Plugin;
pub use profiler::{ProfileSection, Profiler, profiler_begin, profiler_end};
pub use profiler::{ProfileSection, ProfileSectionKind, Profiler, profiler_begin, profiler_end};
pub use time::Time;

/// Re-export `winit` for downstream crates that need window types.
Expand Down
83 changes: 77 additions & 6 deletions crates/euca-core/src/profiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,23 @@ use std::time::Instant;
/// Maximum number of frame times retained for averaging.
///
/// This bounds the rolling history that the average frame-time statistic is
/// computed over; `end_frame` compares the history length against it before
/// recording a new frame total.
const MAX_FRAME_HISTORY: usize = 60;

/// Identifies which processor a profile section was timed on.
///
/// The kind travels with every recorded section so that consumers of the
/// frame summary can separate CPU timings from GPU timings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProfileSectionKind {
    /// CPU-side section, measured with `std::time::Instant`.
    Cpu,
    /// GPU-side section, measured with timestamp queries.
    Gpu,
}

/// A recorded profile section within a single frame.
///
/// Sections come from two sources: the CPU begin/end timers and GPU
/// timestamp readback. The `kind` field tells the two apart.
#[derive(Clone, Debug, PartialEq)]
pub struct ProfileSection {
    /// Human-readable section label.
    pub name: &'static str,
    /// Duration of this section in microseconds.
    ///
    /// For CPU sections this is elapsed wall-clock time; for GPU sections it
    /// is the duration supplied by the caller after timestamp readback.
    pub duration_us: f64,
    /// Whether this section was timed on the CPU or GPU.
    pub kind: ProfileSectionKind,
}

/// Built-in frame profiler that tracks per-section timings and rolling frame statistics.
Expand All @@ -34,14 +45,26 @@ impl Profiler {
}
}

/// Return section names and durations (in microseconds) for the current frame.
pub fn frame_summary(&self) -> Vec<(&str, f64)> {
/// Return section names, durations (microseconds), and kind for the current frame.
pub fn frame_summary(&self) -> Vec<(&str, f64, ProfileSectionKind)> {
self.sections
.iter()
.map(|s| (s.name, s.duration_us))
.map(|s| (s.name, s.duration_us, s.kind))
.collect()
}

/// Append a GPU-timed section to the current frame.
///
/// Intended to be called once timestamp readback has produced a duration.
/// The entry is tagged as a GPU section, which lets consumers of the frame
/// summary show it separately from (or next to) the CPU sections.
///
/// * `name` - label for the section.
/// * `duration_us` - measured duration in microseconds.
pub fn record_gpu_section(&mut self, name: &'static str, duration_us: f64) {
    let kind = ProfileSectionKind::Gpu;
    let section = ProfileSection {
        name,
        duration_us,
        kind,
    };
    self.sections.push(section);
}

/// Average frame time in milliseconds over the last [`MAX_FRAME_HISTORY`] frames.
///
/// Returns `0.0` when no frame times have been recorded yet.
Expand All @@ -64,10 +87,18 @@ impl Profiler {
1000.0 / avg
}

/// Finish the current frame: record total frame time from all sections and reset
/// the section list for the next frame.
/// Finish the current frame: record total CPU frame time and reset the
/// section list for the next frame.
///
/// Only CPU sections contribute to the rolling frame-time average. GPU
/// sections run in parallel with CPU work and are reported separately.
pub fn end_frame(&mut self) {
let total_us: f64 = self.sections.iter().map(|s| s.duration_us).sum();
let total_us: f64 = self
.sections
.iter()
.filter(|s| s.kind == ProfileSectionKind::Cpu)
.map(|s| s.duration_us)
.sum();
let total_ms = total_us / 1000.0;

if self.frame_times.len() == MAX_FRAME_HISTORY {
Expand Down Expand Up @@ -103,6 +134,7 @@ pub fn profiler_end(profiler: &mut Profiler) {
profiler.sections.push(ProfileSection {
name,
duration_us: elapsed.as_secs_f64() * 1_000_000.0,
kind: ProfileSectionKind::Cpu,
});
}

Expand Down Expand Up @@ -229,4 +261,43 @@ mod tests {
// Outer should be >= inner since it wraps it.
assert!(summary[1].1 >= summary[0].1);
}

#[test]
fn cpu_sections_tagged_as_cpu() {
    // A section recorded through the begin/end pair must be reported as CPU.
    let mut profiler = Profiler::new();
    profiler_begin(&mut profiler, "cpu_work");
    profiler_end(&mut profiler);

    let summary = profiler.frame_summary();
    let (_, _, kind) = summary[0];
    assert_eq!(kind, ProfileSectionKind::Cpu);
}

#[test]
fn gpu_section_recorded_and_tagged() {
    // A GPU section must round-trip its label and duration and carry the GPU tag.
    let mut profiler = Profiler::new();
    profiler.record_gpu_section("shadow_pass", 123.4);

    let summary = profiler.frame_summary();
    assert_eq!(summary.len(), 1);

    let (name, duration_us, kind) = summary[0];
    assert_eq!(name, "shadow_pass");
    assert!((duration_us - 123.4).abs() < f64::EPSILON);
    assert_eq!(kind, ProfileSectionKind::Gpu);
}

#[test]
fn gpu_sections_excluded_from_frame_time() {
    // The GPU duration is deliberately huge: if it leaked into the frame total
    // the average would be at least this large, while scheduler jitter on the
    // short CPU section below cannot plausibly reach it. The previous bound
    // (`avg < 50.0` with a 5 ms GPU pass) passed even when GPU time was summed.
    const GPU_PASS_US: f64 = 1_000_000.0; // 1 s, in microseconds
    const GPU_PASS_MS: f64 = GPU_PASS_US / 1000.0;

    let mut profiler = Profiler::new();

    // One short CPU section plus one long GPU section in the same frame.
    profiler_begin(&mut profiler, "cpu");
    thread::sleep(Duration::from_millis(1));
    profiler_end(&mut profiler);
    profiler.record_gpu_section("gpu_pass", GPU_PASS_US);

    profiler.end_frame();

    let avg = profiler.avg_frame_time_ms();
    // The CPU section must still be counted...
    assert!(avg > 0.0, "CPU section should contribute to frame time: {avg}");
    // ...but the GPU section must not be.
    assert!(avg < GPU_PASS_MS, "avg should not include GPU section: {avg}");
}
}
2 changes: 2 additions & 0 deletions crates/euca-render/src/deferred.rs
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ impl<D: RenderDevice> DeferredPipeline<D> {
}),
stencil_ops: None,
}),
timestamp_writes: None,
},
);
pass.set_pipeline(&self.gbuffer_pipeline);
Expand All @@ -546,6 +547,7 @@ impl<D: RenderDevice> DeferredPipeline<D> {
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
},
);
pass.set_pipeline(&self.lighting_pipeline);
Expand Down
16 changes: 16 additions & 0 deletions crates/euca-render/src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ impl GpuContext {
required_features |= wgpu::Features::MULTI_DRAW_INDIRECT_COUNT;
}

// GPU timestamp queries for per-pass GPU profiling.
if supported.contains(wgpu::Features::TIMESTAMP_QUERY) {
required_features |= wgpu::Features::TIMESTAMP_QUERY;
log::info!("GPU supports TIMESTAMP_QUERY — GPU pass timing enabled");
}

// Bindless materials: texture binding arrays + non-uniform indexing.
let bindless_features = wgpu::Features::TEXTURE_BINDING_ARRAY
| wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING;
Expand Down Expand Up @@ -173,6 +179,8 @@ impl GpuContext {

let unified_memory = survey.supports_unified_memory();

let has_timestamp_query = required_features.contains(wgpu::Features::TIMESTAMP_QUERY);

let capabilities = Capabilities {
unified_memory,
multi_draw_indirect: has_multi_draw_indirect,
Expand All @@ -184,6 +192,7 @@ impl GpuContext {
max_bindings_per_bind_group: cap_max_bindings,
max_binding_array_elements: cap_max_binding_array,
device_name: adapter_info.name.clone(),
timestamp_query: has_timestamp_query,
..Default::default()
};

Expand Down Expand Up @@ -248,6 +257,10 @@ impl GpuContext {
required_features |= wgpu::Features::MULTI_DRAW_INDIRECT_COUNT;
}

if supported.contains(wgpu::Features::TIMESTAMP_QUERY) {
required_features |= wgpu::Features::TIMESTAMP_QUERY;
}

let bindless_features = wgpu::Features::TEXTURE_BINDING_ARRAY
| wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING;
if supported.contains(bindless_features) {
Expand Down Expand Up @@ -305,6 +318,8 @@ impl GpuContext {

let unified_memory = survey.supports_unified_memory();

let has_timestamp_query_async = required_features.contains(wgpu::Features::TIMESTAMP_QUERY);

let capabilities = Capabilities {
unified_memory,
multi_draw_indirect: has_multi_draw_indirect,
Expand All @@ -316,6 +331,7 @@ impl GpuContext {
max_bindings_per_bind_group: cap_max_bindings,
max_binding_array_elements: cap_max_binding_array,
device_name: adapter_info.name.clone(),
timestamp_query: has_timestamp_query_async,
..Default::default()
};

Expand Down
10 changes: 10 additions & 0 deletions crates/euca-render/src/hardware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ impl HardwareSurvey {
self.selected().vendor == GpuVendor::Apple
}

/// Report whether the selected adapter advertises `TIMESTAMP_QUERY`.
///
/// Timestamp queries are what make per-pass GPU timing possible, so GPU
/// durations can be reported next to the CPU profiler sections.
pub fn supports_timestamp_queries(&self) -> bool {
    let adapter = self.selected();
    adapter.features.contains(wgpu::Features::TIMESTAMP_QUERY)
}

/// Get the selected adapter info.
pub fn selected(&self) -> &AdapterInfo {
&self.adapters[self.selected_adapter]
Expand Down
1 change: 1 addition & 0 deletions crates/euca-render/src/post_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ fn run_fullscreen_pass<D: euca_rhi::RenderDevice>(
},
})],
depth_stencil_attachment: None,
timestamp_writes: None,
},
);
pass.set_pipeline(pipeline);
Expand Down
1 change: 1 addition & 0 deletions crates/euca-render/src/prepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ impl<D: euca_rhi::RenderDevice> PrepassPipeline<D> {
}),
stencil_ops: None,
}),
timestamp_writes: None,
},
);
pass.set_pipeline(&self.pipeline);
Expand Down
Loading
Loading