From f14deb8dc748ed0b18c214e84deab0b6278c8b9f Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Mon, 5 Jan 2026 11:29:21 -0700 Subject: [PATCH 01/32] perf(profiling): piggyback time samples onto allocation samples When allocation profiler takes a sample and interrupt_count > 0, collect both allocation and time samples in a single stack walk. Benefits: - Eliminates redundant stack walks when samples coincide. - Improves time sample accuracy (closer to timer expiry). --- profiling/src/allocation/mod.rs | 26 ++++++-- profiling/src/profiling/mod.rs | 62 +++++++++++++++++++ .../correctness/allocation_time_combined.json | 54 ++++++++++++++++ .../correctness/allocation_time_combined.php | 52 ++++++++++++++++ .../phpt/allocation_time_piggyback_01.phpt | 57 +++++++++++++++++ 5 files changed, 246 insertions(+), 5 deletions(-) create mode 100644 profiling/tests/correctness/allocation_time_combined.json create mode 100644 profiling/tests/correctness/allocation_time_combined.php create mode 100644 profiling/tests/phpt/allocation_time_piggyback_01.phpt diff --git a/profiling/src/allocation/mod.rs b/profiling/src/allocation/mod.rs index 3506d29ae61..29491d0ab51 100644 --- a/profiling/src/allocation/mod.rs +++ b/profiling/src/allocation/mod.rs @@ -74,13 +74,29 @@ impl AllocationProfilingStats { #[cold] pub fn collect_allocation(len: size_t) { if let Some(profiler) = Profiler::get() { + // Check if there's a pending time interrupt that we can piggyback on + let interrupt_count = REQUEST_LOCALS + .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst)) + .unwrap_or(0); + // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. unsafe { - profiler.collect_allocations( - zend::ddog_php_prof_get_current_execute_data(), - 1_i64, - len as i64, - ) + if interrupt_count > 0 { + // Piggyback time sample onto allocation sample + profiler.collect_allocation_and_time( + zend::ddog_php_prof_get_current_execute_data(), + 1_i64, + len as i64, + interrupt_count, + ) + } else { + // Normal allocation-only sample + profiler.collect_allocations( + zend::ddog_php_prof_get_current_execute_data(), + 1_i64, + len as i64, + ) + } }; } } diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 8687a25e25a..f37c52e5ce1 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -977,6 +977,68 @@ impl Profiler { } } + /// Collect a stack sample with both memory allocation AND time data. + /// This is an optimization that piggybacks time sampling onto allocation + /// sampling to avoid redundant stack walks. + #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] + pub fn collect_allocation_and_time( + &self, + execute_data: *mut zend_execute_data, + alloc_samples: i64, + alloc_size: i64, + interrupt_count: u32, + ) { + let interrupt_count = interrupt_count as i64; + let result = collect_stack_sample(execute_data); + match result { + Ok(frames) => { + let depth = frames.len(); + + // Get time data (wall and cpu time since last sample) + let (wall_time, cpu_time) = CLOCKS.with_borrow_mut(Clocks::rotate_clocks); + + let labels = Profiler::common_labels(0); + let n_labels = labels.len(); + + // Check if timeline is enabled and get timestamp if needed + let mut timestamp = NO_TIMESTAMP; + { + let system_settings = self.system_settings.load(Ordering::SeqCst); + // SAFETY: system settings are stable during a request. + if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } { + if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { + timestamp = now.as_nanos() as i64; + } + } + } + + match self.prepare_and_send_message( + frames, + SampleValues { + interrupt_count, + wall_time, + cpu_time, + alloc_samples, + alloc_size, + ..Default::default() + }, + labels, + timestamp, + ) { + Ok(_) => trace!( + "Sent combined stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." + ), + Err(err) => warn!( + "Failed to send combined stack sample: {err}" + ), + } + } + Err(err) => { + warn!("Failed to collect stack sample: {err}") + } + } + } + /// Collect a stack sample with exception. #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] pub fn collect_exception( diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json new file mode 100644 index 00000000000..51322770ce5 --- /dev/null +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -0,0 +1,54 @@ +{ + "args": { + "DD_PROFILING_ENABLED": "yes", + "DD_PROFILING_ALLOCATION_ENABLED": "yes", + "DD_PROFILING_WALL_TIME_ENABLED": "yes", + "DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED": "no", + "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED": "no", + "DD_PROFILING_LOG_LEVEL": "debug", + "EXECUTION_TIME": "5" + }, + "flaky": false, + "skip-asan": false, + "todo": "", + "checks": [ + { + "type": "regex", + "path": "$.sample_types[?(@.type=='samples')].unit", + "match": "^count$" + }, + { + "type": "regex", + "path": "$.sample_types[?(@.type=='wall-time')].unit", + "match": "^nanoseconds$" + }, + { + "type": "regex", + "path": "$.sample_types[?(@.type=='alloc-samples')].unit", + "match": "^count$" + }, + { + "type": "regex", + "path": "$.sample_types[?(@.type=='alloc-space')].unit", + "match": "^bytes$" + }, + { + "comment": "Verify we have samples with both allocation and time data (piggybacked samples)", + "type": "jq-minimum", + "path": ".samples | map(select(.values[0] > 0 and .values[1] > 0 and .values[2] > 0 and .values[3] > 0)) | length", + "match": 1 + }, + { + "comment": "Verify allocate_large appears in the profile", + "type": "jq-minimum", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_large\")))) | length", + "match": 1 + }, + { + "comment": "Verify allocate_medium appears in the profile", + "type": "jq-minimum", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_medium\")))) | length", + "match": 1 + } + ] +} diff --git a/profiling/tests/correctness/allocation_time_combined.php b/profiling/tests/correctness/allocation_time_combined.php new file mode 100644 index 00000000000..209f9ab42c4 --- /dev/null +++ b/profiling/tests/correctness/allocation_time_combined.php @@ -0,0 +1,52 @@ + 0 + +function allocate_large() { + // Large allocation to trigger sampling + $data = str_repeat("a", 1024 * 200_000); // ~200MB + return strlen($data); +} + +function allocate_medium() { + // Medium allocation to trigger sampling + $data = str_repeat("b", 1024 * 100_000); // ~100MB + return strlen($data); +} + +function consume_cpu() { + // CPU-intensive work to ensure time passes + $sum = 0; + for ($i = 0; $i < 100000; $i++) { + $sum += $i * $i; + } + return $sum; +} + +function main() { + $duration = $_ENV["EXECUTION_TIME"] ?? 10; + $end = microtime(true) + $duration; + + while (microtime(true) < $end) { + $start = microtime(true); + + // Mix allocations with CPU work + allocate_large(); + consume_cpu(); + allocate_medium(); + consume_cpu(); + + $elapsed = microtime(true) - $start; + + // Sleep to allow time interrupts to accumulate + // This increases the likelihood of piggybacking + $sleep = 0.02 - $elapsed; // Target 50 iterations per second + if ($sleep > 0.0) { + usleep((int) ($sleep * 1_000_000)); + } + } +} + +main(); diff --git a/profiling/tests/phpt/allocation_time_piggyback_01.phpt b/profiling/tests/phpt/allocation_time_piggyback_01.phpt new file mode 100644 index 00000000000..e4ed0a862a4 --- /dev/null +++ b/profiling/tests/phpt/allocation_time_piggyback_01.phpt @@ -0,0 +1,57 @@ +--TEST-- +[profiling] Piggybacking time samples onto allocation samples +--DESCRIPTION-- +When both allocation and time profiling are enabled, and an allocation sample +is taken when there's a pending time interrupt, both samples should be combined +into a single stack walk and message. This test verifies that the optimization +works correctly by enabling both profilers and running code that triggers both. +--SKIPIF-- + +--INI-- +datadog.profiling.enabled=yes +datadog.profiling.log_level=debug +datadog.profiling.allocation_enabled=yes +datadog.profiling.wall_time_enabled=yes +datadog.profiling.experimental_cpu_time_enabled=no +datadog.profiling.endpoint_collection_enabled=no +--FILE-- + +--EXPECTREGEX-- +.*Test completed successfully. +.*Stopping profiler. +.* From 5d9030b7af415137e8efa6b6ee0247bf950b9853 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 11:11:21 -0700 Subject: [PATCH 02/32] refactor orderings of collect_* and extract helpers --- profiling/src/allocation/mod.rs | 34 ++++---- profiling/src/capi.rs | 6 +- profiling/src/profiling/interrupts.rs | 9 +- profiling/src/profiling/mod.rs | 114 ++++++++------------------ profiling/src/profiling/uploader.rs | 3 +- profiling/src/wall_time.rs | 4 +- 6 files changed, 63 insertions(+), 107 deletions(-) diff --git a/profiling/src/allocation/mod.rs b/profiling/src/allocation/mod.rs index 29491d0ab51..5445f1d11ed 100644 --- a/profiling/src/allocation/mod.rs +++ b/profiling/src/allocation/mod.rs @@ -74,29 +74,25 @@ impl AllocationProfilingStats { #[cold] pub fn collect_allocation(len: size_t) { if let Some(profiler) = Profiler::get() { - // Check if there's a pending time interrupt that we can piggyback on + // Check if there's a pending time interrupt that we can handle now + // instead of waiting for an interrupt handler. This is slightly more + // accurate and efficient, win-win. + // Acquire: synchronizes-with the Release store to vm_interrupt, + // ensuring we see all increments that happened before the interrupt + // was triggered. let interrupt_count = REQUEST_LOCALS - .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst)) + .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::Acquire)) .unwrap_or(0); - // Safety: execute_data was provided by the engine, and the profiler doesn't mutate it. + // Safety: execute_data was provided by the engine, and the profiler + // doesn't mutate it. unsafe { - if interrupt_count > 0 { - // Piggyback time sample onto allocation sample - profiler.collect_allocation_and_time( - zend::ddog_php_prof_get_current_execute_data(), - 1_i64, - len as i64, - interrupt_count, - ) - } else { - // Normal allocation-only sample - profiler.collect_allocations( - zend::ddog_php_prof_get_current_execute_data(), - 1_i64, - len as i64, - ) - } + profiler.collect_allocations( + zend::ddog_php_prof_get_current_execute_data(), + 1_i64, + len as i64, + (interrupt_count > 0).then_some(interrupt_count), + ) }; } } diff --git a/profiling/src/capi.rs b/profiling/src/capi.rs index 0ad6588fb3b..bb7ccec07ba 100644 --- a/profiling/src/capi.rs +++ b/profiling/src/capi.rs @@ -46,8 +46,10 @@ extern "C" fn ddog_php_prof_trigger_time_sample() { if locals.system_settings().profiling_enabled { // Safety: only vm interrupts are stored there, or possibly null (edges only). if let Some(vm_interrupt) = unsafe { locals.vm_interrupt_addr.as_ref() } { - locals.interrupt_count.fetch_add(1, Ordering::SeqCst); - vm_interrupt.store(true, Ordering::SeqCst); + // Release: synchronizes-with the Acquire load in swap() on the same variable. + locals.interrupt_count.fetch_add(1, Ordering::Release); + // Release: signals the PHP engine to check for interrupts. + vm_interrupt.store(true, Ordering::Release); } } }); diff --git a/profiling/src/profiling/interrupts.rs b/profiling/src/profiling/interrupts.rs index bd1e32107eb..03758b88a31 100644 --- a/profiling/src/profiling/interrupts.rs +++ b/profiling/src/profiling/interrupts.rs @@ -50,7 +50,8 @@ impl InterruptManager { // Reset interrupt counter to prevent sampling during `mshutdown` (PHP 8.0 bug with // userland destructors), but leave the interrupt flag unchanged as other extensions // may have raised it. - (*interrupt.interrupt_count_ptr).store(0, Ordering::SeqCst); + // Relaxed: cleanup only, interrupt is already removed so no new writes will occur. + (*interrupt.interrupt_count_ptr).store(0, Ordering::Relaxed); } } @@ -62,8 +63,10 @@ impl InterruptManager { pub(super) fn trigger_interrupts(&self) { let vm_interrupts = self.vm_interrupts.lock().unwrap(); vm_interrupts.iter().for_each(|obj| unsafe { - (*obj.interrupt_count_ptr).fetch_add(1, Ordering::SeqCst); - (*obj.engine_ptr).store(true, Ordering::SeqCst); + // Release: synchronizes-with the Acquire load in swap() on the same variable. + (*obj.interrupt_count_ptr).fetch_add(1, Ordering::Release); + // Release: signals the PHP engine to check for interrupts. + (*obj.engine_ptr).store(true, Ordering::Release); }); } } diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index f37c52e5ce1..14b82a13bc7 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -902,16 +902,7 @@ impl Profiler { let labels = Profiler::common_labels(0); let n_labels = labels.len(); - let mut timestamp = NO_TIMESTAMP; - { - let system_settings = self.system_settings.load(Ordering::SeqCst); - // SAFETY: system settings are stable during a request. - if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } { - if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { - timestamp = now.as_nanos() as i64; - } - } - } + let timestamp = self.get_timeline_timestamp(); match self.prepare_and_send_message( frames, @@ -938,80 +929,36 @@ impl Profiler { } } - /// Collect a stack sample with memory allocations. + /// Collect a stack sample with memory allocations, and optionally time data. + /// + /// When `interrupt_count` is provided, this piggybacks time sampling onto + /// allocation sampling to avoid redundant stack walks. #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] pub fn collect_allocations( &self, execute_data: *mut zend_execute_data, alloc_samples: i64, alloc_size: i64, + interrupt_count: Option, ) { - let result = collect_stack_sample(execute_data); - match result { - Ok(frames) => { - let depth = frames.len(); - let labels = Profiler::common_labels(0); - let n_labels = labels.len(); - - match self.prepare_and_send_message( - frames, - SampleValues { - alloc_size, - alloc_samples, - ..Default::default() - }, - labels, - NO_TIMESTAMP, - ) { - Ok(_) => trace!( - "Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler." - ), - Err(err) => warn!( - "Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}" - ), - } - } - Err(err) => { - warn!("Failed to collect stack sample: {err}") - } - } - } - - /// Collect a stack sample with both memory allocation AND time data. - /// This is an optimization that piggybacks time sampling onto allocation - /// sampling to avoid redundant stack walks. - #[cfg_attr(feature = "tracing", tracing::instrument(skip_all))] - pub fn collect_allocation_and_time( - &self, - execute_data: *mut zend_execute_data, - alloc_samples: i64, - alloc_size: i64, - interrupt_count: u32, - ) { - let interrupt_count = interrupt_count as i64; let result = collect_stack_sample(execute_data); match result { Ok(frames) => { let depth = frames.len(); - // Get time data (wall and cpu time since last sample) - let (wall_time, cpu_time) = CLOCKS.with_borrow_mut(Clocks::rotate_clocks); + // Optionally collect time data when interrupt_count is provided + let (interrupt_count, wall_time, cpu_time, timestamp) = + if let Some(count) = interrupt_count { + let (wall_time, cpu_time) = CLOCKS.with_borrow_mut(Clocks::rotate_clocks); + let timestamp = self.get_timeline_timestamp(); + (count as i64, wall_time, cpu_time, timestamp) + } else { + (0, 0, 0, NO_TIMESTAMP) + }; let labels = Profiler::common_labels(0); let n_labels = labels.len(); - // Check if timeline is enabled and get timestamp if needed - let mut timestamp = NO_TIMESTAMP; - { - let system_settings = self.system_settings.load(Ordering::SeqCst); - // SAFETY: system settings are stable during a request. - if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } { - if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { - timestamp = now.as_nanos() as i64; - } - } - } - match self.prepare_and_send_message( frames, SampleValues { @@ -1026,10 +973,10 @@ impl Profiler { timestamp, ) { Ok(_) => trace!( - "Sent combined stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." + "Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." ), Err(err) => warn!( - "Failed to send combined stack sample: {err}" + "Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}" ), } } @@ -1067,16 +1014,7 @@ impl Profiler { let n_labels = labels.len(); - let mut timestamp = NO_TIMESTAMP; - { - let system_settings = self.system_settings.load(Ordering::SeqCst); - // SAFETY: system settings are stable during a request. - if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } { - if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) { - timestamp = now.as_nanos() as i64; - } - } - } + let timestamp = self.get_timeline_timestamp(); match self.prepare_and_send_message( frames, @@ -1487,6 +1425,22 @@ impl Profiler { } } + /// Gets a timestamp for timeline profiling if timeline is enabled. + /// Returns NO_TIMESTAMP if timeline is disabled or if getting the time fails. + fn get_timeline_timestamp(&self) -> i64 { + let system_settings = self.system_settings.load(Ordering::SeqCst); + // SAFETY: system settings are stable during a request. + if unsafe { *ptr::addr_of!((*system_settings).profiling_timeline_enabled) } { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .ok() + .map(|now| now.as_nanos() as i64) + .unwrap_or(NO_TIMESTAMP) + } else { + NO_TIMESTAMP + } + } + /// Creates the common message labels for all samples. /// /// * `n_extra_labels` - Reserve room for extra labels, such as when the diff --git a/profiling/src/profiling/uploader.rs b/profiling/src/profiling/uploader.rs index e175d778739..09a728423e0 100644 --- a/profiling/src/profiling/uploader.rs +++ b/profiling/src/profiling/uploader.rs @@ -161,12 +161,11 @@ impl Uploader { } } -#[cfg(test)] +#[cfg(all(test, feature = "debug_stats"))] mod tests { use super::*; #[test] - #[cfg(feature = "debug_stats")] fn test_create_internal_metadata() { // Set up all counters with known values EXCEPTION_PROFILING_EXCEPTION_COUNT.store(42, Ordering::Relaxed); diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index bfd9390c90d..70f1015a0ed 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -121,7 +121,9 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu * 1. Track how many interrupts there were. * 2. Ensure we don't collect on someone else's interrupt. */ - let interrupt_count = locals.interrupt_count.swap(0, Ordering::SeqCst); + // Acquire: synchronizes-with the Release store to vm_interrupt, ensuring + // we see all increments that happened before the interrupt was triggered. + let interrupt_count = locals.interrupt_count.swap(0, Ordering::Acquire); if interrupt_count == 0 { return; } From 989876db3b1e98035546161191059d4d487066dd Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 11:28:22 -0700 Subject: [PATCH 03/32] refactor(test): allocation + CPU combined tests --- .../correctness/allocation_time_combined.php | 37 ++++-------- .../phpt/allocation_time_piggyback_01.phpt | 57 ------------------- 2 files changed, 11 insertions(+), 83 deletions(-) delete mode 100644 profiling/tests/phpt/allocation_time_piggyback_01.phpt diff --git a/profiling/tests/correctness/allocation_time_combined.php b/profiling/tests/correctness/allocation_time_combined.php index 209f9ab42c4..e63869b8b0f 100644 --- a/profiling/tests/correctness/allocation_time_combined.php +++ b/profiling/tests/correctness/allocation_time_combined.php @@ -1,8 +1,13 @@ 0 +// Test to verify that allocation and time profiling can coexist and that +// the piggybacking optimization works (where time samples are taken during +// allocation samples when interrupt_count > 0). +// +// Note: Piggybacking is opportunistic and happens when an allocation sample +// occurs while a time interrupt is pending. PHP checks interrupts frequently +// (at loop boundaries, function calls, etc.), so the window is narrow. This +// test just verifies the mechanism works, not that it happens often. function allocate_large() { // Large allocation to trigger sampling @@ -16,36 +21,16 @@ function allocate_medium() { return strlen($data); } -function consume_cpu() { - // CPU-intensive work to ensure time passes - $sum = 0; - for ($i = 0; $i < 100000; $i++) { - $sum += $i * $i; - } - return $sum; -} - function main() { $duration = $_ENV["EXECUTION_TIME"] ?? 10; $end = microtime(true) + $duration; while (microtime(true) < $end) { - $start = microtime(true); - - // Mix allocations with CPU work + // Just allocate repeatedly. Over time, some allocations will happen + // to occur while a time interrupt is pending (set by the profiler + // thread every ~10ms), exercising the piggybacking code path. allocate_large(); - consume_cpu(); allocate_medium(); - consume_cpu(); - - $elapsed = microtime(true) - $start; - - // Sleep to allow time interrupts to accumulate - // This increases the likelihood of piggybacking - $sleep = 0.02 - $elapsed; // Target 50 iterations per second - if ($sleep > 0.0) { - usleep((int) ($sleep * 1_000_000)); - } } } diff --git a/profiling/tests/phpt/allocation_time_piggyback_01.phpt b/profiling/tests/phpt/allocation_time_piggyback_01.phpt deleted file mode 100644 index e4ed0a862a4..00000000000 --- a/profiling/tests/phpt/allocation_time_piggyback_01.phpt +++ /dev/null @@ -1,57 +0,0 @@ ---TEST-- -[profiling] Piggybacking time samples onto allocation samples ---DESCRIPTION-- -When both allocation and time profiling are enabled, and an allocation sample -is taken when there's a pending time interrupt, both samples should be combined -into a single stack walk and message. This test verifies that the optimization -works correctly by enabling both profilers and running code that triggers both. ---SKIPIF-- - ---INI-- -datadog.profiling.enabled=yes -datadog.profiling.log_level=debug -datadog.profiling.allocation_enabled=yes -datadog.profiling.wall_time_enabled=yes -datadog.profiling.experimental_cpu_time_enabled=no -datadog.profiling.endpoint_collection_enabled=no ---FILE-- - ---EXPECTREGEX-- -.*Test completed successfully. -.*Stopping profiler. -.* From c0119e93a6dc755ec94ba5e002d1c351e66fbe66 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 11:45:10 -0700 Subject: [PATCH 04/32] refactor: make jq for combined samples less brittle --- profiling/tests/correctness/allocation_time_combined.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 51322770ce5..40b3aa7a2dc 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -35,7 +35,7 @@ { "comment": "Verify we have samples with both allocation and time data (piggybacked samples)", "type": "jq-minimum", - "path": ".samples | map(select(.values[0] > 0 and .values[1] > 0 and .values[2] > 0 and .values[3] > 0)) | length", + "path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0)) | length", "match": 1 }, { From f43a597bb049bf482bf3fc0ce63f9275db4fcaa9 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 11:48:39 -0700 Subject: [PATCH 05/32] refactor: revert Ordering optimizations Let's do this in a dedicated PR. --- profiling/src/allocation/mod.rs | 5 +---- profiling/src/capi.rs | 6 ++---- profiling/src/profiling/interrupts.rs | 9 +++------ profiling/src/wall_time.rs | 4 +--- 4 files changed, 7 insertions(+), 17 deletions(-) diff --git a/profiling/src/allocation/mod.rs b/profiling/src/allocation/mod.rs index 5445f1d11ed..ce0ababa2f8 100644 --- a/profiling/src/allocation/mod.rs +++ b/profiling/src/allocation/mod.rs @@ -77,11 +77,8 @@ pub fn collect_allocation(len: size_t) { // Check if there's a pending time interrupt that we can handle now // instead of waiting for an interrupt handler. This is slightly more // accurate and efficient, win-win. - // Acquire: synchronizes-with the Release store to vm_interrupt, - // ensuring we see all increments that happened before the interrupt - // was triggered. let interrupt_count = REQUEST_LOCALS - .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::Acquire)) + .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst)) .unwrap_or(0); // Safety: execute_data was provided by the engine, and the profiler diff --git a/profiling/src/capi.rs b/profiling/src/capi.rs index bb7ccec07ba..0ad6588fb3b 100644 --- a/profiling/src/capi.rs +++ b/profiling/src/capi.rs @@ -46,10 +46,8 @@ extern "C" fn ddog_php_prof_trigger_time_sample() { if locals.system_settings().profiling_enabled { // Safety: only vm interrupts are stored there, or possibly null (edges only). if let Some(vm_interrupt) = unsafe { locals.vm_interrupt_addr.as_ref() } { - // Release: synchronizes-with the Acquire load in swap() on the same variable. - locals.interrupt_count.fetch_add(1, Ordering::Release); - // Release: signals the PHP engine to check for interrupts. - vm_interrupt.store(true, Ordering::Release); + locals.interrupt_count.fetch_add(1, Ordering::SeqCst); + vm_interrupt.store(true, Ordering::SeqCst); } } }); diff --git a/profiling/src/profiling/interrupts.rs b/profiling/src/profiling/interrupts.rs index 03758b88a31..bd1e32107eb 100644 --- a/profiling/src/profiling/interrupts.rs +++ b/profiling/src/profiling/interrupts.rs @@ -50,8 +50,7 @@ impl InterruptManager { // Reset interrupt counter to prevent sampling during `mshutdown` (PHP 8.0 bug with // userland destructors), but leave the interrupt flag unchanged as other extensions // may have raised it. - // Relaxed: cleanup only, interrupt is already removed so no new writes will occur. - (*interrupt.interrupt_count_ptr).store(0, Ordering::Relaxed); + (*interrupt.interrupt_count_ptr).store(0, Ordering::SeqCst); } } @@ -63,10 +62,8 @@ impl InterruptManager { pub(super) fn trigger_interrupts(&self) { let vm_interrupts = self.vm_interrupts.lock().unwrap(); vm_interrupts.iter().for_each(|obj| unsafe { - // Release: synchronizes-with the Acquire load in swap() on the same variable. - (*obj.interrupt_count_ptr).fetch_add(1, Ordering::Release); - // Release: signals the PHP engine to check for interrupts. - (*obj.engine_ptr).store(true, Ordering::Release); + (*obj.interrupt_count_ptr).fetch_add(1, Ordering::SeqCst); + (*obj.engine_ptr).store(true, Ordering::SeqCst); }); } } diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 70f1015a0ed..bfd9390c90d 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -121,9 +121,7 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu * 1. Track how many interrupts there were. * 2. Ensure we don't collect on someone else's interrupt. */ - // Acquire: synchronizes-with the Release store to vm_interrupt, ensuring - // we see all increments that happened before the interrupt was triggered. - let interrupt_count = locals.interrupt_count.swap(0, Ordering::Acquire); + let interrupt_count = locals.interrupt_count.swap(0, Ordering::SeqCst); if interrupt_count == 0 { return; } From 243d028fe770cdc2cb1e4d83f9206c8b4be171c2 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 11:50:56 -0700 Subject: [PATCH 06/32] style: use SAFETY instead of Safety --- profiling/src/allocation/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/src/allocation/mod.rs b/profiling/src/allocation/mod.rs index ce0ababa2f8..1bbcba2966d 100644 --- a/profiling/src/allocation/mod.rs +++ b/profiling/src/allocation/mod.rs @@ -81,7 +81,7 @@ pub fn collect_allocation(len: size_t) { .try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst)) .unwrap_or(0); - // Safety: execute_data was provided by the engine, and the profiler + // SAFETY: execute_data was provided by the engine, and the profiler // doesn't mutate it. unsafe { profiler.collect_allocations( From a9c822e1ff98c842e475b70e6e2f1906668915d2 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 15:10:34 -0700 Subject: [PATCH 07/32] test: hammer piggybacking --- .../correctness/allocation_time_combined.json | 20 ++++-- .../correctness/allocation_time_combined.php | 72 ++++++++++++++----- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 40b3aa7a2dc..7baa6471ff9 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -39,15 +39,27 @@ "match": 1 }, { - "comment": "Verify allocate_large appears in the profile", + "comment": "Verify test_frameless_str_replace appears in the profile", "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_large\")))) | length", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"test_frameless_str_replace\")))) | length", "match": 1 }, { - "comment": "Verify allocate_medium appears in the profile", + "comment": "Verify test_string_concat appears in the profile", "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"allocate_medium\")))) | length", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"test_string_concat\")))) | length", + "match": 1 + }, + { + "comment": "Verify str_replace (frameless function) appears in the profile", + "type": "jq-minimum", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"str_replace\")))) | length", + "match": 1 + }, + { + "comment": "Verify we have piggybacked samples that include str_replace in the stack", + "type": "jq-minimum", + "path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0 and (.location_ids | any(.frames[] | .function // \"\" | contains(\"str_replace\"))))) | length", "match": 1 } ] diff --git a/profiling/tests/correctness/allocation_time_combined.php b/profiling/tests/correctness/allocation_time_combined.php index e63869b8b0f..4b28bff7fde 100644 --- a/profiling/tests/correctness/allocation_time_combined.php +++ b/profiling/tests/correctness/allocation_time_combined.php @@ -4,33 +4,69 @@ // the piggybacking optimization works (where time samples are taken during // allocation samples when interrupt_count > 0). // -// Note: Piggybacking is opportunistic and happens when an allocation sample -// occurs while a time interrupt is pending. PHP checks interrupts frequently -// (at loop boundaries, function calls, etc.), so the window is narrow. This -// test just verifies the mechanism works, not that it happens often. - -function allocate_large() { - // Large allocation to trigger sampling - $data = str_repeat("a", 1024 * 200_000); // ~200MB - return strlen($data); +// This test uses frameless functions (PHP 8.4+) which don't handle VM +// interrupts, making them ideal for testing piggybacking: if we found a time +// sample attributed to a frameless function which is a leaf function, then +// most likely it was piggybacking (but we can actually check that memory +// values were set in the sample, we don't have to guess). +// +// Of course, only memory allocations which get sampled will include time, so +// in other cases the time interrupt will "slip" to something later. + +function test_frameless_str_replace(): int { + // Frameless function (PHP 8.4+) - doesn't handle VM interrupts + static $source = null; + if ($source === null) { + $source = str_repeat("x", 50_000_000); // ~50MB + } + $result = str_replace("x", "y", $source); + return strlen($result); } -function allocate_medium() { - // Medium allocation to trigger sampling - $data = str_repeat("b", 1024 * 100_000); // ~100MB - return strlen($data); +function test_frameless_implode(): int { + // Frameless function - allocates without interrupt handling + static $arr = null; + if ($arr === null) { + $arr = array_fill(0, 100_000, "test"); + } + $result = implode(",", $arr); + return strlen($result); +} + +function test_string_concat(): int { + // ZEND_CONCAT opcode - allocates without interrupt check + $s = str_repeat("x", 10000); + for ($i = 0; $i < 100; $i++) { + $s = $s . "y"; // Each concat allocates new string + } + return strlen($s); +} + +function test_array_init(): int { + // ZEND_INIT_ARRAY opcode - allocates without interrupt check + $total = 0; + for ($i = 0; $i < 1000; $i++) { + $arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + $total += count($arr); + } + return $total; } function main() { $duration = $_ENV["EXECUTION_TIME"] ?? 10; $end = microtime(true) + $duration; + $tests = [ + 'test_frameless_str_replace', + 'test_frameless_implode', + 'test_string_concat', + 'test_array_init', + ]; + while (microtime(true) < $end) { - // Just allocate repeatedly. Over time, some allocations will happen - // to occur while a time interrupt is pending (set by the profiler - // thread every ~10ms), exercising the piggybacking code path. - allocate_large(); - allocate_medium(); + foreach ($tests as $test) { + $test(); + } } } From 5ee032d33b70c3d40da58e5fe02fe3f24940d06b Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Wed, 7 Jan 2026 15:27:47 -0700 Subject: [PATCH 08/32] refactor: simplify test and hook it up properly --- .github/workflows/prof_correctness.yml | 15 +++++ .../correctness/allocation_time_combined.json | 18 ++---- .../correctness/allocation_time_combined.php | 62 ++----------------- 3 files changed, 26 insertions(+), 69 deletions(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index de76304a0c2..3d83bd404f6 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -103,6 +103,15 @@ jobs: export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1 php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocations.php unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE + mkdir -p profiling/tests/correctness/allocation_time_combined/ + export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocation_time_combined/test.pprof + export DD_PROFILING_ALLOCATION_ENABLED=yes + export DD_PROFILING_WALL_TIME_ENABLED=yes + export DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED=no + export DD_PROFILING_ENDPOINT_COLLECTION_ENABLED=no + export EXECUTION_TIME=5 + php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php + unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME - name: Run ZTS tests if: matrix.phpts == 'zts' @@ -149,6 +158,12 @@ jobs: expected_json: profiling/tests/correctness/timeline.json pprof_path: profiling/tests/correctness/timeline/ + - name: Check profiler correctness for allocation_time_combined + uses: Datadog/prof-correctness/analyze@main + with: + expected_json: profiling/tests/correctness/allocation_time_combined.json + pprof_path: profiling/tests/correctness/allocation_time_combined/ + - name: Check profiler correctness for exceptions ZTS if: matrix.phpts == 'zts' uses: Datadog/prof-correctness/analyze@main diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 7baa6471ff9..60d6f7a5a91 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -39,27 +39,21 @@ "match": 1 }, { - "comment": "Verify test_frameless_str_replace appears in the profile", + "comment": "Verify main function appears in the profile", "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"test_frameless_str_replace\")))) | length", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"main\")))) | length", "match": 1 }, { - "comment": "Verify test_string_concat appears in the profile", + "comment": "Verify str_repeat appears in the profile", "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"test_string_concat\")))) | length", + "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"str_repeat\")))) | length", "match": 1 }, { - "comment": "Verify str_replace (frameless function) appears in the profile", + "comment": "Verify we have piggybacked samples that include str_repeat in the stack", "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"str_replace\")))) | length", - "match": 1 - }, - { - "comment": "Verify we have piggybacked samples that include str_replace in the stack", - "type": "jq-minimum", - "path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0 and (.location_ids | any(.frames[] | .function // \"\" | contains(\"str_replace\"))))) | length", + "path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0 and (.location_ids | any(.frames[] | .function // \"\" | contains(\"str_repeat\"))))) | length", "match": 1 } ] diff --git a/profiling/tests/correctness/allocation_time_combined.php b/profiling/tests/correctness/allocation_time_combined.php index 4b28bff7fde..506a293a2af 100644 --- a/profiling/tests/correctness/allocation_time_combined.php +++ b/profiling/tests/correctness/allocation_time_combined.php @@ -4,69 +4,17 @@ // the piggybacking optimization works (where time samples are taken during // allocation samples when interrupt_count > 0). // -// This test uses frameless functions (PHP 8.4+) which don't handle VM -// interrupts, making them ideal for testing piggybacking: if we found a time -// sample attributed to a frameless function which is a leaf function, then -// most likely it was piggybacking (but we can actually check that memory -// values were set in the sample, we don't have to guess). -// -// Of course, only memory allocations which get sampled will include time, so -// in other cases the time interrupt will "slip" to something later. - -function test_frameless_str_replace(): int { - // Frameless function (PHP 8.4+) - doesn't handle VM interrupts - static $source = null; - if ($source === null) { - $source = str_repeat("x", 50_000_000); // ~50MB - } - $result = str_replace("x", "y", $source); - return strlen($result); -} - -function test_frameless_implode(): int { - // Frameless function - allocates without interrupt handling - static $arr = null; - if ($arr === null) { - $arr = array_fill(0, 100_000, "test"); - } - $result = implode(",", $arr); - return strlen($result); -} - -function test_string_concat(): int { - // ZEND_CONCAT opcode - allocates without interrupt check - $s = str_repeat("x", 10000); - for ($i = 0; $i < 100; $i++) { - $s = $s . "y"; // Each concat allocates new string - } - return strlen($s); -} - -function test_array_init(): int { - // ZEND_INIT_ARRAY opcode - allocates without interrupt check - $total = 0; - for ($i = 0; $i < 1000; $i++) { - $arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; - $total += count($arr); - } - return $total; -} +// This test uses str_repeat which allocates large amounts of memory, +// increasing the likelihood of capturing piggybacked samples. function main() { $duration = $_ENV["EXECUTION_TIME"] ?? 10; $end = microtime(true) + $duration; - $tests = [ - 'test_frameless_str_replace', - 'test_frameless_implode', - 'test_string_concat', - 'test_array_init', - ]; - while (microtime(true) < $end) { - foreach ($tests as $test) { - $test(); - } + // Allocate large strings to trigger allocation sampling + $s = str_repeat("x", 10_000_000); + strlen($s); // Use the string to prevent optimization } } From 5d79ecdd0d8609038a3bf91bb153b334b26fcc74 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 9 Jan 2026 10:12:55 -0700 Subject: [PATCH 09/32] test: fix prof-correctness --- .../correctness/allocation_time_combined.json | 95 +++++++++---------- .../correctness/allocation_time_combined.php | 13 ++- 2 files changed, 51 insertions(+), 57 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 60d6f7a5a91..e73b3423ff9 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -1,60 +1,51 @@ { - "args": { - "DD_PROFILING_ENABLED": "yes", - "DD_PROFILING_ALLOCATION_ENABLED": "yes", - "DD_PROFILING_WALL_TIME_ENABLED": "yes", - "DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED": "no", - "DD_PROFILING_ENDPOINT_COLLECTION_ENABLED": "no", - "DD_PROFILING_LOG_LEVEL": "debug", - "EXECUTION_TIME": "5" - }, - "flaky": false, - "skip-asan": false, - "todo": "", - "checks": [ - { - "type": "regex", - "path": "$.sample_types[?(@.type=='samples')].unit", - "match": "^count$" - }, - { - "type": "regex", - "path": "$.sample_types[?(@.type=='wall-time')].unit", - "match": "^nanoseconds$" - }, - { - "type": "regex", - "path": "$.sample_types[?(@.type=='alloc-samples')].unit", - "match": "^count$" - }, - { - "type": "regex", - "path": "$.sample_types[?(@.type=='alloc-space')].unit", - "match": "^bytes$" - }, - { - "comment": "Verify we have samples with both allocation and time data (piggybacked samples)", - "type": "jq-minimum", - "path": ". as $root | (.sample_types | to_entries | map({(.value.type): .key}) | add) as $indices | .samples | map(select(.values[$indices.samples] > 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0)) | length", - "match": 1 - }, - { - "comment": "Verify main function appears in the profile", - "type": "jq-minimum", - "path": ".samples | map(select(.location_ids | any(.frames[] | .function // \"\" | contains(\"main\")))) | length", - "match": 1 + "scale_by_duration": true, + "test_name": "php_allocation_time_combined", + "stacks": [ + { + "profile-type": "alloc-size", + "stack-content": [ + { + "regular_expression": " 0 and .values[$indices.\"wall-time\"] > 0 and .values[$indices.\"alloc-samples\"] > 0 and .values[$indices.\"alloc-space\"] > 0 and (.location_ids | any(.frames[] | .function // \"\" | contains(\"str_repeat\"))))) | length", - "match": 1 + "profile-type": "wall-time", + "stack-content": [ + { + "regular_expression": " 0). // -// This test uses str_repeat which allocates large amounts of memory, -// increasing the likelihood of capturing piggybacked samples. +// This test uses str_replace, a frameless function (PHP 8.4+) which allocates +// large amounts of memory, increasing the likelihood of capturing piggybacked +// samples. Frameless functions don't handle VM interrupts, making them ideal +// for demonstrating the piggybacking optimization. function main() { $duration = $_ENV["EXECUTION_TIME"] ?? 10; $end = microtime(true) + $duration; while (microtime(true) < $end) { - // Allocate large strings to trigger allocation sampling - $s = str_repeat("x", 10_000_000); - strlen($s); // Use the string to prevent optimization + // str_replace is frameless in PHP 8.4+ and allocates a new string + $s = str_repeat("x", 10_000_000); // 10MB source + $result = str_replace("x", "y", $s); // 10MB allocation in frameless function + strlen($result); // Use the result to prevent optimization } } From aa62d66f44efae4895410e5d9ffeaf3c9170f953 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Mon, 12 Jan 2026 11:10:26 -0700 Subject: [PATCH 10/32] ci(profiling): run allocation_time_combined on PHP 8.4+ --- .github/workflows/prof_correctness.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index 3d83bd404f6..36e12eefff5 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-24.04 strategy: matrix: - php-version: [8.0, 8.1, 8.2, 8.3, 8.4, 8.5] + php-version: ['8.0', '8.1', '8.2', '8.3', '8.4', '8.5'] phpts: [nts, zts] include: - phpts: zts @@ -103,6 +103,16 @@ jobs: export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1 php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocations.php unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE + + - name: Run allocation_time_combined test (PHP 8.4+) + if: fromJSON(matrix.php-version) >= 8.4 + run: | + export DD_PROFILING_LOG_LEVEL=trace + export DD_PROFILING_EXPERIMENTAL_FEATURES_ENABLED=1 + export DD_PROFILING_EXPERIMENTAL_EXCEPTION_SAMPLING_DISTANCE=1 + export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1 + php -v + php -d extension=target/profiler-release/libdatadog_php_profiling.so --ri datadog-profiling mkdir -p profiling/tests/correctness/allocation_time_combined/ export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocation_time_combined/test.pprof export DD_PROFILING_ALLOCATION_ENABLED=yes @@ -159,6 +169,7 @@ jobs: pprof_path: profiling/tests/correctness/timeline/ - name: Check profiler correctness for allocation_time_combined + if: fromJSON(matrix.php-version) >= 8.4 uses: Datadog/prof-correctness/analyze@main with: expected_json: profiling/tests/correctness/allocation_time_combined.json From b1765c0d473d6e98120ccbb29f344a55de7df713 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Mon, 12 Jan 2026 11:15:37 -0700 Subject: [PATCH 11/32] test(profiling): fix allocation_time_combined float->int --- profiling/tests/correctness/allocation_time_combined.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index e73b3423ff9..03b6b6bd3a6 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -42,8 +42,8 @@ }, { "regular_expression": " Date: Mon, 12 Jan 2026 15:52:53 -0700 Subject: [PATCH 12/32] ci(profiling): update expected data --- .../correctness/allocation_time_combined.json | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 03b6b6bd3a6..8576c344995 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -7,13 +7,13 @@ "stack-content": [ { "regular_expression": " Date: Tue, 13 Jan 2026 16:28:12 -0700 Subject: [PATCH 13/32] ci(profiling): run allocation_time_combined with ALLOCATION_SAMPLING_DISTANCE=1 --- .github/workflows/prof_correctness.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index 36e12eefff5..379732f8bf1 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -116,12 +116,13 @@ jobs: mkdir -p profiling/tests/correctness/allocation_time_combined/ export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocation_time_combined/test.pprof export DD_PROFILING_ALLOCATION_ENABLED=yes + export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1 export DD_PROFILING_WALL_TIME_ENABLED=yes export DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED=no export DD_PROFILING_ENDPOINT_COLLECTION_ENABLED=no export EXECUTION_TIME=5 php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php - unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME + unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME - name: Run ZTS tests if: matrix.phpts == 'zts' From 3a7430ead3173ea555f53309f09cb36da3a69f7b Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Tue, 13 Jan 2026 16:37:18 -0700 Subject: [PATCH 14/32] ci(profiling): ignore main for wall-time for allocation_time_combined --- profiling/tests/correctness/allocation_time_combined.json | 5 ----- 1 file changed, 5 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 8576c344995..2f8a827d7d7 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -35,11 +35,6 @@ { "profile-type": "wall-time", "stack-content": [ - { - "regular_expression": " Date: Tue, 13 Jan 2026 16:59:39 -0700 Subject: [PATCH 15/32] EXPERIMENTAL: check ZEND_OP_DATA too --- profiling/src/profiling/stack_walking.rs | 63 +++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 04a8babfca0..7a7e935f9d0 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -10,6 +10,7 @@ use crate::bindings::zend_flf_functions; #[cfg(php_frameless)] use crate::bindings::{ ZEND_FRAMELESS_ICALL_0, ZEND_FRAMELESS_ICALL_1, ZEND_FRAMELESS_ICALL_2, ZEND_FRAMELESS_ICALL_3, + ZEND_OP_DATA, }; const COW_PHP_OPEN_TAG: Cow = Cow::Borrowed(" Option<&zend_op> { + let opline = safely_get_opline(execute_data)?; + if opline.opcode as u32 != ZEND_OP_DATA { + return Some(opline); + } + + let func = unsafe { execute_data.func.as_ref()? }; + let op_array = func.op_array()?; + + let opcodes_start = op_array.opcodes; + if opcodes_start.is_null() || execute_data.opline.is_null() { + return None; + } + + let begin = opcodes_start as usize; + let cur = execute_data.opline as usize; + let op_size = core::mem::size_of::(); + if cur < begin + op_size { + return Some(opline); + } + + // SAFETY: we validated bounds and only move by one opcode. + let prev_ptr = unsafe { execute_data.opline.sub(1) }; + if !opline_in_bounds(op_array, prev_ptr) { + return Some(opline); + } + + // SAFETY: prev_ptr is in bounds of op_array opcodes. + let prev = unsafe { &*prev_ptr }; + match prev.opcode as u32 { + ZEND_FRAMELESS_ICALL_0 + | ZEND_FRAMELESS_ICALL_1 + | ZEND_FRAMELESS_ICALL_2 + | ZEND_FRAMELESS_ICALL_3 => { + // Debug aid: crash hard when we observe OP_DATA with a preceding frameless icall. + // This is intended to prove whether allocation sampling ever sees the OP_DATA + // adjacency case in CI. + panic!( + "Observed OP_DATA ({}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={:p} opline={:p} prev_opline={:p}", + ZEND_OP_DATA, + prev.opcode as u32, + execute_data, + execute_data.opline, + prev_ptr + ); + Some(prev) + } + _ => Some(opline), + } +} + #[cfg(php_run_time_cache)] mod detail { use super::*; @@ -306,7 +367,7 @@ mod detail { // we can check for null. #[cfg(php_frameless)] if !func.is_internal() { - if let Some(opline) = safely_get_opline(execute_data) { + if let Some(opline) = frameless_opline_or_prev(execute_data) { match opline.opcode as u32 { ZEND_FRAMELESS_ICALL_0 | ZEND_FRAMELESS_ICALL_1 From e16e1b107a3096dd9a1d45650e68a4f6351740c1 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Tue, 13 Jan 2026 18:05:47 -0700 Subject: [PATCH 16/32] DEBUG: do we hit frameless icalls? --- profiling/src/profiling/stack_walking.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 7a7e935f9d0..30fce4f5c06 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -169,6 +169,24 @@ unsafe fn extract_file_and_line( #[inline] fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op> { let opline = safely_get_opline(execute_data)?; + if matches!( + opline.opcode as u32, + ZEND_FRAMELESS_ICALL_0 + | ZEND_FRAMELESS_ICALL_1 + | ZEND_FRAMELESS_ICALL_2 + | ZEND_FRAMELESS_ICALL_3 + ) { + // Debug aid: crash hard when we observe a frameless icall as the current opline. + // If this never triggers in CI, then allocation sampling is not observing the + // frameless opcode as the current execute_data->opline at sample time. + panic!( + "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={:p} opline={:p} extended_value={}", + opline.opcode as u32, + execute_data, + execute_data.opline, + opline.extended_value + ); + } if opline.opcode as u32 != ZEND_OP_DATA { return Some(opline); } @@ -212,7 +230,6 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op execute_data.opline, prev_ptr ); - Some(prev) } _ => Some(opline), } From a3e3e4ed6043fe5378110a813259b1afc85defd8 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Tue, 13 Jan 2026 22:11:26 -0700 Subject: [PATCH 17/32] DEBUG: more info in panic --- profiling/src/profiling/stack_walking.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 30fce4f5c06..f8501468775 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -176,13 +176,17 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op | ZEND_FRAMELESS_ICALL_2 | ZEND_FRAMELESS_ICALL_3 ) { + // Best-effort decode of the frameless function name for debugging. + let flf_name = unsafe { + let func = &**zend_flf_functions.offset(opline.extended_value as isize); + extract_function_name(func).unwrap_or_else(|| Cow::Borrowed("")) + }; // Debug aid: crash hard when we observe a frameless icall as the current opline. // If this never triggers in CI, then allocation sampling is not observing the // frameless opcode as the current execute_data->opline at sample time. panic!( - "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={:p} opline={:p} extended_value={}", + "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={execute_data:p} opline={:p} extended_value={} flf_func={flf_name}", opline.opcode as u32, - execute_data, execute_data.opline, opline.extended_value ); @@ -223,10 +227,8 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op // This is intended to prove whether allocation sampling ever sees the OP_DATA // adjacency case in CI. panic!( - "Observed OP_DATA ({}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={:p} opline={:p} prev_opline={:p}", - ZEND_OP_DATA, + "Observed OP_DATA ({ZEND_OP_DATA}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={execute_data:p} opline={:p} prev_opline={:p}", prev.opcode as u32, - execute_data, execute_data.opline, prev_ptr ); From eb10ef884921b67627f07284707dfe52cf05c157 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 17:02:08 -0700 Subject: [PATCH 18/32] refactor: simplify Profiler::get_timeline_timestamp --- profiling/src/profiling/mod.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 07eb5df447f..389fd35a836 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -1443,15 +1443,11 @@ impl Profiler { /// Gets a timestamp for timeline profiling if timeline is enabled. /// Returns NO_TIMESTAMP if timeline is disabled or if getting the time fails. fn get_timeline_timestamp(&self) -> i64 { - if self.is_timeline_enabled() { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .ok() - .map(|now| now.as_nanos() as i64) - .unwrap_or(NO_TIMESTAMP) - } else { - NO_TIMESTAMP - } + self.is_timeline_enabled() + .then(|| SystemTime::now().duration_since(UNIX_EPOCH).ok()) + .flatten() + .map(|now| now.as_nanos() as i64) + .unwrap_or(NO_TIMESTAMP) } /// Creates the common message labels for all samples. From 73c86a0154b24adec33b275a09748f7f4979c889 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 17:03:05 -0700 Subject: [PATCH 19/32] Revert "DEBUG: more info in panic" This reverts commit a3e3e4ed6043fe5378110a813259b1afc85defd8. --- profiling/src/profiling/stack_walking.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index f8501468775..30fce4f5c06 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -176,17 +176,13 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op | ZEND_FRAMELESS_ICALL_2 | ZEND_FRAMELESS_ICALL_3 ) { - // Best-effort decode of the frameless function name for debugging. - let flf_name = unsafe { - let func = &**zend_flf_functions.offset(opline.extended_value as isize); - extract_function_name(func).unwrap_or_else(|| Cow::Borrowed("")) - }; // Debug aid: crash hard when we observe a frameless icall as the current opline. // If this never triggers in CI, then allocation sampling is not observing the // frameless opcode as the current execute_data->opline at sample time. panic!( - "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={execute_data:p} opline={:p} extended_value={} flf_func={flf_name}", + "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={:p} opline={:p} extended_value={}", opline.opcode as u32, + execute_data, execute_data.opline, opline.extended_value ); @@ -227,8 +223,10 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op // This is intended to prove whether allocation sampling ever sees the OP_DATA // adjacency case in CI. panic!( - "Observed OP_DATA ({ZEND_OP_DATA}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={execute_data:p} opline={:p} prev_opline={:p}", + "Observed OP_DATA ({}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={:p} opline={:p} prev_opline={:p}", + ZEND_OP_DATA, prev.opcode as u32, + execute_data, execute_data.opline, prev_ptr ); From 520c158849dcb361d9c292f73824e0d4d970e6d0 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 17:03:28 -0700 Subject: [PATCH 20/32] Revert "DEBUG: do we hit frameless icalls?" This reverts commit e16e1b107a3096dd9a1d45650e68a4f6351740c1. --- profiling/src/profiling/stack_walking.rs | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 30fce4f5c06..7a7e935f9d0 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -169,24 +169,6 @@ unsafe fn extract_file_and_line( #[inline] fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op> { let opline = safely_get_opline(execute_data)?; - if matches!( - opline.opcode as u32, - ZEND_FRAMELESS_ICALL_0 - | ZEND_FRAMELESS_ICALL_1 - | ZEND_FRAMELESS_ICALL_2 - | ZEND_FRAMELESS_ICALL_3 - ) { - // Debug aid: crash hard when we observe a frameless icall as the current opline. - // If this never triggers in CI, then allocation sampling is not observing the - // frameless opcode as the current execute_data->opline at sample time. - panic!( - "Observed FRAMELESS_ICALL opcode ({}) as current opline at execute_data={:p} opline={:p} extended_value={}", - opline.opcode as u32, - execute_data, - execute_data.opline, - opline.extended_value - ); - } if opline.opcode as u32 != ZEND_OP_DATA { return Some(opline); } @@ -230,6 +212,7 @@ fn frameless_opline_or_prev(execute_data: &zend_execute_data) -> Option<&zend_op execute_data.opline, prev_ptr ); + Some(prev) } _ => Some(opline), } From 35ac0b648385b52b2945af178c2ca280dcc1ccdb Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 17:05:48 -0700 Subject: [PATCH 21/32] undo experimental debugging --- profiling/src/profiling/stack_walking.rs | 63 +----------------------- 1 file changed, 1 insertion(+), 62 deletions(-) diff --git a/profiling/src/profiling/stack_walking.rs b/profiling/src/profiling/stack_walking.rs index 7a7e935f9d0..04a8babfca0 100644 --- a/profiling/src/profiling/stack_walking.rs +++ b/profiling/src/profiling/stack_walking.rs @@ -10,7 +10,6 @@ use crate::bindings::zend_flf_functions; #[cfg(php_frameless)] use crate::bindings::{ ZEND_FRAMELESS_ICALL_0, ZEND_FRAMELESS_ICALL_1, ZEND_FRAMELESS_ICALL_2, ZEND_FRAMELESS_ICALL_3, - ZEND_OP_DATA, }; const COW_PHP_OPEN_TAG: Cow = Cow::Borrowed(" Option<&zend_op> { - let opline = safely_get_opline(execute_data)?; - if opline.opcode as u32 != ZEND_OP_DATA { - return Some(opline); - } - - let func = unsafe { execute_data.func.as_ref()? }; - let op_array = func.op_array()?; - - let opcodes_start = op_array.opcodes; - if opcodes_start.is_null() || execute_data.opline.is_null() { - return None; - } - - let begin = opcodes_start as usize; - let cur = execute_data.opline as usize; - let op_size = core::mem::size_of::(); - if cur < begin + op_size { - return Some(opline); - } - - // SAFETY: we validated bounds and only move by one opcode. - let prev_ptr = unsafe { execute_data.opline.sub(1) }; - if !opline_in_bounds(op_array, prev_ptr) { - return Some(opline); - } - - // SAFETY: prev_ptr is in bounds of op_array opcodes. - let prev = unsafe { &*prev_ptr }; - match prev.opcode as u32 { - ZEND_FRAMELESS_ICALL_0 - | ZEND_FRAMELESS_ICALL_1 - | ZEND_FRAMELESS_ICALL_2 - | ZEND_FRAMELESS_ICALL_3 => { - // Debug aid: crash hard when we observe OP_DATA with a preceding frameless icall. - // This is intended to prove whether allocation sampling ever sees the OP_DATA - // adjacency case in CI. - panic!( - "Observed OP_DATA ({}) with preceding FRAMELESS_ICALL opcode ({}) at execute_data={:p} opline={:p} prev_opline={:p}", - ZEND_OP_DATA, - prev.opcode as u32, - execute_data, - execute_data.opline, - prev_ptr - ); - Some(prev) - } - _ => Some(opline), - } -} - #[cfg(php_run_time_cache)] mod detail { use super::*; @@ -367,7 +306,7 @@ mod detail { // we can check for null. #[cfg(php_frameless)] if !func.is_internal() { - if let Some(opline) = frameless_opline_or_prev(execute_data) { + if let Some(opline) = safely_get_opline(execute_data) { match opline.opcode as u32 { ZEND_FRAMELESS_ICALL_0 | ZEND_FRAMELESS_ICALL_1 From 87a52f4186b066164a9e493dd478ca8b0b551ec8 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 18:37:39 -0700 Subject: [PATCH 22/32] test: fix expected log message --- profiling/tests/phpt/allocation_sampling_distance.phpt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/tests/phpt/allocation_sampling_distance.phpt b/profiling/tests/phpt/allocation_sampling_distance.phpt index f1069c561cd..46c0c94c66d 100644 --- a/profiling/tests/phpt/allocation_sampling_distance.phpt +++ b/profiling/tests/phpt/allocation_sampling_distance.phpt @@ -44,5 +44,5 @@ $str = \str_repeat('a', 87); Done. --EXPECTREGEX-- .* Memory allocation profiling initialized with a sampling distance of 1 bytes.* -.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, and 1 allocations to profiler.* +.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.* .*Done\..* From 093ce669501820389ed2bc95c02bbb29e1ccc89d Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Fri, 16 Jan 2026 18:49:46 -0700 Subject: [PATCH 23/32] test: use more replaces to exaggerate effect --- .../correctness/allocation_time_combined.json | 18 +++++++++--------- .../correctness/allocation_time_combined.php | 7 ++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 2f8a827d7d7..1de1fc1b6f8 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -7,13 +7,13 @@ "stack-content": [ { "regular_expression": " Date: Fri, 16 Jan 2026 19:30:02 -0700 Subject: [PATCH 24/32] EXPERIMENT: show leaf frame --- profiling/src/profiling/mod.rs | 9 ++++++++- profiling/tests/phpt/allocation_sampling_distance.phpt | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 389fd35a836..43336d97844 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -959,8 +959,15 @@ impl Profiler { let result = collect_stack_sample(execute_data); match result { Ok(frames) => { + use core::ops::Deref; let depth = frames.len(); + let first_frame = frames + .first() + .map(|frame| frame.function.deref()) + .unwrap_or("[empty frame]") + .to_string(); + // Optionally collect time data when interrupt_count is provided let (interrupt_count, wall_time, cpu_time, timestamp) = if let Some(count) = interrupt_count { @@ -988,7 +995,7 @@ impl Profiler { timestamp, ) { Ok(_) => trace!( - "Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." + "Sent stack sample with leaf frame \"{first_frame}\" of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." ), Err(err) => warn!( "Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}" diff --git a/profiling/tests/phpt/allocation_sampling_distance.phpt b/profiling/tests/phpt/allocation_sampling_distance.phpt index 46c0c94c66d..40f8729e210 100644 --- a/profiling/tests/phpt/allocation_sampling_distance.phpt +++ b/profiling/tests/phpt/allocation_sampling_distance.phpt @@ -44,5 +44,5 @@ $str = \str_repeat('a', 87); Done. --EXPECTREGEX-- .* Memory allocation profiling initialized with a sampling distance of 1 bytes.* -.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.* +.* Sent stack sample with leaf frame .* of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.* .*Done\..* From b64dd9a37a6a3ab93fbb6a3c2e35a30a2b3c2116 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Tue, 20 Jan 2026 11:38:19 +0100 Subject: [PATCH 25/32] do not fail fast, finish prof correctness --- .github/workflows/prof_correctness.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index 762f1b1e0b0..e663ee44c64 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -9,6 +9,7 @@ jobs: prof-correctness: runs-on: ubuntu-24.04 strategy: + fail-fast: false matrix: php-version: ['8.0', '8.1', '8.2', '8.3', '8.4', '8.5'] phpts: [nts, zts] From 44e27347e49df6900b05ed3e49d2bfe89d4b041b Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Tue, 20 Jan 2026 11:47:42 +0100 Subject: [PATCH 26/32] upload artifacts --- .github/workflows/prof_correctness.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index e663ee44c64..d3dae97763f 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -129,6 +129,11 @@ jobs: php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME + - name: Temp upload pprof + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + path: /home/runner/work/dd-trace-php/dd-trace-php/profiling/tests/correctness/allocation_time_combined/* + - name: Run ZTS tests if: matrix.phpts == 'zts' run: | From d0631294801cb1741b784cf3d6f04c8f9a882be1 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Tue, 20 Jan 2026 16:33:55 +0100 Subject: [PATCH 27/32] Disable Xdebug for prof-correctness --- .github/workflows/prof_correctness.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index d3dae97763f..b4a81772eb7 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -28,6 +28,7 @@ jobs: uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2.35.5 with: php-version: ${{ matrix.php-version }} + coverage: none extensions: ${{ matrix.extensions }} env: phpts: ${{ matrix.phpts }} From ea6bc22ef15e5c099249e40e0aa2b211f19de3af Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Tue, 20 Jan 2026 16:35:29 +0100 Subject: [PATCH 28/32] Revert "upload artifacts" This reverts commit 44e27347e49df6900b05ed3e49d2bfe89d4b041b. --- .github/workflows/prof_correctness.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index b4a81772eb7..da49c2bd496 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -130,11 +130,6 @@ jobs: php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME - - name: Temp upload pprof - uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 - with: - path: /home/runner/work/dd-trace-php/dd-trace-php/profiling/tests/correctness/allocation_time_combined/* - - name: Run ZTS tests if: matrix.phpts == 'zts' run: | From 5114acda94b54cdf07c912badac7731635426635 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Tue, 20 Jan 2026 10:18:03 -0700 Subject: [PATCH 29/32] test: allow any non-zero wall-time for piggy-backed samples --- profiling/tests/correctness/allocation_time_combined.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 1de1fc1b6f8..e3c1086e907 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -37,8 +37,8 @@ "stack-content": [ { "regular_expression": " Date: Tue, 20 Jan 2026 23:04:54 -0700 Subject: [PATCH 30/32] remove first_frame for performance --- profiling/src/profiling/mod.rs | 9 +-------- profiling/tests/phpt/allocation_sampling_distance.phpt | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/profiling/src/profiling/mod.rs b/profiling/src/profiling/mod.rs index 43336d97844..389fd35a836 100644 --- a/profiling/src/profiling/mod.rs +++ b/profiling/src/profiling/mod.rs @@ -959,15 +959,8 @@ impl Profiler { let result = collect_stack_sample(execute_data); match result { Ok(frames) => { - use core::ops::Deref; let depth = frames.len(); - let first_frame = frames - .first() - .map(|frame| frame.function.deref()) - .unwrap_or("[empty frame]") - .to_string(); - // Optionally collect time data when interrupt_count is provided let (interrupt_count, wall_time, cpu_time, timestamp) = if let Some(count) = interrupt_count { @@ -995,7 +988,7 @@ impl Profiler { timestamp, ) { Ok(_) => trace!( - "Sent stack sample with leaf frame \"{first_frame}\" of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." + "Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler." ), Err(err) => warn!( "Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}" diff --git a/profiling/tests/phpt/allocation_sampling_distance.phpt b/profiling/tests/phpt/allocation_sampling_distance.phpt index 40f8729e210..46c0c94c66d 100644 --- a/profiling/tests/phpt/allocation_sampling_distance.phpt +++ b/profiling/tests/phpt/allocation_sampling_distance.phpt @@ -44,5 +44,5 @@ $str = \str_repeat('a', 87); Done. --EXPECTREGEX-- .* Memory allocation profiling initialized with a sampling distance of 1 bytes.* -.* Sent stack sample with leaf frame .* of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.* +.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.* .*Done\..* From 52876ec75c3b580729d7fd4bf0b7f7b11597da8d Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Wed, 21 Jan 2026 10:03:47 +0100 Subject: [PATCH 31/32] "tighten" value and error for this test On PHP >= 8.4 (with frameless functions), typical CPU utilization of that `str_replace()` function is at around 51%, without frameless functions (PHP <= 8.3) the CPU utilisation for `str_replace()` is at about 99%, this runs that test always and makes sure we match both cases. --- .github/workflows/prof_correctness.yml | 2 -- profiling/tests/correctness/allocation_time_combined.json | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index da49c2bd496..29b6895c33e 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -111,7 +111,6 @@ jobs: unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE - name: Run allocation_time_combined test (PHP 8.4+) - if: fromJSON(matrix.php-version) >= 8.4 run: | export DD_PROFILING_LOG_LEVEL=trace export DD_PROFILING_EXPERIMENTAL_FEATURES_ENABLED=1 @@ -182,7 +181,6 @@ jobs: pprof_path: profiling/tests/correctness/timeline/ - name: Check profiler correctness for allocation_time_combined - if: fromJSON(matrix.php-version) >= 8.4 uses: Datadog/prof-correctness/analyze@main with: expected_json: profiling/tests/correctness/allocation_time_combined.json diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index e3c1086e907..580b3b352b6 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -37,8 +37,8 @@ "stack-content": [ { "regular_expression": " Date: Wed, 21 Jan 2026 13:19:00 +0100 Subject: [PATCH 32/32] merge allocations & time combined test to "normal" path without changed allocation sampling distance --- .github/workflows/prof_correctness.yml | 23 ++----------------- .../correctness/allocation_time_combined.json | 4 ++-- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/.github/workflows/prof_correctness.yml b/.github/workflows/prof_correctness.yml index 29b6895c33e..96f0c257b01 100644 --- a/.github/workflows/prof_correctness.yml +++ b/.github/workflows/prof_correctness.yml @@ -77,7 +77,7 @@ jobs: export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1 php -v php -d extension=target/profiler-release/libdatadog_php_profiling.so --ri datadog-profiling - for test_case in "allocations" "time" "strange_frames" "timeline" "exceptions"; do + for test_case in "allocations" "time" "strange_frames" "timeline" "exceptions" "allocation_time_combined"; do mkdir -p profiling/tests/correctness/"$test_case"/ export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/"$test_case"/test.pprof php -d extension="${PWD}/target/profiler-release/libdatadog_php_profiling.so" "profiling/tests/correctness/${test_case}.php" @@ -95,7 +95,7 @@ jobs: export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1 php -v php -d extension=target/profiler-release/libdatadog_php_profiling.so --ri datadog-profiling - for test_case in "allocations" "time" "strange_frames" "timeline" "exceptions"; do + for test_case in "allocations" "time" "strange_frames" "timeline" "exceptions" "allocation_time_combined"; do mkdir -p profiling/tests/correctness/"$test_case"/ export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/"$test_case"/test.pprof php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/"$test_case".php @@ -110,25 +110,6 @@ jobs: USE_ZEND_ALLOC=0 php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocations.php unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE - - name: Run allocation_time_combined test (PHP 8.4+) - run: | - export DD_PROFILING_LOG_LEVEL=trace - export DD_PROFILING_EXPERIMENTAL_FEATURES_ENABLED=1 - export DD_PROFILING_EXPERIMENTAL_EXCEPTION_SAMPLING_DISTANCE=1 - export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1 - php -v - php -d extension=target/profiler-release/libdatadog_php_profiling.so --ri datadog-profiling - mkdir -p profiling/tests/correctness/allocation_time_combined/ - export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocation_time_combined/test.pprof - export DD_PROFILING_ALLOCATION_ENABLED=yes - export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1 - export DD_PROFILING_WALL_TIME_ENABLED=yes - export DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED=no - export DD_PROFILING_ENDPOINT_COLLECTION_ENABLED=no - export EXECUTION_TIME=5 - php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php - unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME - - name: Run ZTS tests if: matrix.phpts == 'zts' run: | diff --git a/profiling/tests/correctness/allocation_time_combined.json b/profiling/tests/correctness/allocation_time_combined.json index 580b3b352b6..d7dc726c754 100644 --- a/profiling/tests/correctness/allocation_time_combined.json +++ b/profiling/tests/correctness/allocation_time_combined.json @@ -23,12 +23,12 @@ { "regular_expression": "