From ab48dd2ba90d4aa6b94d7ec11707b7ccc379c3d5 Mon Sep 17 00:00:00 2001 From: Jonas Bostoen Date: Wed, 15 Apr 2026 10:56:30 +0200 Subject: [PATCH 1/4] Drop pid label from process_thread_usage --- README.md | 8 ++++---- prometric-derive/src/lib.rs | 8 ++++---- prometric/src/process.rs | 26 +++++++++++++++++--------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7c1a600..2acfab8 100644 --- a/README.md +++ b/README.md @@ -129,11 +129,11 @@ process_resident_memory_usage 0.00007072418111501723 # HELP process_start_time_seconds The start time of the process in UNIX seconds. # TYPE process_start_time_seconds gauge process_start_time_seconds 1763056609 -# HELP process_thread_usage Per-thread CPU usage as a percentage of the process's CPU usage (Linux only). +# HELP process_thread_usage Thread CPU usage percentage aggregated by thread name (Linux only). # TYPE process_thread_usage gauge -process_thread_usage{name="process::tests:",pid="980490"} 0.9259260296821594 -process_thread_usage{name="test-thread-1",pid="980491"} 0 -process_thread_usage{name="test-thread-2",pid="980492"} 94.44445037841797 +process_thread_usage{name="process::tests:"} 0.9259260296821594 +process_thread_usage{name="test-thread-1"} 0 +process_thread_usage{name="test-thread-2"} 94.44445037841797 # HELP process_threads The number of OS threads used by the process (Linux only). # TYPE process_threads gauge process_threads 3 diff --git a/prometric-derive/src/lib.rs b/prometric-derive/src/lib.rs index 34e185f..0fb389e 100644 --- a/prometric-derive/src/lib.rs +++ b/prometric-derive/src/lib.rs @@ -188,11 +188,11 @@ mod utils; /// # HELP process_start_time_seconds The start time of the process in UNIX seconds. /// # TYPE process_start_time_seconds gauge /// process_start_time_seconds 1763056609 -/// # HELP process_thread_usage Per-thread CPU usage as a percentage of the process's CPU usage (Linux only). 
+/// # HELP process_thread_usage Thread CPU usage percentage aggregated by thread name (Linux only). /// # TYPE process_thread_usage gauge -/// process_thread_usage{name="process::tests:",pid="980490"} 0.9259260296821594 -/// process_thread_usage{name="test-thread-1",pid="980491"} 0 -/// process_thread_usage{name="test-thread-2",pid="980492"} 94.44445037841797 +/// process_thread_usage{name="process::tests:"} 0.9259260296821594 +/// process_thread_usage{name="test-thread-1"} 0 +/// process_thread_usage{name="test-thread-2"} 94.44445037841797 /// # HELP process_threads The number of OS threads used by the process (Linux only). /// # TYPE process_threads gauge /// process_threads 3 diff --git a/prometric/src/process.rs b/prometric/src/process.rs index f8be3ff..0174517 100644 --- a/prometric/src/process.rs +++ b/prometric/src/process.rs @@ -90,20 +90,28 @@ impl ProcessCollector { let cpu_usage = process.cpu_usage() / self.cores as f32; - // Collect thread stats + // Collect thread stats, aggregated by thread name to avoid high-cardinality + // per-thread/task IDs. 
if let Some(tasks) = process.tasks() { + let mut thread_usage_by_name = std::collections::BTreeMap::<String, f64>::new(); + tasks.iter().for_each(|pid| { let Some(thread) = self.sys.process(*pid) else { return; }; - let pid = pid.to_string(); - let name = thread.name().to_str().unwrap_or(pid.as_str()); + let name = thread + .name() + .to_str() + .filter(|name| !name.is_empty()) + .unwrap_or("unnamed") + .to_owned(); + + *thread_usage_by_name.entry(name).or_default() += thread.cpu_usage() as f64; + }); - self.metrics - .thread_usage - .with_label_values(&[pid.as_str(), name]) - .set(thread.cpu_usage() as f64); + thread_usage_by_name.into_iter().for_each(|(name, cpu_usage)| { + self.metrics.thread_usage.with_label_values(&[name.as_str()]).set(cpu_usage); }); } @@ -235,9 +243,9 @@ impl ProcessMetrics { let thread_usage: GaugeVec = GaugeVec::new( Opts::new( "process_thread_usage", - "Per-thread CPU usage as a percentage of the process's CPU usage (Linux only).", + "Thread CPU usage percentage aggregated by thread name (Linux only).", ), - &["pid", "name"], + &["name"], ) .unwrap(); From 6cd098a23c8d1308fcebf28c886abd5b89ece9f3 Mon Sep 17 00:00:00 2001 From: Jonas Bostoen Date: Wed, 15 Apr 2026 11:09:25 +0200 Subject: [PATCH 2/4] Avoid extra aggregation map for thread usage --- prometric/src/process.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/prometric/src/process.rs b/prometric/src/process.rs index 0174517..cf573a6 100644 --- a/prometric/src/process.rs +++ b/prometric/src/process.rs @@ -92,26 +92,17 @@ impl ProcessCollector { // Collect thread stats, aggregated by thread name to avoid high-cardinality // per-thread/task IDs. 
+ self.metrics.thread_usage.reset(); if let Some(tasks) = process.tasks() { - let mut thread_usage_by_name = std::collections::BTreeMap::<String, f64>::new(); - tasks.iter().for_each(|pid| { let Some(thread) = self.sys.process(*pid) else { return; }; - let name = thread - .name() - .to_str() - .filter(|name| !name.is_empty()) - .unwrap_or("unnamed") - .to_owned(); - - *thread_usage_by_name.entry(name).or_default() += thread.cpu_usage() as f64; - }); + let name = + thread.name().to_str().filter(|name| !name.is_empty()).unwrap_or("unnamed"); - thread_usage_by_name.into_iter().for_each(|(name, cpu_usage)| { - self.metrics.thread_usage.with_label_values(&[name.as_str()]).set(cpu_usage); + self.metrics.thread_usage.with_label_values(&[name]).add(thread.cpu_usage() as f64); }); } From e3e02773c0de4232b796ab40f7beb046059cdac8 Mon Sep 17 00:00:00 2001 From: Jonas Bostoen Date: Wed, 15 Apr 2026 11:18:54 +0200 Subject: [PATCH 3/4] Suffix duplicate thread names in process metrics --- README.md | 2 +- prometric-derive/src/lib.rs | 2 +- prometric/src/process.rs | 40 ++++++++++++++++++++++++++++--------- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 2acfab8..24b358e 100644 --- a/README.md +++ b/README.md @@ -129,7 +129,7 @@ process_resident_memory_usage 0.00007072418111501723 # HELP process_start_time_seconds The start time of the process in UNIX seconds. # TYPE process_start_time_seconds gauge process_start_time_seconds 1763056609 -# HELP process_thread_usage Thread CPU usage percentage aggregated by thread name (Linux only). +# HELP process_thread_usage Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only). 
# TYPE process_thread_usage gauge process_thread_usage{name="process::tests:"} 0.9259260296821594 process_thread_usage{name="test-thread-1"} 0 diff --git a/prometric-derive/src/lib.rs b/prometric-derive/src/lib.rs index 0fb389e..3626477 100644 --- a/prometric-derive/src/lib.rs +++ b/prometric-derive/src/lib.rs @@ -188,7 +188,7 @@ mod utils; /// # HELP process_start_time_seconds The start time of the process in UNIX seconds. /// # TYPE process_start_time_seconds gauge /// process_start_time_seconds 1763056609 -/// # HELP process_thread_usage Thread CPU usage percentage aggregated by thread name (Linux only). +/// # HELP process_thread_usage Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only). /// # TYPE process_thread_usage gauge /// process_thread_usage{name="process::tests:"} 0.9259260296821594 /// process_thread_usage{name="test-thread-1"} 0 diff --git a/prometric/src/process.rs b/prometric/src/process.rs index cf573a6..ec5fcc5 100644 --- a/prometric/src/process.rs +++ b/prometric/src/process.rs @@ -90,20 +90,42 @@ impl ProcessCollector { let cpu_usage = process.cpu_usage() / self.cores as f32; - // Collect thread stats, aggregated by thread name to avoid high-cardinality - // per-thread/task IDs. + // Collect thread stats by thread name to avoid high-cardinality per-thread/task + // IDs. For duplicate names, append a deterministic instance suffix based on PID + // order (for example `worker#1`, `worker#2`). 
self.metrics.thread_usage.reset(); if let Some(tasks) = process.tasks() { - tasks.iter().for_each(|pid| { - let Some(thread) = self.sys.process(*pid) else { - return; - }; + let mut threads: Vec<_> = tasks + .iter() + .filter_map(|pid| self.sys.process(*pid).map(|thread| (*pid, thread))) + .collect(); + threads.sort_unstable_by_key(|(pid, _)| pid.as_u32()); + + let mut name_counts = + std::collections::HashMap::<&str, usize>::with_capacity(threads.len()); + for (_, thread) in &threads { + let name = + thread.name().to_str().filter(|name| !name.is_empty()).unwrap_or("unnamed"); + *name_counts.entry(name).or_default() += 1; + } + let mut instance_counts = + std::collections::HashMap::<&str, usize>::with_capacity(name_counts.len()); + for (_, thread) in threads { let name = thread.name().to_str().filter(|name| !name.is_empty()).unwrap_or("unnamed"); + let cpu_usage = thread.cpu_usage() as f64; - self.metrics.thread_usage.with_label_values(&[name]).add(thread.cpu_usage() as f64); - }); + if name_counts.get(name).copied().unwrap_or(0) > 1 { + let instance = instance_counts.entry(name).or_default(); + *instance += 1; + + let label = format!("{name}#{instance}"); + self.metrics.thread_usage.with_label_values(&[label.as_str()]).set(cpu_usage); + } else { + self.metrics.thread_usage.with_label_values(&[name]).set(cpu_usage); + } + } } let threads = process.tasks().map(|tasks| tasks.len()).unwrap_or(0); @@ -234,7 +256,7 @@ impl ProcessMetrics { let thread_usage: GaugeVec = GaugeVec::new( Opts::new( "process_thread_usage", - "Thread CPU usage percentage aggregated by thread name (Linux only).", + "Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only).", ), &["name"], ) From cc80cd5b27c9c1af4dd95c741655b693c63fbdc9 Mon Sep 17 00:00:00 2001 From: Jonas Bostoen Date: Wed, 15 Apr 2026 11:22:58 +0200 Subject: [PATCH 4/4] Reset thread usage series before pid relabeling --- README.md | 8 +++--- prometric-derive/src/lib.rs | 8 +++--- 
prometric/src/process.rs | 53 ++++++++++++------------------------- 3 files changed, 25 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 24b358e..7c1a600 100644 --- a/README.md +++ b/README.md @@ -129,11 +129,11 @@ process_resident_memory_usage 0.00007072418111501723 # HELP process_start_time_seconds The start time of the process in UNIX seconds. # TYPE process_start_time_seconds gauge process_start_time_seconds 1763056609 -# HELP process_thread_usage Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only). +# HELP process_thread_usage Per-thread CPU usage as a percentage of the process's CPU usage (Linux only). # TYPE process_thread_usage gauge -process_thread_usage{name="process::tests:"} 0.9259260296821594 -process_thread_usage{name="test-thread-1"} 0 -process_thread_usage{name="test-thread-2"} 94.44445037841797 +process_thread_usage{name="process::tests:",pid="980490"} 0.9259260296821594 +process_thread_usage{name="test-thread-1",pid="980491"} 0 +process_thread_usage{name="test-thread-2",pid="980492"} 94.44445037841797 # HELP process_threads The number of OS threads used by the process (Linux only). # TYPE process_threads gauge process_threads 3 diff --git a/prometric-derive/src/lib.rs b/prometric-derive/src/lib.rs index 3626477..34e185f 100644 --- a/prometric-derive/src/lib.rs +++ b/prometric-derive/src/lib.rs @@ -188,11 +188,11 @@ mod utils; /// # HELP process_start_time_seconds The start time of the process in UNIX seconds. /// # TYPE process_start_time_seconds gauge /// process_start_time_seconds 1763056609 -/// # HELP process_thread_usage Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only). +/// # HELP process_thread_usage Per-thread CPU usage as a percentage of the process's CPU usage (Linux only). 
/// # TYPE process_thread_usage gauge -/// process_thread_usage{name="process::tests:"} 0.9259260296821594 -/// process_thread_usage{name="test-thread-1"} 0 -/// process_thread_usage{name="test-thread-2"} 94.44445037841797 +/// process_thread_usage{name="process::tests:",pid="980490"} 0.9259260296821594 +/// process_thread_usage{name="test-thread-1",pid="980491"} 0 +/// process_thread_usage{name="test-thread-2",pid="980492"} 94.44445037841797 /// # HELP process_threads The number of OS threads used by the process (Linux only). /// # TYPE process_threads gauge /// process_threads 3 diff --git a/prometric/src/process.rs b/prometric/src/process.rs index ec5fcc5..a363141 100644 --- a/prometric/src/process.rs +++ b/prometric/src/process.rs @@ -90,42 +90,23 @@ impl ProcessCollector { let cpu_usage = process.cpu_usage() / self.cores as f32; - // Collect thread stats by thread name to avoid high-cardinality per-thread/task - // IDs. For duplicate names, append a deterministic instance suffix based on PID - // order (for example `worker#1`, `worker#2`). + // Collect thread stats and reset the vector each scrape so exited threads do not + // leave stale PID-labelled series behind. 
self.metrics.thread_usage.reset(); if let Some(tasks) = process.tasks() { - let mut threads: Vec<_> = tasks - .iter() - .filter_map(|pid| self.sys.process(*pid).map(|thread| (*pid, thread))) - .collect(); - threads.sort_unstable_by_key(|(pid, _)| pid.as_u32()); - - let mut name_counts = - std::collections::HashMap::<&str, usize>::with_capacity(threads.len()); - for (_, thread) in &threads { - let name = - thread.name().to_str().filter(|name| !name.is_empty()).unwrap_or("unnamed"); - *name_counts.entry(name).or_default() += 1; - } - - let mut instance_counts = - std::collections::HashMap::<&str, usize>::with_capacity(name_counts.len()); - for (_, thread) in threads { - let name = - thread.name().to_str().filter(|name| !name.is_empty()).unwrap_or("unnamed"); - let cpu_usage = thread.cpu_usage() as f64; - - if name_counts.get(name).copied().unwrap_or(0) > 1 { - let instance = instance_counts.entry(name).or_default(); - *instance += 1; - - let label = format!("{name}#{instance}"); - self.metrics.thread_usage.with_label_values(&[label.as_str()]).set(cpu_usage); - } else { - self.metrics.thread_usage.with_label_values(&[name]).set(cpu_usage); - } - } + tasks.iter().for_each(|pid| { + let Some(thread) = self.sys.process(*pid) else { + return; + }; + + let pid = pid.to_string(); + let name = thread.name().to_str().unwrap_or(pid.as_str()); + + self.metrics + .thread_usage + .with_label_values(&[pid.as_str(), name]) + .set(thread.cpu_usage() as f64); + }); } let threads = process.tasks().map(|tasks| tasks.len()).unwrap_or(0); @@ -256,9 +237,9 @@ impl ProcessMetrics { let thread_usage: GaugeVec = GaugeVec::new( Opts::new( "process_thread_usage", - "Thread CPU usage percentage by thread name, with `#` appended for duplicate names (Linux only).", + "Per-thread CPU usage as a percentage of the process's CPU usage (Linux only).", ), - &["name"], + &["pid", "name"], ) .unwrap();