From 08b9a21c9aaf9dee1093088f4492264d67ac05f9 Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 22 May 2026 09:58:10 +0200 Subject: [PATCH 01/17] wip --- Cargo.lock | 190 +++++++++++++ Cargo.toml | 3 + src/batch_queue/mod.rs | 128 ++++++++- src/error_tracking/fingerprint.rs | 166 ++++++++++++ src/error_tracking/mod.rs | 3 + src/error_tracking/sourcemaps.rs | 430 ++++++++++++++++++++++++++++++ src/error_tracking/v3.rs | 103 +++++++ src/handler/mod.rs | 148 ++++------ src/handler/web.rs | 80 +++--- src/main.rs | 2 + src/tinybird.rs | 34 +++ 11 files changed, 1155 insertions(+), 132 deletions(-) create mode 100644 src/error_tracking/fingerprint.rs create mode 100644 src/error_tracking/mod.rs create mode 100644 src/error_tracking/sourcemaps.rs create mode 100644 src/error_tracking/v3.rs diff --git a/Cargo.lock b/Cargo.lock index 9998015..ea9cda9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,41 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "aead" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d122413f284cf2d62fb1b7db97e02edb8cda96d769b16e443a4f6195e35662b0" +dependencies = [ + "crypto-common 0.1.7", + "generic-array", +] + +[[package]] +name = "aes" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures 0.2.17", +] + +[[package]] +name = "aes-gcm" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831010a0f742e1209b3bcea8fab6a8e149051ba6099432c8cb2cc117dec3ead1" +dependencies = [ + "aead", + "aes", + "cipher", + "ctr", + "ghash", + "subtle", +] + [[package]] name = "ahash" version = "0.8.12" @@ -553,6 +588,18 @@ dependencies = [ "serde_core", ] +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -675,6 +722,16 @@ dependencies = [ "windows-link", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common 0.1.7", + "inout", +] + [[package]] name = "cmake" version = "0.1.57" @@ -873,6 +930,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", + "rand_core 0.6.4", "typenum", ] @@ -885,6 +943,15 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "ctr" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0369ee1ad671834580515889b80f2ea915f23b8be8d0daa4bbaf2ac5c7590835" +dependencies = [ + "cipher", +] + [[package]] name = "ctutils" version = "0.4.2" @@ -898,11 +965,13 @@ dependencies = [ name = "data-collector" version = "0.1.0" dependencies = [ + "aes-gcm", "aws-sdk-s3", "axum", "chrono", "dotenvy", "flate2", + "hex", "metrics", "metrics-exporter-prometheus", "moka", @@ -913,6 +982,7 @@ dependencies = [ "serde_json", "serde_yaml", "sha2 0.11.0", + "sourcemap", "sqlx", "tempfile", "tokio", @@ -926,6 +996,22 @@ dependencies = [ "zstd", ] +[[package]] +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "serde", + "uuid", +] + [[package]] name = "der" version = "0.7.10" @@ -1169,6 +1255,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures-channel" version = "0.3.32" @@ -1304,6 +1396,16 @@ dependencies = [ "wasip3", ] +[[package]] +name = "ghash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d8a4362ccb29cb0b265253fb0a2728f592895ee6854fd9bc13f2ffda266ff1" +dependencies = [ + "opaque-debug", + "polyval", +] + [[package]] name = "group" version = "0.13.0" @@ -1745,6 +1847,12 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "if_chain" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd62e6b5e86ea8eeeb8db1de02880a6abc01a397b2ebb64b5d74ac255318f5cb" + [[package]] name = "indexmap" version = "2.13.0" @@ -1757,6 +1865,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -2114,6 +2231,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + [[package]] name = "openssl-probe" version = "0.2.1" @@ -2221,6 +2344,18 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "polyval" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d1fe60d06143b2430aa532c94cfe9e29783047f06c0d7fd359a9a51b729fa25" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "opaque-debug", + "universal-hash", +] + [[package]] name = "portable-atomic" version = "1.13.0" @@ -2365,6 +2500,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -3028,6 +3169,24 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "sourcemap" +version = "9.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "314d62a489431668f719ada776ca1d49b924db951b7450f8974c9ae51ab05ad7" +dependencies = [ + "base64-simd", + "bitvec", + "data-encoding", + "debugid", + "if_chain", + "rustc-hash", + "serde", + "serde_json", + "unicode-id-start", + "url", +] + [[package]] name = "spin" version = "0.9.8" @@ -3332,6 +3491,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.27.0" @@ -3663,6 +3828,12 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" +[[package]] +name = "unicode-id-start" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81b79ad29b5e19de4260020f8919b443b2ef0277d242ce532ec7b7a2cc8b6007" + [[package]] name = "unicode-ident" version = "1.0.22" @@ -3690,6 +3861,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "universal-hash" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc1de2c688dc15305988b563c3854064043356019f97a4b46276fe734c4f07ea" +dependencies = [ + "crypto-common 0.1.7", + "subtle", +] + [[package]] name = "unsafe-libyaml" version = "0.2.11" @@ -4364,6 +4545,15 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xmlparser" version = "0.13.6" diff --git a/Cargo.toml b/Cargo.toml index 9c84f11..85ac6db 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,9 @@ regex = "1.12.2" serde = "1.0.228" serde_json = "1.0.145" sha2 = "0.11" +aes-gcm = "0.10.3" +hex = "0.4.3" +sourcemap = "9.3.2" sqlx = { version = "0.8.6", features = ["runtime-tokio-rustls", "postgres", "sqlite", "macros", "uuid", "time"] } tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread", "signal"] } zstd = "0.13.3" diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 6bda7e7..0767023 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -1,9 +1,11 @@ mod backup_store; pub use backup_store::BackupStore; +use crate::error_tracking::sourcemaps::SourcemapResolver; use crate::polar::{PolarClient, UsageCounts}; use crate::tinybird::{ - ErrorRow, ErrorTrackingRow, ModsEventRow, ReplayRow, TinybirdClient, WebEventRow, WebVitalRow, + ErrorOccurrenceV3Row, ErrorRow, ErrorTrackingRow, ModsEventRow, ReplayRow, TinybirdClient, + WebEventRow, WebVitalRow, }; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -82,6 +84,11 @@ pub enum QueuedEvent { #[serde(skip_serializing_if = "Option::is_none")] tracking: Option, }, + ErrorOccurrenceV3 { + row: Box, + #[serde(skip_serializing_if = "Option::is_none")] + tracking: Option, + }, WebVital { row: WebVitalRow, #[serde(skip_serializing_if = "Option::is_none")] @@ -101,6 +108,7 @@ impl QueuedEvent { QueuedEvent::ModsEvent { .. } => "mods_events", QueuedEvent::Error(_) => "errors", QueuedEvent::ErrorTracking { .. } => "error_occurences_v2", + QueuedEvent::ErrorOccurrenceV3 { .. } => "error_tracking_v3", QueuedEvent::WebVital { .. } => "web_vitals", QueuedEvent::Replay { .. } => "session_replays", } @@ -115,6 +123,7 @@ struct InMemoryBatch { mods_events: Vec<(ModsEventRow, Option)>, errors: Vec, error_trackings: Vec<(ErrorTrackingRow, Option)>, + error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, web_vitals: Vec<(WebVitalRow, Option)>, replays: Vec<(ReplayRow, Option)>, } @@ -126,6 +135,7 @@ impl Default for InMemoryBatch { mods_events: Vec::with_capacity(INITIAL_BATCH_CAPACITY), errors: Vec::new(), error_trackings: Vec::new(), + error_occurrences_v3: Vec::new(), web_vitals: Vec::with_capacity(INITIAL_BATCH_CAPACITY / 4), replays: Vec::new(), } @@ -138,6 +148,7 @@ impl InMemoryBatch { && self.mods_events.is_empty() && self.errors.is_empty() && self.error_trackings.is_empty() + && self.error_occurrences_v3.is_empty() && self.web_vitals.is_empty() && self.replays.is_empty() } @@ -147,6 +158,7 @@ impl InMemoryBatch { + self.mods_events.len() + self.errors.len() + self.error_trackings.len() + + self.error_occurrences_v3.len() + self.web_vitals.len() + self.replays.len() } @@ -159,6 +171,9 @@ impl InMemoryBatch { QueuedEvent::ErrorTracking { row, tracking } => { self.error_trackings.push((*row, tracking)) } + QueuedEvent::ErrorOccurrenceV3 { row, tracking } => { + self.error_occurrences_v3.push((*row, tracking)) + } QueuedEvent::WebVital { row, tracking } => self.web_vitals.push((row, tracking)), QueuedEvent::Replay { row, tracking } => self.replays.push((row, tracking)), } @@ -186,6 +201,14 @@ impl InMemoryBatch { tracking, } })); + result.extend( + self.error_occurrences_v3 + .into_iter() + .map(|(row, tracking)| QueuedEvent::ErrorOccurrenceV3 { + row: Box::new(row), + tracking, + }), + ); result.extend( self.web_vitals .into_iter() @@ -203,6 +226,7 @@ impl InMemoryBatch { let estimated_owners = (self.web_events.len() + self.mods_events.len() + self.error_trackings.len() + + self.error_occurrences_v3.len() + self.web_vitals.len() + self.replays.len()) .min(100); @@ -230,6 +254,7 @@ impl InMemoryBatch { count_usage!(&self.web_events, events); count_usage!(&self.mods_events, events); count_usage!(&self.error_trackings, error_tracking); + count_usage!(&self.error_occurrences_v3, error_tracking); count_usage!(&self.web_vitals, web_vitals); for (row, ctx) in &self.replays { if let Some(ctx) = ctx { @@ -258,9 +283,11 @@ struct BatchSendResult { failed_mods_events: Vec<(ModsEventRow, Option)>, failed_errors: Vec, failed_error_trackings: Vec<(ErrorTrackingRow, Option)>, + failed_error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, failed_web_vitals: Vec<(WebVitalRow, Option)>, failed_replays: Vec<(ReplayRow, Option)>, had_permanent_failure: bool, + errors: Vec, } impl BatchSendResult { @@ -269,6 +296,7 @@ impl BatchSendResult { || !self.failed_mods_events.is_empty() || !self.failed_errors.is_empty() || !self.failed_error_trackings.is_empty() + || !self.failed_error_occurrences_v3.is_empty() || !self.failed_web_vitals.is_empty() || !self.failed_replays.is_empty() } @@ -279,6 +307,7 @@ impl BatchSendResult { mods_events: self.failed_mods_events, errors: self.failed_errors, error_trackings: self.failed_error_trackings, + error_occurrences_v3: self.failed_error_occurrences_v3, web_vitals: self.failed_web_vitals, replays: self.failed_replays, } @@ -289,14 +318,40 @@ impl BatchSendResult { + self.failed_mods_events.len() + self.failed_errors.len() + self.failed_error_trackings.len() + + self.failed_error_occurrences_v3.len() + self.failed_web_vitals.len() + self.failed_replays.len() } + + fn error_summary(&self) -> String { + if self.errors.is_empty() { + "unknown error".to_string() + } else { + self.errors.join("; ") + } + } +} + +fn record_batch_error( + result: &mut BatchSendResult, + datasource: &'static str, + rows: usize, + error: &crate::tinybird::TinybirdError, +) { + let permanence = if error.is_transient() { + "transient" + } else { + "permanent" + }; + result + .errors + .push(format!("{datasource} rows={rows} {permanence}: {error}")); } pub struct BatchQueue { tinybird: Arc, polar: Option>, + sourcemaps: Option>, pub(crate) backup_store: Arc, sender: mpsc::Sender, in_memory_batch: Arc>, @@ -315,6 +370,7 @@ impl BatchQueue { polar: Option>, backup_path: &Path, backup_enabled: bool, + sourcemaps: Option>, ) -> Arc { let backup_store = Arc::new(if backup_enabled { BackupStore::new(backup_path) @@ -328,6 +384,7 @@ impl BatchQueue { let queue = Arc::new(Self { tinybird, polar, + sourcemaps, backup_store, sender, in_memory_batch, @@ -489,10 +546,12 @@ impl BatchQueue { if result.had_permanent_failure { error!( + errors = %result.error_summary(), "Permanent failure, backing up {} events", - result.failure_count() + result.failure_count(), ); - self.backup_events(result.into_in_memory_batch(), "Permanent API error") + let backup_reason = format!("Permanent API error: {}", result.error_summary()); + self.backup_events(result.into_in_memory_batch(), &backup_reason) .await; return; } @@ -501,19 +560,23 @@ impl BatchQueue { if retry_count >= MAX_RETRIES { error!( + errors = %result.error_summary(), "Batch failed after {} retries, backing up {} events", retry_count, result.failure_count() ); - self.backup_events(result.into_in_memory_batch(), "Max retries exceeded") + let backup_reason = format!("Max retries exceeded: {}", result.error_summary()); + self.backup_events(result.into_in_memory_batch(), &backup_reason) .await; return; } + let error_summary = result.error_summary(); current_batch = result.into_in_memory_batch(); let delay = Self::calculate_retry_delay(retry_count); warn!( + errors = %error_summary, "Batch send failed (attempt {}), retrying {} events in {:?}", retry_count, current_batch.total_count(), @@ -532,6 +595,7 @@ impl BatchQueue { mods_events, errors, error_trackings, + error_occurrences_v3, web_vitals, replays, } = batch; @@ -539,6 +603,9 @@ impl BatchQueue { let web_event_rows: Vec<_> = web_events.iter().map(|(e, _)| e).collect(); let mods_event_rows: Vec<_> = mods_events.iter().map(|(e, _)| e).collect(); let error_tracking_rows: Vec<_> = error_trackings.iter().map(|(e, _)| e).collect(); + let error_occurrences_v3 = self.enrich_error_occurrences_v3(error_occurrences_v3).await; + let error_occurrence_v3_rows: Vec<_> = + error_occurrences_v3.iter().map(|(e, _)| e).collect(); let web_vital_rows: Vec<_> = web_vitals.iter().map(|(e, _)| e).collect(); let replay_rows: Vec<_> = replays.iter().map(|(e, _)| e).collect(); @@ -547,6 +614,7 @@ impl BatchQueue { mods_events_res, errors_res, error_trackings_res, + error_occurrences_v3_res, web_vitals_res, replays_res, ) = tokio::join!( @@ -580,6 +648,15 @@ impl BatchQueue { .await } }, + async { + if error_occurrence_v3_rows.is_empty() { + Ok(()) + } else { + self.tinybird + .insert_error_occurrences_v3(&error_occurrence_v3_rows) + .await + } + }, async { if web_vital_rows.is_empty() { Ok(()) @@ -597,6 +674,7 @@ impl BatchQueue { ); if let Err(e) = web_events_res { + record_batch_error(&mut result, "web_events", web_events.len(), &e); if !e.is_transient() { result.had_permanent_failure = true; } @@ -604,6 +682,7 @@ impl BatchQueue { } if let Err(e) = mods_events_res { + record_batch_error(&mut result, "mods_events", mods_events.len(), &e); if !e.is_transient() { result.had_permanent_failure = true; } @@ -611,6 +690,7 @@ impl BatchQueue { } if let Err(e) = errors_res { + record_batch_error(&mut result, "errors", errors.len(), &e); if !e.is_transient() { result.had_permanent_failure = true; } @@ -618,13 +698,33 @@ impl BatchQueue { } if let Err(e) = error_trackings_res { + record_batch_error( + &mut result, + "error_occurences_v2", + error_trackings.len(), + &e, + ); if !e.is_transient() { result.had_permanent_failure = true; } result.failed_error_trackings = error_trackings; } + if let Err(e) = error_occurrences_v3_res { + record_batch_error( + &mut result, + "error_tracking_v3", + error_occurrences_v3.len(), + &e, + ); + if !e.is_transient() { + result.had_permanent_failure = true; + } + result.failed_error_occurrences_v3 = error_occurrences_v3; + } + if let Err(e) = web_vitals_res { + record_batch_error(&mut result, "web_vitals", web_vitals.len(), &e); if !e.is_transient() { result.had_permanent_failure = true; } @@ -632,6 +732,7 @@ impl BatchQueue { } if let Err(e) = replays_res { + record_batch_error(&mut result, "session_replays", replays.len(), &e); if !e.is_transient() { result.had_permanent_failure = true; } @@ -641,6 +742,25 @@ impl BatchQueue { result } + async fn enrich_error_occurrences_v3( + &self, + rows: Vec<(ErrorOccurrenceV3Row, Option)>, + ) -> Vec<(ErrorOccurrenceV3Row, Option)> { + if rows.is_empty() || self.sourcemaps.is_none() { + return rows; + } + + let resolver = self.sourcemaps.as_deref(); + let mut enriched = Vec::with_capacity(rows.len()); + for (row, tracking) in rows { + enriched.push(( + crate::error_tracking::v3::enrich_with_sourcemap(resolver, row).await, + tracking, + )); + } + enriched + } + async fn backup_events(&self, batch: InMemoryBatch, error_msg: &str) { let events = batch.into_queued_events(); warn!("Backing up {} events: {}", events.len(), error_msg); diff --git a/src/error_tracking/fingerprint.rs b/src/error_tracking/fingerprint.rs new file mode 100644 index 0000000..317d1d1 --- /dev/null +++ b/src/error_tracking/fingerprint.rs @@ -0,0 +1,166 @@ +use crate::utils::sha256_hex; +use regex::Regex; +use std::sync::LazyLock; + +static UUID_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"(?i)\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b") + .expect("valid uuid regex") +}); +static HEX_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)\b0x[0-9a-f]+\b").expect("valid hex regex")); +static QUOTED_RE: LazyLock = + LazyLock::new(|| Regex::new(r#""[^"]*"|'[^']*'|`[^`]*`"#).expect("valid quoted regex")); +static NUMBER_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\b\d+(?:\.\d+)?\b").expect("valid number regex")); +static URL_OR_PATH_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"(?i)(https?://)?([^/\s\)]+/)+([^/\s\):]+)").expect("valid path regex") +}); +static HASHISH_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\b[0-9a-f]{12,}\b").expect("valid hash regex")); +static WHITESPACE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\s+").expect("valid whitespace regex")); + +pub fn exact_hash(error_type: &str, message: &str, stacktrace: &str) -> String { + sha256_hex(&[ + error_type.as_bytes(), + b"\x1f", + message.as_bytes(), + b"\x1f", + stacktrace.as_bytes(), + ]) +} + +pub fn group_hash(error_type: &str, message: &str, stacktrace: &str) -> String { + let normalized = normalize_for_grouping(error_type, message, stacktrace); + sha256_hex(&[normalized.as_bytes()]) +} + +fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> String { + let mut out = String::new(); + out.push_str(&normalize_piece(error_type)); + out.push('\n'); + out.push_str(&normalize_piece(message)); + + for line in stacktrace.lines().take(50) { + let normalized = normalize_piece(line); + if normalized.is_empty() { + continue; + } + out.push('\n'); + out.push_str(&normalized); + } + + out +} + +fn normalize_piece(input: &str) -> String { + let mut value = input.trim().to_ascii_lowercase(); + value = UUID_RE.replace_all(&value, "").into_owned(); + value = HEX_RE.replace_all(&value, "").into_owned(); + value = HASHISH_RE.replace_all(&value, "").into_owned(); + value = QUOTED_RE.replace_all(&value, "").into_owned(); + value = remove_frame_line_columns(&value); + value = URL_OR_PATH_RE.replace_all(&value, "$3").into_owned(); + value = NUMBER_RE.replace_all(&value, "").into_owned(); + value = WHITESPACE_RE.replace_all(&value, " ").into_owned(); + value.trim().to_string() +} + +fn remove_frame_line_columns(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + let mut offset = 0; + while let Some(relative_start) = input[offset..].find(':') { + let start = offset + relative_start; + out.push_str(&input[offset..start]); + if let Some(end) = line_column_suffix_end(&input[start..]) { + offset = start + end; + } else { + out.push(':'); + offset = start + 1; + } + } + out.push_str(&input[offset..]); + remove_trailing_line_column(&out) +} + +fn line_column_suffix_end(input: &str) -> Option { + let mut bytes = input.as_bytes(); + if bytes.first() != Some(&b':') { + return None; + } + bytes = &bytes[1..]; + let first_digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count(); + if first_digits == 0 || bytes.get(first_digits) != Some(&b':') { + return None; + } + bytes = &bytes[first_digits + 1..]; + let second_digits = bytes.iter().take_while(|b| b.is_ascii_digit()).count(); + if second_digits == 0 { + return None; + } + let end = 1 + first_digits + 1 + second_digits; + let next = input.as_bytes().get(end).copied(); + matches!(next, Some(b')') | Some(b' ') | None).then_some(end) +} + +fn remove_trailing_line_column(input: &str) -> String { + let suffix_offset = input.strip_suffix(')').map(|_| 1).unwrap_or(0); + let scan_end = input.len().saturating_sub(suffix_offset); + let Some((before_col, _)) = split_suffix_number(&input[..scan_end]) else { + return input.to_string(); + }; + let Some(before_colon) = before_col.strip_suffix(':') else { + return input.to_string(); + }; + let Some((before_line, _)) = split_suffix_number(before_colon) else { + return input.to_string(); + }; + let Some(prefix) = before_line.strip_suffix(':') else { + return input.to_string(); + }; + + let mut out = prefix.to_string(); + if suffix_offset == 1 { + out.push(')'); + } + out +} + +fn split_suffix_number(input: &str) -> Option<(&str, &str)> { + let start = input + .char_indices() + .rev() + .find_map(|(idx, ch)| (!ch.is_ascii_digit()).then_some(idx + ch.len_utf8())) + .unwrap_or(0); + (start != input.len()).then_some((&input[..start], &input[start..])) +} + +#[cfg(test)] +mod tests { + use super::{group_hash, normalize_piece}; + + #[test] + fn normalizes_noisy_values() { + let normalized = normalize_piece( + r#" at fn (https://cdn.example.com/assets/app.abc123.js:1742:19) id="u-42" 0xabc"#, + ); + + assert_eq!(normalized, "at fn (app.abc123.js) id= "); + } + + #[test] + fn group_hash_ignores_line_column_and_quoted_message_values() { + let a = group_hash( + "TypeError", + "Cannot read property 'name' of user 123", + " at render (/app/static/chunk.js:10:20)", + ); + let b = group_hash( + "TypeError", + "Cannot read property 'email' of user 456", + " at render (/app/static/chunk.js:99:1)", + ); + + assert_eq!(a, b); + } +} diff --git a/src/error_tracking/mod.rs b/src/error_tracking/mod.rs new file mode 100644 index 0000000..35b8e0b --- /dev/null +++ b/src/error_tracking/mod.rs @@ -0,0 +1,3 @@ +pub mod fingerprint; +pub mod sourcemaps; +pub mod v3; diff --git a/src/error_tracking/sourcemaps.rs b/src/error_tracking/sourcemaps.rs new file mode 100644 index 0000000..0dc6b05 --- /dev/null +++ b/src/error_tracking/sourcemaps.rs @@ -0,0 +1,430 @@ +use aes_gcm::{Aes256Gcm, KeyInit, Nonce, aead::Aead}; +use aws_sdk_s3::Client; +use moka::future::Cache; +use sourcemap::SourceMap; +use std::sync::Arc; +use std::time::Duration; +use tracing::warn; +use uuid::Uuid; + +const NONCE_LEN: usize = 12; +const TAG_LEN: usize = 16; +const MAP_CACHE_CAPACITY: u64 = 512; +const MAP_CACHE_TTL: Duration = Duration::from_secs(600); + +#[derive(Clone)] +pub struct SourcemapResolver { + client: Client, + bucket: Arc, + crypto: Arc, + maps: Cache>>, +} + +#[derive(Debug, Clone)] +pub struct MappedStacktrace { + pub stacktrace: String, + pub mapping_used: String, +} + +#[derive(Debug, Clone, Copy)] +struct JavaScriptFrame<'a> { + prefix: &'a str, + file_name: &'a str, + line: u32, + column: u32, + suffix: &'static str, +} + +struct OriginalPosition { + source: String, + line: u32, + column: u32, + name: Option, +} + +struct SourcemapCrypto { + cipher: Aes256Gcm, +} + +impl SourcemapResolver { + pub fn from_env() -> Option { + let bucket = std::env::var("SOURCEMAPS_S3_BUCKET").ok()?; + let endpoint = std::env::var("SOURCEMAPS_S3_ENDPOINT").ok()?; + let access_key_id = std::env::var("SOURCEMAPS_S3_ACCESS_KEY_ID").ok()?; + let secret_access_key = std::env::var("SOURCEMAPS_S3_SECRET_ACCESS_KEY").ok()?; + let file_key = std::env::var("SOURCEMAPS_S3_FILE_ENCRYPTION_KEY").ok()?; + let region = + std::env::var("SOURCEMAPS_S3_REGION").unwrap_or_else(|_| "us-east-1".to_string()); + + let crypto = match SourcemapCrypto::new(&file_key) { + Ok(crypto) => Arc::new(crypto), + Err(()) => { + warn!("Sourcemap resolver disabled: invalid file encryption key"); + return None; + } + }; + + let credentials = aws_sdk_s3::config::Credentials::new( + access_key_id, + secret_access_key, + None, + None, + "env", + ); + let client = Client::from_conf( + aws_sdk_s3::Config::builder() + .behavior_version(aws_sdk_s3::config::BehaviorVersion::latest()) + .region(aws_sdk_s3::config::Region::new(region)) + .credentials_provider(credentials) + .endpoint_url(endpoint) + .force_path_style(true) + .build(), + ); + + Some(Self { + client, + bucket: bucket.into(), + crypto, + maps: Cache::builder() + .max_capacity(MAP_CACHE_CAPACITY) + .time_to_idle(MAP_CACHE_TTL) + .build(), + }) + } + + pub async fn apply_javascript( + &self, + project_id: Uuid, + build_id: &str, + stacktrace: &str, + ) -> Option { + if build_id.is_empty() || stacktrace.is_empty() { + return None; + } + + let mut mapped_any = false; + let mut mapped_stacktrace = String::with_capacity(stacktrace.len()); + + for (idx, line) in stacktrace.lines().enumerate() { + if idx > 0 { + mapped_stacktrace.push('\n'); + } + + let Some(frame) = parse_javascript_frame(line) else { + mapped_stacktrace.push_str(line); + continue; + }; + + match self.apply_frame(project_id, build_id, &frame).await { + Some(mapped) => { + mapped_any = true; + mapped_stacktrace.push_str(&mapped); + } + None => mapped_stacktrace.push_str(line), + } + } + + mapped_any.then(|| MappedStacktrace { + stacktrace: mapped_stacktrace, + mapping_used: format!("javascript:{build_id}"), + }) + } + + async fn apply_frame( + &self, + project_id: Uuid, + build_id: &str, + frame: &JavaScriptFrame<'_>, + ) -> Option { + let map = self.load_map(project_id, build_id, frame.file_name).await?; + let original = apply_source_map(&map, frame.line, frame.column)?; + + let mut out = String::with_capacity( + frame.prefix.len() + + frame.suffix.len() + + original.source.len() + + original.name.as_ref().map(String::len).unwrap_or(0) + + 32, + ); + out.push_str(frame.prefix); + push_original_position(&mut out, &original); + out.push_str(frame.suffix); + Some(out) + } + + async fn load_map( + &self, + project_id: Uuid, + build_id: &str, + file_name: &str, + ) -> Option> { + let key = s3_key(project_id, build_id, file_name); + if let Some(map) = self + .maps + .get_with(key.clone(), async move { self.fetch_map(&key).await }) + .await + { + return Some(map); + } + + let basename = file_name.rsplit('/').next().unwrap_or(file_name); + if basename == file_name { + return None; + } + + let fallback_key = s3_key(project_id, build_id, basename); + self.maps + .get_with(fallback_key.clone(), async move { + self.fetch_map(&fallback_key).await + }) + .await + } + + async fn fetch_map(&self, key: &str) -> Option> { + let response = self + .client + .get_object() + .bucket(self.bucket.as_ref()) + .key(key) + .send() + .await + .map_err(|error| { + warn!(key, %error, "Failed to fetch sourcemap"); + }) + .ok()?; + let encrypted = response + .body + .collect() + .await + .map_err(|error| { + warn!(key, %error, "Failed to read sourcemap object"); + }) + .ok()? + .to_vec(); + let compressed = self + .crypto + .decrypt(&encrypted) + .map_err(|()| { + warn!(key, "Failed to decrypt sourcemap"); + }) + .ok()?; + let data = zstd::stream::decode_all(compressed.as_slice()) + .map_err(|error| { + warn!(key, %error, "Failed to decompress sourcemap"); + }) + .ok()?; + let map = SourceMap::from_slice(&data) + .map_err(|error| { + warn!(key, %error, "Failed to parse sourcemap"); + }) + .ok()?; + Some(Arc::new(map)) + } +} + +impl SourcemapCrypto { + fn new(hex_key: &str) -> Result { + let key_bytes = hex::decode(hex_key).map_err(|_| ())?; + if key_bytes.len() != 32 { + return Err(()); + } + let cipher = Aes256Gcm::new_from_slice(&key_bytes).map_err(|_| ())?; + Ok(Self { cipher }) + } + + fn decrypt(&self, data: &[u8]) -> Result, ()> { + if data.len() < NONCE_LEN + TAG_LEN { + return Err(()); + } + + let nonce_bytes = &data[..NONCE_LEN]; + let tag = &data[NONCE_LEN..NONCE_LEN + TAG_LEN]; + let ciphertext = &data[NONCE_LEN + TAG_LEN..]; + + let mut payload = Vec::with_capacity(ciphertext.len() + TAG_LEN); + payload.extend_from_slice(ciphertext); + payload.extend_from_slice(tag); + + self.cipher + .decrypt(Nonce::from_slice(nonce_bytes), payload.as_slice()) + .map_err(|_| ()) + } +} + +fn apply_source_map(map: &SourceMap, line: u32, column: u32) -> Option { + let token = map.lookup_token(line.saturating_sub(1), column.saturating_sub(1))?; + let source = token.get_source()?; + let src_line = token.get_src_line(); + let src_col = token.get_src_col(); + + if src_line == u32::MAX || src_col == u32::MAX { + return None; + } + + Some(OriginalPosition { + source: source.to_string(), + line: src_line.saturating_add(1), + column: src_col.saturating_add(1), + name: token.get_name().map(ToString::to_string), + }) +} + +fn push_original_position(out: &mut String, original: &OriginalPosition) { + if let Some(name) = original.name.as_deref().filter(|name| !name.is_empty()) { + out.push_str(name); + out.push_str(" ("); + out.push_str(&original.source); + out.push(':'); + push_u32(out, original.line); + out.push(':'); + push_u32(out, original.column); + out.push(')'); + } else { + out.push_str(&original.source); + out.push(':'); + push_u32(out, original.line); + out.push(':'); + push_u32(out, original.column); + } +} + +fn push_u32(out: &mut String, value: u32) { + use std::fmt::Write; + let _ = write!(out, "{value}"); +} + +fn parse_javascript_frame(line: &str) -> Option> { + let trimmed = line.trim_end(); + let mut end = trimmed.len(); + let suffix = if trimmed.ends_with(')') { + end -= 1; + ")" + } else { + "" + }; + + let before_suffix = &trimmed[..end]; + let (before_column, column) = split_trailing_u32(before_suffix)?; + let before_column = before_column.strip_suffix(':')?; + let (before_line, line_no) = split_trailing_u32(before_column)?; + let file_part = before_line.strip_suffix(':')?; + + let file_start = file_part + .rfind([' ', '(', '@']) + .map(|idx| idx + 1) + .unwrap_or(0); + let raw_file = &file_part[file_start..]; + if raw_file.is_empty() { + return None; + } + + Some(JavaScriptFrame { + prefix: &trimmed[..file_start], + file_name: normalize_file_name(raw_file), + line: line_no, + column, + suffix, + }) +} + +fn split_trailing_u32(input: &str) -> Option<(&str, u32)> { + let start = input + .char_indices() + .rev() + .find_map(|(idx, ch)| (!ch.is_ascii_digit()).then_some(idx + ch.len_utf8())) + .unwrap_or(0); + if start == input.len() { + return None; + } + Some((&input[..start], input[start..].parse().ok()?)) +} + +fn normalize_file_name(raw_file: &str) -> &str { + let without_query = raw_file.split_once('?').map_or(raw_file, |(path, _)| path); + let without_query = without_query + .split_once('#') + .map_or(without_query, |(path, _)| path); + + let without_scheme = without_query + .split_once("://") + .and_then(|(_, rest)| rest.split_once('/').map(|(_, path)| path)) + .unwrap_or(without_query); + without_scheme.trim_start_matches('/') +} + +fn s3_key(project_id: Uuid, build_id: &str, file_name: &str) -> String { + let map_suffix = if file_name.ends_with(".map") { + "" + } else { + ".map" + }; + let mut key = + String::with_capacity(36 + 1 + build_id.len() + 1 + file_name.len() + map_suffix.len()); + use std::fmt::Write; + let _ = write!(key, "{project_id}"); + key.push('/'); + key.push_str(build_id); + key.push('/'); + key.push_str(file_name); + key.push_str(map_suffix); + key +} + +#[cfg(test)] +mod tests { + use super::{normalize_file_name, parse_javascript_frame, s3_key}; + use uuid::Uuid; + + #[test] + fn parses_chrome_frame() { + let frame = + parse_javascript_frame(" at render (https://cdn.test/assets/app.js:12:34)").unwrap(); + + assert_eq!(frame.prefix, " at render ("); + assert_eq!(frame.file_name, "assets/app.js"); + assert_eq!(frame.line, 12); + assert_eq!(frame.column, 34); + assert_eq!(frame.suffix, ")"); + } + + #[test] + fn parses_firefox_frame() { + let frame = parse_javascript_frame("render@https://cdn.test/assets/app.js:12:34").unwrap(); + + assert_eq!(frame.prefix, "render@"); + assert_eq!(frame.file_name, "assets/app.js"); + assert_eq!(frame.line, 12); + assert_eq!(frame.column, 34); + } + + #[test] + fn normalizes_file_name() { + assert_eq!( + normalize_file_name("https://cdn.test/assets/app.js?v=1"), + "assets/app.js" + ); + assert_eq!(normalize_file_name("/assets/chunk.js"), "assets/chunk.js"); + } + + #[test] + fn appends_map_suffix() { + let project_id = Uuid::parse_str("01954b9b-7b1d-72b8-8af3-f8d058f60b79").unwrap(); + assert_eq!( + s3_key(project_id, "build-1", "app.js"), + "01954b9b-7b1d-72b8-8af3-f8d058f60b79/build-1/app.js.map" + ); + assert_eq!( + s3_key(project_id, "build-1", "app.js.map"), + "01954b9b-7b1d-72b8-8af3-f8d058f60b79/build-1/app.js.map" + ); + } + + #[test] + fn builds_matching_s3_key() { + let project_id = Uuid::parse_str("01954b9b-7b1d-72b8-8af3-f8d058f60b79").unwrap(); + assert_eq!( + s3_key(project_id, "build-1", "app.js.map"), + "01954b9b-7b1d-72b8-8af3-f8d058f60b79/build-1/app.js.map" + ); + } +} diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs new file mode 100644 index 0000000..ac3bb87 --- /dev/null +++ b/src/error_tracking/v3.rs @@ -0,0 +1,103 @@ +use crate::error_tracking::fingerprint; +use crate::error_tracking::sourcemaps::SourcemapResolver; +use crate::models::ErrorTracking; +use crate::tinybird::{ErrorOccurrenceV3Row, WebEventRow}; +use chrono::Utc; +use serde_json::Value; +use std::collections::HashMap; +use uuid::Uuid; + +pub struct WebOccurrenceInput<'a> { + pub project_id: Uuid, + pub release: Option<&'a str>, + pub user_id: Option<&'a str>, + pub session_id: Option<&'a str>, + pub window_id: Option<&'a str>, + pub sdk_name: Option<&'a str>, + pub sdk_version: Option<&'a str>, + pub context: &'a str, +} + +pub fn build_web_occurrence( + input: &WebOccurrenceInput<'_>, + error: &ErrorTracking, +) -> ErrorOccurrenceV3Row { + let stacktrace = error + .error + .stack + .as_ref() + .map(|stack| stack.join("\n")) + .unwrap_or_default(); + let error_type = error.error.error.clone(); + let error_message = error.error.message.clone().unwrap_or_default(); + let source_stack = stacktrace.as_str(); + + ErrorOccurrenceV3Row { + timestamp: Utc::now(), + project_id: input.project_id, + // TODO(error-tracking-v3): hardcoded to "prod" while v3 is being tested. + // Replace this with the SDK/request-provided environment once grouping and + // release behavior are verified in production data. + environment: "prod".to_string(), + release: input.release.unwrap_or_default().to_string(), + group_hash: fingerprint::group_hash(&error_type, &error_message, source_stack), + exact_hash: fingerprint::exact_hash(&error_type, &error_message, source_stack), + error_type, + error_message, + handled: error.handled.unwrap_or(false), + stacktrace, + mapped_stacktrace: None, + mapping_used: None, + user_id: input.user_id.unwrap_or_default().to_string(), + session_id: input.session_id.unwrap_or_default().to_string(), + window_id: input.window_id.unwrap_or_default().to_string(), + platform: "web".to_string(), + runtime: "browser".to_string(), + sdk_name: input.sdk_name.unwrap_or_default().to_string(), + sdk_version: input.sdk_version.unwrap_or_default().to_string(), + context: input.context.to_string(), + } +} + +pub async fn enrich_with_sourcemap( + resolver: Option<&SourcemapResolver>, + mut row: ErrorOccurrenceV3Row, +) -> ErrorOccurrenceV3Row { + let Some(resolver) = resolver else { + return row; + }; + let build_id = row.release.as_str(); + if build_id.is_empty() || row.stacktrace.is_empty() { + return row; + } + + if let Some(mapped) = resolver + .apply_javascript(row.project_id, build_id, &row.stacktrace) + .await + { + row.group_hash = + fingerprint::group_hash(&row.error_type, &row.error_message, &mapped.stacktrace); + row.exact_hash = + fingerprint::exact_hash(&row.error_type, &row.error_message, &mapped.stacktrace); + row.mapped_stacktrace = Some(mapped.stacktrace); + row.mapping_used = Some(mapped.mapping_used); + } + + row +} + +pub fn web_context(row: &WebEventRow, custom: &HashMap) -> String { + let mut context = match serde_json::to_value(row) { + Ok(Value::Object(context)) => context, + _ => serde_json::Map::new(), + }; + + if !custom.is_empty() { + context.insert( + "custom".to_string(), + Value::Object(custom.iter().map(|(k, v)| (k.clone(), v.clone())).collect()), + ); + } + + serde_json::to_string(&context).unwrap_or_else(|_| "{}".to_string()) +} diff --git a/src/handler/mod.rs b/src/handler/mod.rs index ab46308..7887524 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -16,7 +16,9 @@ pub use web::web; use self::java_stack_parameterization::build_parameterized_error_rows; use crate::batch_queue::{BatchQueue, FailedRequest, QueuedEvent, RequestType, TrackingContext}; use crate::models::{DataSource, Error, ErrorTracking}; -use crate::tinybird::{ErrorRow, ErrorTrackingRow, ModsEventRow, WebEventRow}; +use crate::tinybird::{ + ErrorOccurrenceV3Row, ErrorRow, ErrorTrackingRow, ModsEventRow, WebEventRow, +}; use crate::utils::sha256_hex; use axum::Json; use axum::http::{HeaderMap, StatusCode}; @@ -415,63 +417,6 @@ fn to_custom_json(data: &HashMap) -> String { } } -fn build_web_entry_data( - session_id: Option<&str>, - country: Option<&str>, - row: &WebEventRow, - custom: &HashMap, -) -> String { - let mut data = match serde_json::to_value(row) { - Ok(Value::Object(data)) => data, - _ => serde_json::Map::new(), - }; - for key in [ - "id", - "project_id", - "session_id", - "country", - "custom", - "created_at", - ] { - data.remove(key); - } - data.retain(|_, value| !value.is_null()); - - data.insert( - "session_id".to_string(), - Value::String(session_id.unwrap_or_default().to_string()), - ); - data.insert( - "country".to_string(), - Value::String(country.unwrap_or_default().to_string()), - ); - - for (key, value) in custom { - data.insert(key.clone(), value.clone()); - } - - serde_json::to_string(&data).unwrap_or_else(|_| "{}".to_string()) -} - -pub fn build_web_error_entry_details( - session_id: Option<&str>, - country: Option<&str>, - row: &WebEventRow, - custom: &HashMap, -) -> ErrorEntryDetails { - ErrorEntryDetails { - source_kind: "web-analytics".to_string(), - entry_session_id: session_id.unwrap_or_default().to_string(), - entry_country: country.unwrap_or_default().to_string(), - entry_browser: row.browser.clone().unwrap_or_default(), - entry_device: row.device.clone().unwrap_or_default(), - entry_os: row.os.clone().unwrap_or_default(), - os_version: row.os_version.clone().unwrap_or_default(), - entry_data: build_web_entry_data(session_id, country, row, custom), - ..ErrorEntryDetails::default() - } -} - pub fn build_mods_error_entry_details( country: Option<&str>, row: &ModsEventRow, @@ -773,6 +718,27 @@ pub async fn insert_error_entries( Ok(()) } +pub async fn insert_error_occurrence_v3( + batch_queue: &BatchQueue, + row: ErrorOccurrenceV3Row, + tracking: Option, +) -> Result<(), HandlerResponse> { + batch_queue + .queue_event(QueuedEvent::ErrorOccurrenceV3 { + row: Box::new(row), + tracking, + }) + .await + .map_err(|e| { + error!("Failed to queue error occurrence v3: {}", e); + error_response( + StatusCode::INTERNAL_SERVER_ERROR, + "Failed to queue error occurrence", + ) + })?; + Ok(()) +} + pub fn resolve_identity_key( session_id: Option<&str>, fallback_identifier: Option<&str>, @@ -980,17 +946,14 @@ async fn process_web_request( request.country.clone(), &valid_custom, ); - let error_entry_details = build_web_error_entry_details( - parsed.session_id.as_deref(), - request.country.as_deref(), - &event_row, - &valid_custom, - ); - - let data_entry_id = - insert_web_event(batch_queue, event_row.clone(), Some(tracking_ctx.clone())) - .await - .map_err(|_| "Failed to queue event".to_string())?; + let should_process_errors = ctx.error_tracking_enabled && has_errors; + let error_v3_context = should_process_errors.then(|| { + parsed + .context + .as_ref() + .map(|value| serde_json::to_string(value).unwrap_or_else(|_| "{}".to_string())) + .unwrap_or_else(|| crate::error_tracking::v3::web_context(&event_row, &valid_custom)) + }); if let Some(session_id) = parsed.session_id.as_deref() && let Some(replay_storage) = replay_storage @@ -1013,35 +976,34 @@ async fn process_web_request( warn!("Failed to persist replay filter metadata: {}", error); } - if ctx.error_tracking_enabled - && let Some(errors) = parsed.errors - { + insert_web_event(batch_queue, event_row, Some(tracking_ctx.clone())) + .await + .map_err(|_| "Failed to queue event".to_string())?; + + if should_process_errors && let Some(errors) = parsed.errors { + let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); + // The browser SDK sends this as `buildId`; the Tinybird v3 schema stores it as `release`. + let release = parsed.build_id.as_deref(); for mut error in errors { if error.session_id.is_none() { error.session_id = parsed.session_id.clone(); } - if error.build_id.is_none() { - error.build_id = parsed.build_id.clone(); - } - let identity_key = resolve_identity_key( - error.session_id.as_deref(), - Some(fallback_identity.as_str()), - ); - insert_error_entries( - batch_queue, - ctx.project_id, - Some(data_entry_id), - error, - ErrorEntryParams { - identity_key, - context: None, - details: error_entry_details.clone(), - tracking_ctx: Some(tracking_ctx.clone()), - stack_processing: ErrorStackProcessing::Raw, + let occurrence = crate::error_tracking::v3::build_web_occurrence( + &crate::error_tracking::v3::WebOccurrenceInput { + project_id: ctx.project_id, + release, + user_id: Some(fallback_identity.as_str()), + session_id: error.session_id.as_deref(), + window_id: parsed.window_id.as_deref(), + sdk_name: parsed.sdk_name.as_deref(), + sdk_version: parsed.sdk_version.as_deref(), + context: error_v3_context, }, - ) - .await - .map_err(|_| "Failed to queue error".to_string())?; + &error, + ); + insert_error_occurrence_v3(batch_queue, occurrence, Some(tracking_ctx.clone())) + .await + .map_err(|_| "Failed to queue error occurrence".to_string())?; } if let Some(session_id) = parsed.session_id.as_deref() diff --git a/src/handler/web.rs b/src/handler/web.rs index 3a2fb57..86f89b8 100644 --- a/src/handler/web.rs +++ b/src/handler/web.rs @@ -1,11 +1,11 @@ use super::{ - EncodingQuery, ErrorEntryParams, ErrorStackProcessing, WEB_EVENT_FIELDS, - build_web_error_entry_details, check_ip_allowed, decompress_body, error_response, + EncodingQuery, WEB_EVENT_FIELDS, check_ip_allowed, decompress_body, error_response, extract_known_fields, get_authorization, get_client_ip, get_country, get_request_origin, - insert_error_entries, insert_web_event, load_project_context, resolve_identity_key, - success_response, validate_hostname, + insert_error_occurrence_v3, insert_web_event, load_project_context, success_response, + validate_hostname, }; use crate::batch_queue::{FailedRequest, RequestType, TrackingContext}; +use crate::error_tracking::v3::{WebOccurrenceInput, build_web_occurrence, web_context}; use crate::identity::resolve_person_for_distinct_id; use crate::models::{AppState, ErrorTracking}; use crate::utils::debounce::should_debounce; @@ -31,6 +31,14 @@ pub(crate) struct WebRequest { pub(crate) session_id: Option, #[serde(default)] pub(crate) build_id: Option, + #[serde(default)] + pub(crate) window_id: Option, + #[serde(default)] + pub(crate) sdk_name: Option, + #[serde(default)] + pub(crate) sdk_version: Option, + #[serde(default)] + pub(crate) context: Option, } pub async fn web( @@ -53,6 +61,10 @@ pub async fn web( errors, session_id: parsed_session_id, build_id, + window_id, + sdk_name, + sdk_version, + context, } = match serde_json::from_slice(&body) { Ok(req) => req, Err(_) => return error_response(StatusCode::BAD_REQUEST, "Invalid JSON"), @@ -190,12 +202,13 @@ pub async fn web( country.clone(), &valid_custom, ); - let error_entry_details = build_web_error_entry_details( - session_id.as_deref(), - country.as_deref(), - &event_row, - &valid_custom, - ); + let should_process_errors = ctx.error_tracking_enabled && HAS_ERRORS(&errors); + let error_v3_context = should_process_errors.then(|| { + context + .as_ref() + .map(|value| serde_json::to_string(value).unwrap_or_else(|_| "{}".to_string())) + .unwrap_or_else(|| web_context(&event_row, &valid_custom)) + }); if let Some(session_id) = session_id.as_deref() && let Some(replay_storage) = state.replay_storage.as_deref() @@ -218,41 +231,38 @@ pub async fn web( warn!("Failed to persist replay filter metadata: {}", error); } - let data_entry_id = if is_debounced { - None - } else { + if !is_debounced { match insert_web_event(&state.batch_queue, event_row, Some(tracking_ctx.clone())).await { - Ok(id) => Some(id), + Ok(_) => {} Err(e) => return e, } - }; + } - if ctx.error_tracking_enabled - && let Some(error_list) = errors - { + if should_process_errors && let Some(error_list) = errors { + let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); + // The browser SDK sends this as `buildId`; the Tinybird v3 schema stores it as `release`. + let release = build_id.as_deref(); for mut error in error_list { if error.session_id.is_none() { error.session_id = session_id.clone(); } - if error.build_id.is_none() { - error.build_id = build_id.clone(); - } - let identity_key = resolve_identity_key( - error.session_id.as_deref(), - Some(fallback_identity.as_str()), + let occurrence = build_web_occurrence( + &WebOccurrenceInput { + project_id: ctx.project_id, + release, + user_id: Some(fallback_identity.as_str()), + session_id: error.session_id.as_deref(), + window_id: window_id.as_deref(), + sdk_name: sdk_name.as_deref(), + sdk_version: sdk_version.as_deref(), + context: error_v3_context, + }, + &error, ); - if let Err(e) = insert_error_entries( + if let Err(e) = insert_error_occurrence_v3( &state.batch_queue, - ctx.project_id, - data_entry_id, - error, - ErrorEntryParams { - identity_key, - context: None, - details: error_entry_details.clone(), - tracking_ctx: Some(tracking_ctx.clone()), - stack_processing: ErrorStackProcessing::Raw, - }, + occurrence, + Some(tracking_ctx.clone()), ) .await { diff --git a/src/main.rs b/src/main.rs index 792cdcd..3b5b482 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,6 +21,7 @@ use tower_http::cors::{AllowOrigin, CorsLayer}; use tower_http::decompression::RequestDecompressionLayer; use tracing::{info, warn}; mod batch_queue; +mod error_tracking; mod handler; mod identity; mod models; @@ -110,6 +111,7 @@ async fn main() { polar_client, &backup_path, backup_store_enabled, + error_tracking::sourcemaps::SourcemapResolver::from_env().map(Arc::new), ); let replay_storage = match replay_storage::ReplayStorage::from_env() { Ok(Some(storage)) => { diff --git a/src/tinybird.rs b/src/tinybird.rs index ebfda12..5e108d1 100644 --- a/src/tinybird.rs +++ b/src/tinybird.rs @@ -111,6 +111,33 @@ pub struct ErrorTrackingRow { pub created_at: DateTime, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ErrorOccurrenceV3Row { + #[serde(with = "chrono::serde::ts_milliseconds")] + pub timestamp: DateTime, + pub project_id: Uuid, + pub environment: String, + pub release: String, + pub group_hash: String, + pub exact_hash: String, + pub error_type: String, + pub error_message: String, + pub handled: bool, + pub stacktrace: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub mapped_stacktrace: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub mapping_used: Option, + pub user_id: String, + pub session_id: String, + pub window_id: String, + pub platform: String, + pub runtime: String, + pub sdk_name: String, + pub sdk_version: String, + pub context: String, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WebVitalRow { pub id: Uuid, @@ -271,6 +298,13 @@ impl TinybirdClient { self.send_batch("error_occurences_v2", rows).await } + pub async fn insert_error_occurrences_v3( + &self, + rows: &[&ErrorOccurrenceV3Row], + ) -> Result<(), TinybirdError> { + self.send_batch("error_tracking_v3", rows).await + } + pub async fn insert_web_vitals(&self, rows: &[&WebVitalRow]) -> Result<(), TinybirdError> { self.send_batch("web_vitals", rows).await } From 08fd0c5cf5437e90833480ad87c80bd404861148 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 27 May 2026 10:36:42 +0200 Subject: [PATCH 02/17] port more --- src/batch_queue/mod.rs | 199 +++----------- src/error_tracking/java_fingerprint.rs | 97 +++++++ src/error_tracking/mod.rs | 1 + src/error_tracking/v3.rs | 173 ++++++++++++- src/handler/collect.rs | 49 ++-- src/handler/error.rs | 37 +-- src/handler/java_stack_parameterization.rs | 210 --------------- src/handler/mod.rs | 287 ++------------------- src/tinybird.rs | 55 ---- test.ts | 90 +++++++ 10 files changed, 463 insertions(+), 735 deletions(-) create mode 100644 src/error_tracking/java_fingerprint.rs delete mode 100644 src/handler/java_stack_parameterization.rs create mode 100644 test.ts diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 0767023..5c5a686 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -4,8 +4,7 @@ pub use backup_store::BackupStore; use crate::error_tracking::sourcemaps::SourcemapResolver; use crate::polar::{PolarClient, UsageCounts}; use crate::tinybird::{ - ErrorOccurrenceV3Row, ErrorRow, ErrorTrackingRow, ModsEventRow, ReplayRow, TinybirdClient, - WebEventRow, WebVitalRow, + ErrorOccurrenceV3Row, ModsEventRow, ReplayRow, TinybirdClient, WebEventRow, WebVitalRow, }; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -78,12 +77,6 @@ pub enum QueuedEvent { #[serde(skip_serializing_if = "Option::is_none")] tracking: Option, }, - Error(ErrorRow), - ErrorTracking { - row: Box, - #[serde(skip_serializing_if = "Option::is_none")] - tracking: Option, - }, ErrorOccurrenceV3 { row: Box, #[serde(skip_serializing_if = "Option::is_none")] @@ -106,8 +99,6 @@ impl QueuedEvent { match self { QueuedEvent::WebEvent { .. } => "web_events", QueuedEvent::ModsEvent { .. } => "mods_events", - QueuedEvent::Error(_) => "errors", - QueuedEvent::ErrorTracking { .. } => "error_occurences_v2", QueuedEvent::ErrorOccurrenceV3 { .. } => "error_tracking_v3", QueuedEvent::WebVital { .. } => "web_vitals", QueuedEvent::Replay { .. } => "session_replays", @@ -121,8 +112,6 @@ const INITIAL_BATCH_CAPACITY: usize = 64; struct InMemoryBatch { web_events: Vec<(WebEventRow, Option)>, mods_events: Vec<(ModsEventRow, Option)>, - errors: Vec, - error_trackings: Vec<(ErrorTrackingRow, Option)>, error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, web_vitals: Vec<(WebVitalRow, Option)>, replays: Vec<(ReplayRow, Option)>, @@ -133,8 +122,6 @@ impl Default for InMemoryBatch { Self { web_events: Vec::with_capacity(INITIAL_BATCH_CAPACITY), mods_events: Vec::with_capacity(INITIAL_BATCH_CAPACITY), - errors: Vec::new(), - error_trackings: Vec::new(), error_occurrences_v3: Vec::new(), web_vitals: Vec::with_capacity(INITIAL_BATCH_CAPACITY / 4), replays: Vec::new(), @@ -146,8 +133,6 @@ impl InMemoryBatch { fn is_empty(&self) -> bool { self.web_events.is_empty() && self.mods_events.is_empty() - && self.errors.is_empty() - && self.error_trackings.is_empty() && self.error_occurrences_v3.is_empty() && self.web_vitals.is_empty() && self.replays.is_empty() @@ -156,8 +141,6 @@ impl InMemoryBatch { fn total_count(&self) -> usize { self.web_events.len() + self.mods_events.len() - + self.errors.len() - + self.error_trackings.len() + self.error_occurrences_v3.len() + self.web_vitals.len() + self.replays.len() @@ -167,10 +150,6 @@ impl InMemoryBatch { match event { QueuedEvent::WebEvent { row, tracking } => self.web_events.push((*row, tracking)), QueuedEvent::ModsEvent { row, tracking } => self.mods_events.push((row, tracking)), - QueuedEvent::Error(e) => self.errors.push(e), - QueuedEvent::ErrorTracking { row, tracking } => { - self.error_trackings.push((*row, tracking)) - } QueuedEvent::ErrorOccurrenceV3 { row, tracking } => { self.error_occurrences_v3.push((*row, tracking)) } @@ -194,13 +173,6 @@ impl InMemoryBatch { .into_iter() .map(|(row, tracking)| QueuedEvent::ModsEvent { row, tracking }), ); - result.extend(self.errors.into_iter().map(QueuedEvent::Error)); - result.extend(self.error_trackings.into_iter().map(|(row, tracking)| { - QueuedEvent::ErrorTracking { - row: Box::new(row), - tracking, - } - })); result.extend( self.error_occurrences_v3 .into_iter() @@ -225,7 +197,6 @@ impl InMemoryBatch { fn aggregate_usage(&self) -> AggregatedUsage { let estimated_owners = (self.web_events.len() + self.mods_events.len() - + self.error_trackings.len() + self.error_occurrences_v3.len() + self.web_vitals.len() + self.replays.len()) @@ -253,7 +224,6 @@ impl InMemoryBatch { count_usage!(&self.web_events, events); count_usage!(&self.mods_events, events); - count_usage!(&self.error_trackings, error_tracking); count_usage!(&self.error_occurrences_v3, error_tracking); count_usage!(&self.web_vitals, web_vitals); for (row, ctx) in &self.replays { @@ -281,8 +251,6 @@ impl InMemoryBatch { struct BatchSendResult { failed_web_events: Vec<(WebEventRow, Option)>, failed_mods_events: Vec<(ModsEventRow, Option)>, - failed_errors: Vec, - failed_error_trackings: Vec<(ErrorTrackingRow, Option)>, failed_error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, failed_web_vitals: Vec<(WebVitalRow, Option)>, failed_replays: Vec<(ReplayRow, Option)>, @@ -294,8 +262,6 @@ impl BatchSendResult { fn has_failures(&self) -> bool { !self.failed_web_events.is_empty() || !self.failed_mods_events.is_empty() - || !self.failed_errors.is_empty() - || !self.failed_error_trackings.is_empty() || !self.failed_error_occurrences_v3.is_empty() || !self.failed_web_vitals.is_empty() || !self.failed_replays.is_empty() @@ -305,8 +271,6 @@ impl BatchSendResult { InMemoryBatch { web_events: self.failed_web_events, mods_events: self.failed_mods_events, - errors: self.failed_errors, - error_trackings: self.failed_error_trackings, error_occurrences_v3: self.failed_error_occurrences_v3, web_vitals: self.failed_web_vitals, replays: self.failed_replays, @@ -316,8 +280,6 @@ impl BatchSendResult { fn failure_count(&self) -> usize { self.failed_web_events.len() + self.failed_mods_events.len() - + self.failed_errors.len() - + self.failed_error_trackings.len() + self.failed_error_occurrences_v3.len() + self.failed_web_vitals.len() + self.failed_replays.len() @@ -593,8 +555,6 @@ impl BatchQueue { let InMemoryBatch { web_events, mods_events, - errors, - error_trackings, error_occurrences_v3, web_vitals, replays, @@ -602,7 +562,6 @@ impl BatchQueue { let web_event_rows: Vec<_> = web_events.iter().map(|(e, _)| e).collect(); let mods_event_rows: Vec<_> = mods_events.iter().map(|(e, _)| e).collect(); - let error_tracking_rows: Vec<_> = error_trackings.iter().map(|(e, _)| e).collect(); let error_occurrences_v3 = self.enrich_error_occurrences_v3(error_occurrences_v3).await; let error_occurrence_v3_rows: Vec<_> = error_occurrences_v3.iter().map(|(e, _)| e).collect(); @@ -612,8 +571,6 @@ impl BatchQueue { let ( web_events_res, mods_events_res, - errors_res, - error_trackings_res, error_occurrences_v3_res, web_vitals_res, replays_res, @@ -632,22 +589,6 @@ impl BatchQueue { self.tinybird.insert_mods_events(&mods_event_rows).await } }, - async { - if errors.is_empty() { - Ok(()) - } else { - self.tinybird.insert_errors(&errors).await - } - }, - async { - if error_tracking_rows.is_empty() { - Ok(()) - } else { - self.tinybird - .insert_error_trackings(&error_tracking_rows) - .await - } - }, async { if error_occurrence_v3_rows.is_empty() { Ok(()) @@ -689,27 +630,6 @@ impl BatchQueue { result.failed_mods_events = mods_events; } - if let Err(e) = errors_res { - record_batch_error(&mut result, "errors", errors.len(), &e); - if !e.is_transient() { - result.had_permanent_failure = true; - } - result.failed_errors = errors; - } - - if let Err(e) = error_trackings_res { - record_batch_error( - &mut result, - "error_occurences_v2", - error_trackings.len(), - &e, - ); - if !e.is_transient() { - result.had_permanent_failure = true; - } - result.failed_error_trackings = error_trackings; - } - if let Err(e) = error_occurrences_v3_res { record_batch_error( &mut result, @@ -946,6 +866,34 @@ mod tests { } } + fn create_test_error_occurrence() -> QueuedEvent { + QueuedEvent::ErrorOccurrenceV3 { + row: Box::new(ErrorOccurrenceV3Row { + timestamp: Utc::now(), + project_id: Uuid::new_v4(), + environment: "prod".to_string(), + release: String::new(), + group_hash: "group".to_string(), + exact_hash: "exact".to_string(), + error_type: "TestError".to_string(), + error_message: "Test message".to_string(), + handled: false, + stacktrace: "line1".to_string(), + mapped_stacktrace: None, + mapping_used: None, + user_id: String::new(), + session_id: String::new(), + window_id: String::new(), + platform: "web".to_string(), + runtime: "browser".to_string(), + sdk_name: String::new(), + sdk_version: String::new(), + context: "{}".to_string(), + }), + tracking: None, + } + } + mod backup_store_tests { use super::*; @@ -1010,13 +958,7 @@ mod tests { let event = create_test_queued_event(); store.backup_events(&[event], None).await.unwrap(); - let error = QueuedEvent::Error(ErrorRow { - hash: "error-hash".to_string(), - name: "TestError".to_string(), - message: "Test message".to_string(), - stack: vec!["line1".to_string()], - cause_hash: None, - }); + let error = create_test_error_occurrence(); store.backup_events(&[error], None).await.unwrap(); let vital = QueuedEvent::WebVital { @@ -1046,7 +988,7 @@ mod tests { assert_eq!(events.len(), 3); assert!(matches!(events[0].1, QueuedEvent::ModsEvent { .. })); - assert!(matches!(events[1].1, QueuedEvent::Error(_))); + assert!(matches!(events[1].1, QueuedEvent::ErrorOccurrenceV3 { .. })); assert!(matches!(events[2].1, QueuedEvent::WebVital { .. })); } @@ -1133,13 +1075,7 @@ mod tests { batch.push(create_test_queued_event()); batch.push(create_test_queued_event()); - batch.push(QueuedEvent::Error(ErrorRow { - hash: "error-hash".to_string(), - name: "E".to_string(), - message: "M".to_string(), - stack: vec![], - cause_hash: None, - })); + batch.push(create_test_error_occurrence()); assert_eq!(batch.total_count(), 3); } @@ -1149,13 +1085,7 @@ mod tests { let mut batch = InMemoryBatch::default(); batch.push(create_test_queued_event()); - batch.push(QueuedEvent::Error(ErrorRow { - hash: "error-hash".to_string(), - name: "E".to_string(), - message: "M".to_string(), - stack: vec![], - cause_hash: None, - })); + batch.push(create_test_error_occurrence()); batch.push(QueuedEvent::WebVital { row: WebVitalRow { id: Uuid::new_v4(), @@ -1177,10 +1107,9 @@ mod tests { }); assert_eq!(batch.mods_events.len(), 1); - assert_eq!(batch.errors.len(), 1); + assert_eq!(batch.error_occurrences_v3.len(), 1); assert_eq!(batch.web_vitals.len(), 1); assert!(batch.web_events.is_empty()); - assert!(batch.error_trackings.is_empty()); assert!(batch.replays.is_empty()); } @@ -1188,18 +1117,12 @@ mod tests { fn test_into_queued_events() { let mut batch = InMemoryBatch::default(); batch.push(create_test_queued_event()); - batch.push(QueuedEvent::Error(ErrorRow { - hash: "error-hash".to_string(), - name: "E".to_string(), - message: "M".to_string(), - stack: vec![], - cause_hash: None, - })); + batch.push(create_test_error_occurrence()); let queued = batch.into_queued_events(); assert_eq!(queued.len(), 2); assert!(matches!(queued[0], QueuedEvent::ModsEvent { .. })); - assert!(matches!(queued[1], QueuedEvent::Error(_))); + assert!(matches!(queued[1], QueuedEvent::ErrorOccurrenceV3 { .. })); } } @@ -1237,54 +1160,8 @@ mod tests { fn test_datasource_names() { assert_eq!(create_test_queued_event().datasource(), "mods_events"); assert_eq!( - QueuedEvent::Error(ErrorRow { - hash: "error-hash".to_string(), - name: "E".to_string(), - message: "M".to_string(), - stack: vec![], - cause_hash: None, - }) - .datasource(), - "errors" - ); - assert_eq!( - QueuedEvent::ErrorTracking { - row: Box::new(ErrorTrackingRow { - id: Uuid::new_v4(), - project_id: Uuid::new_v4(), - hash: "hash".to_string(), - error_hash: "error-hash".to_string(), - count: 3, - data_entry_id: Some(Uuid::new_v4()), - session_id: None, - identity_key: None, - build_id: None, - plugin_version: String::new(), - source_kind: "error".to_string(), - entry_session_id: String::new(), - entry_country: String::new(), - entry_browser: String::new(), - entry_device: String::new(), - entry_os: String::new(), - player_count: None, - online_mode: None, - minecraft_version: String::new(), - server_type: String::new(), - java_version: String::new(), - java_vendor: String::new(), - os_version: String::new(), - os_arch: String::new(), - core_count: None, - entry_data: "{}".to_string(), - stack_placeholders: "{}".to_string(), - context: None, - handled: None, - created_at: Utc::now(), - }), - tracking: None, - } - .datasource(), - "error_occurences_v2" + create_test_error_occurrence().datasource(), + "error_tracking_v3" ); assert_eq!( QueuedEvent::WebVital { diff --git a/src/error_tracking/java_fingerprint.rs b/src/error_tracking/java_fingerprint.rs new file mode 100644 index 0000000..1914ae7 --- /dev/null +++ b/src/error_tracking/java_fingerprint.rs @@ -0,0 +1,97 @@ +use crate::utils::sha256_hex; +use regex::Regex; +use std::sync::LazyLock; + +static UUID_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"(?i)\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b") + .expect("valid uuid regex") +}); +static HEX_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)\b0x[0-9a-f]+\b").expect("valid hex regex")); +static QUOTED_RE: LazyLock = + LazyLock::new(|| Regex::new(r#""[^"]*"|'[^']*'|`[^`]*`"#).expect("valid quoted regex")); +static HASHISH_RE: LazyLock = + LazyLock::new(|| Regex::new(r"(?i)\b[0-9a-f]{12,}\b").expect("valid hash regex")); +static NUMBER_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\b\d+(?:\.\d+)?\b").expect("valid number regex")); +static JAR_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"(?i)\b[A-Za-z0-9._+-]+(?:-\d+(?:\.\d+)*(?:[-+][A-Za-z0-9._-]+)?)?\.jar\b") + .expect("valid jar regex") +}); +static JAVA_FRAME_LINE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\(([^():]+\.java):\d+\)").expect("valid java frame regex")); +static LAMBDA_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"\$\$Lambda(?:\$[0-9]+)?(?:/[0-9a-fx]+)?").expect("valid lambda regex") +}); +static WHITESPACE_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\s+").expect("valid whitespace regex")); + +pub fn group_hash(error_type: &str, message: &str, stacktrace: &str) -> String { + let normalized = normalize_for_grouping(error_type, message, stacktrace); + sha256_hex(&[normalized.as_bytes()]) +} + +fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> String { + let mut out = String::new(); + out.push_str(&normalize_piece(error_type)); + out.push('\n'); + out.push_str(&normalize_piece(message)); + + for line in stacktrace.lines().take(80) { + let normalized = normalize_piece(line); + if normalized.is_empty() { + continue; + } + out.push('\n'); + out.push_str(&normalized); + } + + out +} + +fn normalize_piece(input: &str) -> String { + let mut value = input.trim().to_ascii_lowercase(); + value = UUID_RE.replace_all(&value, "").into_owned(); + value = HEX_RE.replace_all(&value, "").into_owned(); + value = HASHISH_RE.replace_all(&value, "").into_owned(); + value = QUOTED_RE.replace_all(&value, "").into_owned(); + value = JAR_RE.replace_all(&value, "").into_owned(); + value = JAVA_FRAME_LINE_RE.replace_all(&value, "($1)").into_owned(); + value = LAMBDA_RE.replace_all(&value, "$$Lambda").into_owned(); + value = NUMBER_RE.replace_all(&value, "").into_owned(); + value = WHITESPACE_RE.replace_all(&value, " ").into_owned(); + value.trim().to_string() +} + +#[cfg(test)] +mod tests { + use super::{group_hash, normalize_piece}; + + #[test] + fn normalizes_java_frame_noise() { + let normalized = normalize_piece( + "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42) ~[plugin-1.2.3.jar:?]", + ); + + assert_eq!( + normalized, + "at //com.example.plugin.handle(plugin.java) ~[:?]" + ); + } + + #[test] + fn group_hash_ignores_jar_versions_and_line_numbers() { + let a = group_hash( + "java.lang.RuntimeException", + "Failed for player 123", + "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)", + ); + let b = group_hash( + "java.lang.RuntimeException", + "Failed for player 456", + "\tat plugin-9.9.9.jar//com.example.Plugin.handle(Plugin.java:99)", + ); + + assert_eq!(a, b); + } +} diff --git a/src/error_tracking/mod.rs b/src/error_tracking/mod.rs index 35b8e0b..c1e2ae5 100644 --- a/src/error_tracking/mod.rs +++ b/src/error_tracking/mod.rs @@ -1,3 +1,4 @@ pub mod fingerprint; +pub mod java_fingerprint; pub mod sourcemaps; pub mod v3; diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index ac3bb87..9bed8ee 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -1,7 +1,7 @@ -use crate::error_tracking::fingerprint; use crate::error_tracking::sourcemaps::SourcemapResolver; +use crate::error_tracking::{fingerprint, java_fingerprint}; use crate::models::ErrorTracking; -use crate::tinybird::{ErrorOccurrenceV3Row, WebEventRow}; +use crate::tinybird::{ErrorOccurrenceV3Row, ModsEventRow, WebEventRow}; use chrono::Utc; use serde_json::Value; use std::collections::HashMap; @@ -18,10 +18,105 @@ pub struct WebOccurrenceInput<'a> { pub context: &'a str, } +pub struct ModsOccurrenceInput<'a> { + pub project_id: Uuid, + pub release: Option<&'a str>, + pub server_id: &'a str, + pub session_id: Option<&'a str>, + pub sdk_version: Option<&'a str>, + pub context: &'a str, +} + +pub struct ErrorOnlyOccurrenceInput<'a> { + pub project_id: Uuid, + pub release: Option<&'a str>, + pub session_id: Option<&'a str>, + pub sdk_name: Option<&'a str>, + pub sdk_version: Option<&'a str>, + pub context: &'a str, +} + pub fn build_web_occurrence( input: &WebOccurrenceInput<'_>, error: &ErrorTracking, ) -> ErrorOccurrenceV3Row { + build_occurrence( + OccurrenceInput { + project_id: input.project_id, + release: input.release, + user_id: input.user_id, + session_id: input.session_id, + window_id: input.window_id, + platform: "web", + runtime: "browser", + sdk_name: input.sdk_name, + sdk_version: input.sdk_version, + context: input.context, + group_hash: fingerprint::group_hash, + }, + error, + ) +} + +pub fn build_mods_occurrence( + input: &ModsOccurrenceInput<'_>, + error: &ErrorTracking, +) -> ErrorOccurrenceV3Row { + build_occurrence( + OccurrenceInput { + project_id: input.project_id, + release: input.release, + user_id: Some(input.server_id), + session_id: input.session_id, + window_id: None, + platform: "minecraft-plugin", + runtime: "java", + sdk_name: Some("minecraft-plugin"), + sdk_version: input.sdk_version, + context: input.context, + group_hash: java_fingerprint::group_hash, + }, + error, + ) +} + +pub fn build_error_only_occurrence( + input: &ErrorOnlyOccurrenceInput<'_>, + error: &ErrorTracking, +) -> ErrorOccurrenceV3Row { + build_occurrence( + OccurrenceInput { + project_id: input.project_id, + release: input.release, + user_id: None, + session_id: input.session_id, + window_id: None, + platform: "web", + runtime: "browser", + sdk_name: input.sdk_name, + sdk_version: input.sdk_version, + context: input.context, + group_hash: fingerprint::group_hash, + }, + error, + ) +} + +struct OccurrenceInput<'a> { + project_id: Uuid, + release: Option<&'a str>, + user_id: Option<&'a str>, + session_id: Option<&'a str>, + window_id: Option<&'a str>, + platform: &'a str, + runtime: &'a str, + sdk_name: Option<&'a str>, + sdk_version: Option<&'a str>, + context: &'a str, + group_hash: fn(&str, &str, &str) -> String, +} + +fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorOccurrenceV3Row { let stacktrace = error .error .stack @@ -40,7 +135,7 @@ pub fn build_web_occurrence( // release behavior are verified in production data. environment: "prod".to_string(), release: input.release.unwrap_or_default().to_string(), - group_hash: fingerprint::group_hash(&error_type, &error_message, source_stack), + group_hash: (input.group_hash)(&error_type, &error_message, source_stack), exact_hash: fingerprint::exact_hash(&error_type, &error_message, source_stack), error_type, error_message, @@ -51,8 +146,8 @@ pub fn build_web_occurrence( user_id: input.user_id.unwrap_or_default().to_string(), session_id: input.session_id.unwrap_or_default().to_string(), window_id: input.window_id.unwrap_or_default().to_string(), - platform: "web".to_string(), - runtime: "browser".to_string(), + platform: input.platform.to_string(), + runtime: input.runtime.to_string(), sdk_name: input.sdk_name.unwrap_or_default().to_string(), sdk_version: input.sdk_version.unwrap_or_default().to_string(), context: input.context.to_string(), @@ -63,6 +158,10 @@ pub async fn enrich_with_sourcemap( resolver: Option<&SourcemapResolver>, mut row: ErrorOccurrenceV3Row, ) -> ErrorOccurrenceV3Row { + if row.platform != "web" || row.runtime != "browser" { + return row; + } + let Some(resolver) = resolver else { return row; }; @@ -101,3 +200,67 @@ pub fn web_context(row: &WebEventRow, custom: &HashMap) -> String serde_json::to_string(&context).unwrap_or_else(|_| "{}".to_string()) } + +pub fn mods_context(row: &ModsEventRow, custom: &HashMap) -> String { + let mut context = match serde_json::to_value(row) { + Ok(Value::Object(context)) => context, + _ => serde_json::Map::new(), + }; + + if !custom.is_empty() { + context.insert( + "custom".to_string(), + Value::Object(custom.iter().map(|(k, v)| (k.clone(), v.clone())).collect()), + ); + } + + serde_json::to_string(&context).unwrap_or_else(|_| "{}".to_string()) +} + +#[cfg(test)] +mod tests { + use super::{ModsOccurrenceInput, build_mods_occurrence}; + use crate::error_tracking::java_fingerprint; + use crate::models::{Error, ErrorTracking}; + use uuid::Uuid; + + #[test] + fn mods_occurrences_use_java_group_hash() { + let error = ErrorTracking { + hash: "legacy-client-hash".to_string(), + error: Error { + error: "java.lang.RuntimeException".to_string(), + message: Some("Failed for player 123".to_string()), + stack: Some(vec![ + "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)".to_string(), + ]), + cause: None, + }, + count: None, + session_id: None, + build_id: None, + handled: None, + }; + + let row = build_mods_occurrence( + &ModsOccurrenceInput { + project_id: Uuid::new_v4(), + release: None, + server_id: "server-id", + session_id: None, + sdk_version: None, + context: "{}", + }, + &error, + ); + + assert_eq!( + row.group_hash, + java_fingerprint::group_hash( + "java.lang.RuntimeException", + "Failed for player 123", + "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)" + ) + ); + } +} diff --git a/src/handler/collect.rs b/src/handler/collect.rs index b7b27bf..f285d66 100644 --- a/src/handler/collect.rs +++ b/src/handler/collect.rs @@ -1,10 +1,10 @@ use super::{ - ErrorEntryParams, ErrorStackProcessing, MODS_EVENT_FIELDS, build_mods_error_entry_details, - build_mods_event_row, check_ip_allowed, error_response, extract_known_fields, - get_authorization, get_client_ip, get_country, insert_error_entries, insert_mods_event, - load_project_context, resolve_identity_key, success_response, + MODS_EVENT_FIELDS, build_mods_event_row, check_ip_allowed, error_response, + extract_known_fields, get_authorization, get_client_ip, get_country, + insert_error_occurrence_v3, insert_mods_event, load_project_context, success_response, }; use crate::batch_queue::{FailedRequest, RequestType, TrackingContext}; +use crate::error_tracking::v3::{ModsOccurrenceInput, build_mods_occurrence, mods_context}; use crate::models::{AppState, Request}; use crate::validation::validate_and_filter_payload; use axum::body::Bytes; @@ -103,17 +103,19 @@ pub async fn collect( &mut known, &valid_custom, ); + let error_v3_context = ctx + .error_tracking_enabled + .then(|| mods_context(&event_row, &valid_custom)); - let data_entry_id = match insert_mods_event( + if let Err(e) = insert_mods_event( &state.batch_queue, event_row.clone(), Some(tracking_ctx.clone()), ) .await { - Ok(id) => id, - Err(e) => return e, - }; + return e; + } if !ctx.error_tracking_enabled { return success_response(warnings); @@ -122,29 +124,28 @@ pub async fn collect( if let Some(errors) = errors && !errors.is_empty() { - let error_entry_details = - build_mods_error_entry_details(country.as_deref(), &event_row, &valid_custom); let fallback_identity = server_id.to_string(); + let sdk_version = event_row.plugin_version.as_deref(); + let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); for mut error in errors { if error.session_id.is_none() { error.session_id = session_id.clone(); } - let identity_key = resolve_identity_key( - error.session_id.as_deref(), - Some(fallback_identity.as_str()), + let occurrence = build_mods_occurrence( + &ModsOccurrenceInput { + project_id: ctx.project_id, + release: error.build_id.as_deref(), + server_id: fallback_identity.as_str(), + session_id: error.session_id.as_deref(), + sdk_version, + context: error_v3_context, + }, + &error, ); - if let Err(e) = insert_error_entries( + if let Err(e) = insert_error_occurrence_v3( &state.batch_queue, - ctx.project_id, - Some(data_entry_id), - error, - ErrorEntryParams { - identity_key, - context: None, - details: error_entry_details.clone(), - tracking_ctx: Some(tracking_ctx.clone()), - stack_processing: ErrorStackProcessing::JavaCollect, - }, + occurrence, + Some(tracking_ctx.clone()), ) .await { diff --git a/src/handler/error.rs b/src/handler/error.rs index f7b0e45..9a019dc 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -1,9 +1,9 @@ use super::{ - ErrorEntryDetails, ErrorEntryParams, ErrorStackProcessing, check_ip_allowed, error_response, - get_authorization, get_client_ip, insert_error_entries, load_project_context, - resolve_identity_key, success_response, + check_ip_allowed, error_response, get_authorization, get_client_ip, insert_error_occurrence_v3, + load_project_context, success_response, }; use crate::batch_queue::TrackingContext; +use crate::error_tracking::v3::{ErrorOnlyOccurrenceInput, build_error_only_occurrence}; use crate::models::{AppState, ErrorTracking}; use axum::Json; use axum::extract::State; @@ -24,6 +24,10 @@ pub(crate) struct ErrorRequest { build_id: Option, #[serde(default)] context: Option, + #[serde(default, alias = "sdk_name")] + sdk_name: Option, + #[serde(default, alias = "sdk_version")] + sdk_version: Option, } pub async fn error( @@ -68,21 +72,20 @@ pub async fn error( error.build_id = payload.build_id.clone(); } let replay_session_id = error.session_id.clone(); - let identity_key = resolve_identity_key(error.session_id.as_deref(), None); - if let Err(e) = insert_error_entries( - &state.batch_queue, - ctx.project_id, - None, - error, - ErrorEntryParams { - identity_key, - context: context.clone(), - details: ErrorEntryDetails::error_only(), - tracking_ctx: Some(tracking_ctx.clone()), - stack_processing: ErrorStackProcessing::Raw, + let occurrence = build_error_only_occurrence( + &ErrorOnlyOccurrenceInput { + project_id: ctx.project_id, + release: error.build_id.as_deref(), + session_id: error.session_id.as_deref(), + sdk_name: payload.sdk_name.as_deref(), + sdk_version: payload.sdk_version.as_deref(), + context: context.as_deref().unwrap_or("{}"), }, - ) - .await + &error, + ); + if let Err(e) = + insert_error_occurrence_v3(&state.batch_queue, occurrence, Some(tracking_ctx.clone())) + .await { return e; } diff --git a/src/handler/java_stack_parameterization.rs b/src/handler/java_stack_parameterization.rs deleted file mode 100644 index bee59bd..0000000 --- a/src/handler/java_stack_parameterization.rs +++ /dev/null @@ -1,210 +0,0 @@ -use crate::models::Error; -use crate::tinybird::ErrorRow; -use crate::utils::sha256_hex; -use regex::{Captures, Regex}; -use serde_json::{Map, Value}; -use std::collections::HashMap; -use std::sync::LazyLock; - -static STACKTRACE_JAR_PATTERN: LazyLock = - LazyLock::new(|| Regex::new(r"[A-Za-z0-9._+-]+\.jar").expect("valid jar regex")); - -pub struct ParameterizedErrorRows { - pub error_hash: String, - pub rows: Vec, - pub stack_placeholders: String, -} - -#[derive(Default)] -struct StackPlaceholderState { - next_jar_index: usize, - placeholders_by_value: HashMap, - values_by_placeholder: Map, -} - -impl StackPlaceholderState { - fn placeholder_for_jar(&mut self, jar_name: &str) -> String { - if let Some(existing) = self.placeholders_by_value.get(jar_name) { - return existing.clone(); - } - - let placeholder = format!("__FASTSTATS_JAR_{}__", self.next_jar_index); - self.next_jar_index += 1; - self.placeholders_by_value - .insert(jar_name.to_string(), placeholder.clone()); - self.values_by_placeholder - .insert(placeholder.clone(), Value::String(jar_name.to_string())); - placeholder - } - - fn parameterize_stack_line(&mut self, line: String) -> String { - if !line.contains(".jar") { - return line; - } - - STACKTRACE_JAR_PATTERN - .replace_all(&line, |captures: &Captures| { - self.placeholder_for_jar(&captures[0]) - }) - .into_owned() - } - - fn parameterize_stack(&mut self, stack: Option>) -> Vec { - stack - .unwrap_or_default() - .into_iter() - .map(|line| self.parameterize_stack_line(line)) - .collect() - } - - fn into_json_string(self) -> String { - Value::Object(self.values_by_placeholder).to_string() - } -} - -fn build_error_rows( - mut error: Error, - errors: &mut Vec, - placeholders: &mut StackPlaceholderState, -) -> String { - let cause = error - .cause - .take() - .map(|cause| build_error_rows(*cause, errors, placeholders)); - let cause_hash = cause.as_deref().unwrap_or(""); - let message = error.message.unwrap_or_default(); - let stack = placeholders.parameterize_stack(error.stack); - let stack_json = serde_json::to_string(&stack).unwrap_or_default(); - let hash = sha256_hex(&[ - error.error.as_bytes(), - b"\x1f", - message.as_bytes(), - b"\x1f", - stack_json.as_bytes(), - b"\x1f", - cause_hash.as_bytes(), - ]); - errors.push(ErrorRow { - hash: hash.clone(), - name: error.error, - message, - stack, - cause_hash: cause, - }); - - hash -} - -pub fn build_parameterized_error_rows(error: Error) -> ParameterizedErrorRows { - let mut placeholders = StackPlaceholderState::default(); - let mut rows = Vec::new(); - let error_hash = build_error_rows(error, &mut rows, &mut placeholders); - - ParameterizedErrorRows { - error_hash, - rows, - stack_placeholders: placeholders.into_json_string(), - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - fn build_test_error(stack: &[&str], cause: Option) -> Error { - Error { - error: "java.lang.RuntimeException".to_string(), - message: Some("boom".to_string()), - stack: Some(stack.iter().map(|line| (*line).to_string()).collect()), - cause: cause.map(Box::new), - } - } - - #[test] - fn parameterizes_jar_names_without_changing_non_jar_frames() { - let result = build_parameterized_error_rows(build_test_error( - &[ - "java.lang.RuntimeException: boom", - "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)", - "\tat com.example.App.main(App.java:10)", - ], - None, - )); - - assert_eq!(result.rows.len(), 1); - assert_eq!( - result.rows[0].stack, - vec![ - "java.lang.RuntimeException: boom".to_string(), - "\tat __FASTSTATS_JAR_0__//com.example.Plugin.handle(Plugin.java:42)".to_string(), - "\tat com.example.App.main(App.java:10)".to_string(), - ] - ); - assert_eq!( - serde_json::from_str::(&result.stack_placeholders).unwrap(), - json!({ "__FASTSTATS_JAR_0__": "plugin-1.2.3.jar" }) - ); - } - - #[test] - fn reuses_placeholders_for_repeated_jar_names() { - let result = build_parameterized_error_rows(build_test_error( - &[ - "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)", - "\t... 9 more ~[plugin-1.2.3.jar:?]", - ], - None, - )); - - assert_eq!( - result.rows[0].stack, - vec![ - "\tat __FASTSTATS_JAR_0__//com.example.Plugin.handle(Plugin.java:42)".to_string(), - "\t... 9 more ~[__FASTSTATS_JAR_0__:?]".to_string(), - ] - ); - assert_eq!( - serde_json::from_str::(&result.stack_placeholders).unwrap(), - json!({ "__FASTSTATS_JAR_0__": "plugin-1.2.3.jar" }) - ); - } - - #[test] - fn canonical_error_hash_matches_when_only_jar_names_change() { - let first = build_parameterized_error_rows(build_test_error( - &["\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)"], - None, - )); - let second = build_parameterized_error_rows(build_test_error( - &["\tat plugin-9.9.9.jar//com.example.Plugin.handle(Plugin.java:42)"], - None, - )); - - assert_eq!(first.error_hash, second.error_hash); - assert_eq!(first.rows[0].stack, second.rows[0].stack); - } - - #[test] - fn shares_placeholder_space_across_root_and_cause_stacks() { - let cause = build_test_error( - &["\tat helper-2.0.0.jar//com.example.Helper.call(Helper.java:12)"], - None, - ); - let root = build_test_error( - &["\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)"], - Some(cause), - ); - - let result = build_parameterized_error_rows(root); - - assert_eq!(result.rows.len(), 2); - assert_eq!( - serde_json::from_str::(&result.stack_placeholders).unwrap(), - json!({ - "__FASTSTATS_JAR_0__": "helper-2.0.0.jar", - "__FASTSTATS_JAR_1__": "plugin-1.2.3.jar", - }) - ); - } -} diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 7887524..4c4dccc 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1,7 +1,6 @@ mod collect; mod error; mod identify; -mod java_stack_parameterization; mod replay; mod vitals; mod web; @@ -13,13 +12,9 @@ pub use replay::replay; pub use vitals::vitals; pub use web::web; -use self::java_stack_parameterization::build_parameterized_error_rows; use crate::batch_queue::{BatchQueue, FailedRequest, QueuedEvent, RequestType, TrackingContext}; -use crate::models::{DataSource, Error, ErrorTracking}; -use crate::tinybird::{ - ErrorOccurrenceV3Row, ErrorRow, ErrorTrackingRow, ModsEventRow, WebEventRow, -}; -use crate::utils::sha256_hex; +use crate::models::DataSource; +use crate::tinybird::{ErrorOccurrenceV3Row, ModsEventRow, WebEventRow}; use axum::Json; use axum::http::{HeaderMap, StatusCode}; use moka::future::Cache; @@ -43,52 +38,6 @@ static PROJECT_CACHE: LazyLock>> = LazyLock::n pub type HandlerResponse = (StatusCode, Json); -#[derive(Clone, Copy, Debug, Default)] -pub enum ErrorStackProcessing { - #[default] - Raw, - JavaCollect, -} - -pub struct ErrorEntryParams { - pub identity_key: Option, - pub context: Option, - pub details: ErrorEntryDetails, - pub tracking_ctx: Option, - pub stack_processing: ErrorStackProcessing, -} - -#[derive(Clone, Default)] -pub struct ErrorEntryDetails { - pub plugin_version: String, - pub source_kind: String, - pub entry_session_id: String, - pub entry_country: String, - pub entry_browser: String, - pub entry_device: String, - pub entry_os: String, - pub player_count: Option, - pub online_mode: Option, - pub minecraft_version: String, - pub server_type: String, - pub java_version: String, - pub java_vendor: String, - pub os_version: String, - pub os_arch: String, - pub core_count: Option, - pub entry_data: String, -} - -impl ErrorEntryDetails { - pub fn error_only() -> Self { - Self { - source_kind: "error".to_string(), - entry_data: "{}".to_string(), - ..Self::default() - } - } -} - #[derive(Debug, Deserialize, Default)] pub struct EncodingQuery { pub encoding: Option, @@ -417,31 +366,6 @@ fn to_custom_json(data: &HashMap) -> String { } } -pub fn build_mods_error_entry_details( - country: Option<&str>, - row: &ModsEventRow, - custom: &HashMap, -) -> ErrorEntryDetails { - ErrorEntryDetails { - plugin_version: row.plugin_version.clone().unwrap_or_default(), - source_kind: "minecraft-plugin".to_string(), - entry_session_id: row.server_id.to_string(), - entry_country: country.unwrap_or_default().to_string(), - entry_os: row.os_name.clone().unwrap_or_default(), - player_count: row.player_count, - online_mode: row.online_mode, - minecraft_version: row.minecraft_version.clone().unwrap_or_default(), - server_type: row.server_type.clone().unwrap_or_default(), - java_version: row.java_version.clone().unwrap_or_default(), - java_vendor: row.java_vendor.clone().unwrap_or_default(), - os_version: row.os_version.clone().unwrap_or_default(), - os_arch: row.os_arch.clone().unwrap_or_default(), - core_count: row.core_count, - entry_data: to_custom_json(custom), - ..ErrorEntryDetails::default() - } -} - /// Known internal fields for web_events row. These are extracted before /// datasource validation so they always reach the Tinybird row. const WEB_EVENT_FIELDS: &[&str] = &[ @@ -596,128 +520,6 @@ pub async fn insert_mods_event( Ok(event_id) } -fn build_error_rows(mut error: Error, errors: &mut Vec) -> String { - let cause = error - .cause - .take() - .map(|cause| build_error_rows(*cause, errors)); - let cause_hash = cause.as_deref().unwrap_or(""); - let message = error.message.unwrap_or_default(); - let stack = error.stack.unwrap_or_default(); - let stack_json = serde_json::to_string(&stack).unwrap_or_default(); - let hash = sha256_hex(&[ - error.error.as_bytes(), - b"\x1f", - message.as_bytes(), - b"\x1f", - stack_json.as_bytes(), - b"\x1f", - cause_hash.as_bytes(), - ]); - errors.push(ErrorRow { - hash: hash.clone(), - name: error.error, - message, - stack, - cause_hash: cause, - }); - - hash -} - -pub async fn insert_error_entries( - batch_queue: &BatchQueue, - project_id: Uuid, - data_entry_id: Option, - data: ErrorTracking, - params: ErrorEntryParams, -) -> Result<(), HandlerResponse> { - let ErrorTracking { - hash, - error, - count, - session_id, - build_id, - handled, - } = data; - - let (error_hash, error_rows, stack_placeholders) = match params.stack_processing { - ErrorStackProcessing::Raw => { - let mut error_rows = Vec::new(); - let error_hash = build_error_rows(error, &mut error_rows); - (error_hash, error_rows, "{}".to_string()) - } - ErrorStackProcessing::JavaCollect => { - let parameterized = build_parameterized_error_rows(error); - ( - parameterized.error_hash, - parameterized.rows, - parameterized.stack_placeholders, - ) - } - }; - - for error_row in error_rows { - batch_queue - .queue_event(QueuedEvent::Error(error_row)) - .await - .map_err(|e| { - error!("Failed to queue error: {}", e); - error_response(StatusCode::INTERNAL_SERVER_ERROR, "Failed to queue error") - })?; - } - - let occurrence_count = count.unwrap_or(1).max(1) as u32; - let created_at = chrono::Utc::now(); - let error_tracking = ErrorTrackingRow { - id: Uuid::new_v4(), - project_id, - hash, - error_hash, - count: occurrence_count, - data_entry_id, - session_id, - identity_key: params.identity_key, - build_id, - plugin_version: params.details.plugin_version, - source_kind: params.details.source_kind, - entry_session_id: params.details.entry_session_id, - entry_country: params.details.entry_country, - entry_browser: params.details.entry_browser, - entry_device: params.details.entry_device, - entry_os: params.details.entry_os, - player_count: params.details.player_count, - online_mode: params.details.online_mode, - minecraft_version: params.details.minecraft_version, - server_type: params.details.server_type, - java_version: params.details.java_version, - java_vendor: params.details.java_vendor, - os_version: params.details.os_version, - os_arch: params.details.os_arch, - core_count: params.details.core_count, - entry_data: params.details.entry_data, - stack_placeholders, - context: params.context, - handled, - created_at, - }; - - batch_queue - .queue_event(QueuedEvent::ErrorTracking { - row: Box::new(error_tracking), - tracking: params.tracking_ctx, - }) - .await - .map_err(|e| { - error!("Failed to queue error tracking: {}", e); - error_response( - StatusCode::INTERNAL_SERVER_ERROR, - "Failed to queue error tracking", - ) - })?; - Ok(()) -} - pub async fn insert_error_occurrence_v3( batch_queue: &BatchQueue, row: ErrorOccurrenceV3Row, @@ -739,20 +541,6 @@ pub async fn insert_error_occurrence_v3( Ok(()) } -pub fn resolve_identity_key( - session_id: Option<&str>, - fallback_identifier: Option<&str>, -) -> Option { - session_id - .filter(|value| !value.is_empty()) - .map(str::to_string) - .or_else(|| { - fallback_identifier - .filter(|value| !value.is_empty()) - .map(str::to_string) - }) -} - pub async fn process_failed_request( batch_queue: &BatchQueue, pool: &sqlx::PgPool, @@ -812,41 +600,39 @@ async fn process_collect_request( &valid_custom, ); - let data_entry_id = - insert_mods_event(batch_queue, event_row.clone(), Some(tracking_ctx.clone())) - .await - .map_err(|_| "Failed to queue event".to_string())?; + let error_v3_context = ctx + .error_tracking_enabled + .then(|| crate::error_tracking::v3::mods_context(&event_row, &valid_custom)); + + insert_mods_event(batch_queue, event_row.clone(), Some(tracking_ctx.clone())) + .await + .map_err(|_| "Failed to queue event".to_string())?; if ctx.error_tracking_enabled && let Some(errors) = errors && !errors.is_empty() { - let error_entry_details = - build_mods_error_entry_details(request.country.as_deref(), &event_row, &valid_custom); let fallback_identity = server_id.to_string(); + let sdk_version = event_row.plugin_version.as_deref(); + let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); for mut error in errors { if error.session_id.is_none() { error.session_id = session_id.clone(); } - let identity_key = resolve_identity_key( - error.session_id.as_deref(), - Some(fallback_identity.as_str()), - ); - insert_error_entries( - batch_queue, - ctx.project_id, - Some(data_entry_id), - error, - ErrorEntryParams { - identity_key, - context: None, - details: error_entry_details.clone(), - tracking_ctx: Some(tracking_ctx.clone()), - stack_processing: ErrorStackProcessing::JavaCollect, + let occurrence = crate::error_tracking::v3::build_mods_occurrence( + &crate::error_tracking::v3::ModsOccurrenceInput { + project_id: ctx.project_id, + release: error.build_id.as_deref(), + server_id: fallback_identity.as_str(), + session_id: error.session_id.as_deref(), + sdk_version, + context: error_v3_context, }, - ) - .await - .map_err(|_| "Failed to queue error".to_string())?; + &error, + ); + insert_error_occurrence_v3(batch_queue, occurrence, Some(tracking_ctx.clone())) + .await + .map_err(|_| "Failed to queue error".to_string())?; } } @@ -1439,29 +1225,4 @@ mod tests { ); } } - - mod identity_resolution { - use super::*; - - #[test] - fn prefers_session_id_when_present() { - assert_eq!( - resolve_identity_key(Some("session-1"), Some("fallback-1")), - Some("session-1".to_string()) - ); - } - - #[test] - fn falls_back_when_session_missing() { - assert_eq!( - resolve_identity_key(None, Some("fallback-1")), - Some("fallback-1".to_string()) - ); - } - - #[test] - fn ignores_empty_values() { - assert_eq!(resolve_identity_key(Some(""), Some("")), None); - } - } } diff --git a/src/tinybird.rs b/src/tinybird.rs index 5e108d1..567a96d 100644 --- a/src/tinybird.rs +++ b/src/tinybird.rs @@ -67,50 +67,6 @@ pub struct ModsEventRow { pub created_at: DateTime, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ErrorRow { - pub hash: String, - pub name: String, - pub message: String, - pub stack: Vec, - pub cause_hash: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ErrorTrackingRow { - pub id: Uuid, - pub project_id: Uuid, - pub hash: String, - pub error_hash: String, - pub count: u32, - pub data_entry_id: Option, - pub session_id: Option, - pub identity_key: Option, - pub build_id: Option, - pub plugin_version: String, - pub source_kind: String, - pub entry_session_id: String, - pub entry_country: String, - pub entry_browser: String, - pub entry_device: String, - pub entry_os: String, - pub player_count: Option, - pub online_mode: Option, - pub minecraft_version: String, - pub server_type: String, - pub java_version: String, - pub java_vendor: String, - pub os_version: String, - pub os_arch: String, - pub core_count: Option, - pub entry_data: String, - pub stack_placeholders: String, - pub context: Option, - pub handled: Option, - #[serde(with = "chrono::serde::ts_milliseconds")] - pub created_at: DateTime, -} - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ErrorOccurrenceV3Row { #[serde(with = "chrono::serde::ts_milliseconds")] @@ -287,17 +243,6 @@ impl TinybirdClient { self.send_batch("mods_events", events).await } - pub async fn insert_errors(&self, errors: &[ErrorRow]) -> Result<(), TinybirdError> { - self.send_batch("errors", errors).await - } - - pub async fn insert_error_trackings( - &self, - rows: &[&ErrorTrackingRow], - ) -> Result<(), TinybirdError> { - self.send_batch("error_occurences_v2", rows).await - } - pub async fn insert_error_occurrences_v3( &self, rows: &[&ErrorOccurrenceV3Row], diff --git a/test.ts b/test.ts new file mode 100644 index 0000000..204fc84 --- /dev/null +++ b/test.ts @@ -0,0 +1,90 @@ +declare const process: { + env: Record; + argv: string[]; + exitCode?: number; +}; + +type CollectPayload = { + server_id: string; + data: Record; + errors?: Array>; +}; + +const endpoint = process.env.COLLECT_URL ?? "http://localhost:7000/v1/collect"; +const token = process.env.PROJECT_TOKEN ?? "0f0606db75c90ca0c3681cc623a55bc8"; + +const defaults: CollectPayload = { + server_id: crypto.randomUUID(), + data: { + player_count: 12, + online_mode: true, + plugin_version: "0.0.0-test", + minecraft_version: "1.21.1", + server_type: "paper", + java_version: "21", + java_vendor: "Eclipse Adoptium", + os_name: "Linux", + os_arch: "amd64", + os_version: "6.8.0", + core_count: 8, + number_map: { + hello: Math.floor(Math.random() * 1000), + another: Math.floor(Math.random() * 1000), + }, + }, +}; + +function parseOverrides(): Partial { + const input = process.argv.slice(2).join(" ").trim(); + if (!input) { + return {}; + } + + try { + return JSON.parse(input); + } catch (error) { + throw new Error( + `Could not parse CLI JSON override: ${error instanceof Error ? error.message : String(error)}`, + ); + } +} + +function mergePayload(overrides: Partial): CollectPayload { + return { + ...defaults, + ...overrides, + data: { + ...defaults.data, + ...(overrides.data ?? {}), + }, + }; +} + +async function main() { + if (token === "replace-me-with-project-token") { + console.warn( + "PROJECT_TOKEN is not set; the request will probably be unauthorized.", + ); + } + + const payload = mergePayload(parseOverrides()); + + const response = await fetch(endpoint, { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + }); + + const text = await response.text(); + console.log("Status:", response.status, response.statusText); + console.log("Response:", text || ""); + console.log("Sent:", JSON.stringify(payload, null, 2)); +} + +main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); From 1304daf7317bd7b70beb4083404007a424c205a3 Mon Sep 17 00:00:00 2001 From: Luca Date: Wed, 27 May 2026 10:52:01 +0200 Subject: [PATCH 03/17] remove test file --- test.ts | 90 --------------------------------------------------------- 1 file changed, 90 deletions(-) delete mode 100644 test.ts diff --git a/test.ts b/test.ts deleted file mode 100644 index 204fc84..0000000 --- a/test.ts +++ /dev/null @@ -1,90 +0,0 @@ -declare const process: { - env: Record; - argv: string[]; - exitCode?: number; -}; - -type CollectPayload = { - server_id: string; - data: Record; - errors?: Array>; -}; - -const endpoint = process.env.COLLECT_URL ?? "http://localhost:7000/v1/collect"; -const token = process.env.PROJECT_TOKEN ?? "0f0606db75c90ca0c3681cc623a55bc8"; - -const defaults: CollectPayload = { - server_id: crypto.randomUUID(), - data: { - player_count: 12, - online_mode: true, - plugin_version: "0.0.0-test", - minecraft_version: "1.21.1", - server_type: "paper", - java_version: "21", - java_vendor: "Eclipse Adoptium", - os_name: "Linux", - os_arch: "amd64", - os_version: "6.8.0", - core_count: 8, - number_map: { - hello: Math.floor(Math.random() * 1000), - another: Math.floor(Math.random() * 1000), - }, - }, -}; - -function parseOverrides(): Partial { - const input = process.argv.slice(2).join(" ").trim(); - if (!input) { - return {}; - } - - try { - return JSON.parse(input); - } catch (error) { - throw new Error( - `Could not parse CLI JSON override: ${error instanceof Error ? error.message : String(error)}`, - ); - } -} - -function mergePayload(overrides: Partial): CollectPayload { - return { - ...defaults, - ...overrides, - data: { - ...defaults.data, - ...(overrides.data ?? {}), - }, - }; -} - -async function main() { - if (token === "replace-me-with-project-token") { - console.warn( - "PROJECT_TOKEN is not set; the request will probably be unauthorized.", - ); - } - - const payload = mergePayload(parseOverrides()); - - const response = await fetch(endpoint, { - method: "POST", - headers: { - Authorization: `Bearer ${token}`, - "Content-Type": "application/json", - }, - body: JSON.stringify(payload), - }); - - const text = await response.text(); - console.log("Status:", response.status, response.statusText); - console.log("Response:", text || ""); - console.log("Sent:", JSON.stringify(payload, null, 2)); -} - -main().catch((error) => { - console.error(error); - process.exitCode = 1; -}); From 98efbc4ef94c3fc89e3489395c3927ca24892ccf Mon Sep 17 00:00:00 2001 From: Luca Date: Fri, 29 May 2026 11:58:48 +0200 Subject: [PATCH 04/17] add proguard --- src/error_tracking/mod.rs | 1 + src/error_tracking/proguard.rs | 446 +++++++++++++++++++++++++++++++ src/error_tracking/sourcemaps.rs | 184 ++++++++++++- src/error_tracking/v3.rs | 33 ++- src/main.rs | 2 +- 5 files changed, 654 insertions(+), 12 deletions(-) create mode 100644 src/error_tracking/proguard.rs diff --git a/src/error_tracking/mod.rs b/src/error_tracking/mod.rs index c1e2ae5..8767851 100644 --- a/src/error_tracking/mod.rs +++ b/src/error_tracking/mod.rs @@ -1,4 +1,5 @@ pub mod fingerprint; pub mod java_fingerprint; +pub mod proguard; pub mod sourcemaps; pub mod v3; diff --git a/src/error_tracking/proguard.rs b/src/error_tracking/proguard.rs new file mode 100644 index 0000000..68db993 --- /dev/null +++ b/src/error_tracking/proguard.rs @@ -0,0 +1,446 @@ +use std::collections::HashMap; +use std::collections::hash_map::Entry; + +use uuid::Uuid; + +const PROGUARD_DIR: &str = "proguard"; + +pub struct ProguardMapping { + classes: HashMap, +} + +struct ClassMapping { + original_name: String, + file_name: Option, + methods: Vec, + fields: HashMap, +} + +struct MethodMapping { + original_name: String, + obfuscated_name: String, + start_line: Option, + end_line: Option, +} + +impl ProguardMapping { + pub fn parse(input: &str) -> Result { + let mut classes = HashMap::new(); + let mut current_class: Option<(String, ClassMapping)> = None; + + for line in input.lines() { + let line = line.trim_end(); + if line.is_empty() { + continue; + } + + if line.starts_with('#') { + if let Some((_, class)) = current_class.as_mut() + && let Some(file_name) = extract_file_name(line) + { + class.file_name = Some(file_name); + } + continue; + } + + if !line.starts_with(' ') && !line.starts_with('\t') { + if let Some((obfuscated, class)) = current_class.take() { + classes.insert(obfuscated, class); + } + + if let Some((original, obfuscated)) = parse_class_line(line) { + current_class = Some(( + obfuscated.to_owned(), + ClassMapping { + original_name: original.to_owned(), + file_name: None, + methods: Vec::new(), + fields: HashMap::new(), + }, + )); + } + continue; + } + + if let Some((_, class)) = current_class.as_mut() { + parse_member_line(line.trim(), class); + } + } + + if let Some((obfuscated, class)) = current_class { + classes.insert(obfuscated, class); + } + + Ok(Self { classes }) + } + + pub fn parse_many_bytes(parts: &[Vec]) -> Result { + let mut classes: HashMap = HashMap::new(); + + for part in parts { + let input = std::str::from_utf8(part).map_err(|_| ())?; + let mapping = Self::parse(input)?; + for (obfuscated_name, class) in mapping.classes { + match classes.entry(obfuscated_name) { + Entry::Occupied(mut occupied) => occupied.get_mut().merge(class), + Entry::Vacant(vacant) => { + vacant.insert(class); + } + } + } + } + + Ok(ProguardMapping { classes }) + } + + pub fn retrace(&self, stacktrace: &str) -> String { + let mut out = String::with_capacity(stacktrace.len()); + + let mut first = true; + for line in stacktrace.lines() { + if !first { + out.push('\n'); + } + first = false; + self.retrace_line_into(line, &mut out); + } + + if stacktrace.ends_with('\n') { + out.push('\n'); + } + + out + } + + fn retrace_line_into(&self, line: &str, out: &mut String) { + let trimmed = line.trim_start(); + let prefix_len = line.len() - trimmed.len(); + let prefix = &line[..prefix_len]; + + if let Some(rest) = trimmed.strip_prefix("at ") { + self.retrace_stack_frame(prefix, rest, out); + } else { + self.retrace_exception_line_into(line, out); + } + } + + fn retrace_stack_frame(&self, prefix: &str, rest: &str, out: &mut String) { + let Some(paren_start) = rest.find('(') else { + push_java_frame(out, prefix, rest); + return; + }; + + let qualified = &rest[..paren_start]; + let location = &rest[paren_start..]; + let (class_prefix, qualified) = split_container_prefix(qualified); + + let Some(dot_pos) = qualified.rfind('.') else { + push_java_frame(out, prefix, rest); + return; + }; + + let obf_class = &qualified[..dot_pos]; + let obf_method = &qualified[dot_pos + 1..]; + + let Some(class) = self.classes.get(obf_class) else { + push_java_frame(out, prefix, rest); + return; + }; + + let line_num = parse_stacktrace_line_number(location); + let resolved_method = self.resolve_method(class, obf_method, line_num); + let method_name = resolved_method + .map(|m| m.original_name.as_str()) + .unwrap_or(obf_method); + let source_file = class.file_name.as_deref().unwrap_or("Unknown Source"); + + out.reserve( + prefix.len() + + 4 + + class_prefix.len() + + class.original_name.len() + + method_name.len() + + source_file.len() + + 16, + ); + out.push_str(prefix); + out.push_str("at "); + out.push_str(class_prefix); + out.push_str(&class.original_name); + out.push('.'); + out.push_str(method_name); + out.push('('); + out.push_str(source_file); + if let Some(n) = line_num { + out.push(':'); + push_u32(out, n); + } + out.push(')'); + } + + fn retrace_exception_line_into(&self, line: &str, out: &mut String) { + let trimmed = line.trim_start(); + let prefix_len = line.len() - trimmed.len(); + let prefix = &line[..prefix_len]; + + let (before_class, class_and_rest) = if let Some(rest) = trimmed.strip_prefix("Caused by: ") + { + ("Caused by: ", rest) + } else { + ("", trimmed) + }; + + let (class_part, suffix) = class_and_rest + .split_once(": ") + .map(|(c, m)| (c, Some(m))) + .unwrap_or((class_and_rest, None)); + + let (class_prefix, obf_class) = split_container_prefix(class_part); + + if let Some(class) = self.classes.get(obf_class) { + out.reserve( + prefix.len() + + before_class.len() + + class_prefix.len() + + class.original_name.len() + + suffix.map(str::len).unwrap_or(0) + + 2, + ); + out.push_str(prefix); + out.push_str(before_class); + out.push_str(class_prefix); + out.push_str(&class.original_name); + + if let Some(message) = suffix { + out.push_str(": "); + out.push_str(message); + } + } else { + out.push_str(line); + } + } + + fn resolve_method<'a>( + &'a self, + class: &'a ClassMapping, + obf_method: &str, + line: Option, + ) -> Option<&'a MethodMapping> { + if let Some(line_num) = line { + class + .methods + .iter() + .find(|m| { + m.obfuscated_name == obf_method + && matches!( + (m.start_line, m.end_line), + (Some(start), Some(end)) + if line_num >= start && line_num <= end + ) + }) + .or_else(|| { + class + .methods + .iter() + .find(|m| m.obfuscated_name == obf_method) + }) + } else { + class + .methods + .iter() + .find(|m| m.obfuscated_name == obf_method) + } + } +} + +impl ClassMapping { + fn merge(&mut self, other: ClassMapping) { + if self.file_name.is_none() { + self.file_name = other.file_name; + } + self.methods.extend(other.methods); + self.fields.extend(other.fields); + } +} + +fn extract_file_name(line: &str) -> Option { + let json_start = line.find('{')?; + let meta = serde_json::from_str::(&line[json_start..]).ok()?; + meta.get("fileName") + .and_then(|v| v.as_str()) + .map(str::to_owned) +} + +fn parse_class_line(line: &str) -> Option<(&str, &str)> { + let line = line.strip_suffix(':')?; + let (original, obfuscated) = line.split_once(" -> ")?; + Some((original.trim(), obfuscated.trim())) +} + +fn split_container_prefix(qualified: &str) -> (&str, &str) { + if let Some(slash_pos) = qualified.rfind('/') { + (&qualified[..=slash_pos], &qualified[slash_pos + 1..]) + } else { + ("", qualified) + } +} + +fn parse_stacktrace_line_number(location: &str) -> Option { + location + .trim_start_matches('(') + .trim_end_matches(')') + .rsplit_once(':') + .and_then(|(_, num)| num.parse::().ok()) +} + +fn parse_member_line(line: &str, class: &mut ClassMapping) { + let Some((original_part, obfuscated)) = line.rsplit_once(" -> ") else { + return; + }; + + let obfuscated = obfuscated.trim(); + + if let Some(method) = parse_method_with_lines(original_part) { + class.methods.push(MethodMapping { + original_name: method.name, + obfuscated_name: obfuscated.to_owned(), + start_line: Some(method.start_line), + end_line: Some(method.end_line), + }); + return; + } + + if original_part.contains('(') { + if let Some(method_name) = parse_method_name(original_part) { + class.methods.push(MethodMapping { + original_name: method_name, + obfuscated_name: obfuscated.to_owned(), + start_line: None, + end_line: None, + }); + } + return; + } + + if let Some(field_name) = original_part.split_whitespace().last() { + class + .fields + .insert(obfuscated.to_owned(), field_name.to_owned()); + } +} + +struct ParsedMethod { + name: String, + start_line: u32, + end_line: u32, +} + +fn parse_method_name(s: &str) -> Option { + let paren_pos = s.find('(')?; + let before_paren = &s[..paren_pos]; + let method_name = before_paren + .rsplit_once(' ') + .map(|(_, name)| name) + .unwrap_or(before_paren); + Some(method_name.to_owned()) +} + +fn parse_method_with_lines(s: &str) -> Option { + let s = s.trim(); + let (start_str, rest) = s.split_once(':')?; + let start_line = start_str.parse().ok()?; + let (end_str, rest) = rest.split_once(':')?; + let end_line = end_str.parse().ok()?; + + let paren_pos = rest.find('(')?; + let before_paren = &rest[..paren_pos]; + let method_name = before_paren + .rsplit_once(' ') + .map(|(_, name)| name) + .unwrap_or(before_paren); + + Some(ParsedMethod { + name: method_name.to_owned(), + start_line, + end_line, + }) +} + +fn push_java_frame(out: &mut String, prefix: &str, rest: &str) { + out.reserve(prefix.len() + 3 + rest.len()); + out.push_str(prefix); + out.push_str("at "); + out.push_str(rest); +} + +fn push_u32(out: &mut String, value: u32) { + use std::fmt::Write; + let _ = write!(out, "{value}"); +} + +pub fn s3_prefix(project_id: Uuid, build_id: &str) -> String { + let mut key = String::with_capacity(36 + 1 + build_id.len() + 1 + PROGUARD_DIR.len() + 1); + use std::fmt::Write; + let _ = write!(key, "{project_id}"); + key.push('/'); + key.push_str(build_id); + key.push('/'); + key.push_str(PROGUARD_DIR); + key.push('/'); + key +} + +#[cfg(test)] +mod tests { + use super::{ProguardMapping, s3_prefix}; + use uuid::Uuid; + + #[test] + fn builds_s3_prefix_with_trailing_slash() { + let project_id = Uuid::parse_str("01954b9b-7b1d-72b8-8af3-f8d058f60b79").unwrap(); + assert_eq!( + s3_prefix(project_id, "build-1"), + "01954b9b-7b1d-72b8-8af3-f8d058f60b79/build-1/proguard/" + ); + } + + #[test] + fn retraces_r8_stacktrace() { + let mapping = ProguardMapping::parse( + r#"core.file.FileIO -> a.a.a: +# {"fileName":"FileIO.java","id":"sourceFile"} + 92:92:core.file.FileIO reload() -> c +"#, + ) + .unwrap(); + let input = "\ +java.lang.RuntimeException: oops +\tat a.a.a.c(SourceFile:92)"; + + assert_eq!( + mapping.retrace(input), + "\ +java.lang.RuntimeException: oops +\tat core.file.FileIO.reload(FileIO.java:92)" + ); + } + + #[test] + fn parse_many_bytes_combines_split_mapping_files() { + let parts = vec![ + b"core.file.FileIO -> a.a.a:\n# {\"fileName\":\"FileIO.java\",\"id\":\"sourceFile\"}\n 92:92:core.file.FileIO reload() -> c\n".to_vec(), + b"core.file.Validatable -> a.a.b:\n# {\"fileName\":\"Validatable.java\",\"id\":\"sourceFile\"}\n 26:26:core.file.FileIO validate() -> a_\n".to_vec(), + ]; + let mapping = ProguardMapping::parse_many_bytes(&parts).unwrap(); + let input = "\ +\tat a.a.a.c(SourceFile:92) +\tat a.a.b.a_(SourceFile:26)"; + + assert_eq!( + mapping.retrace(input), + "\ +\tat core.file.FileIO.reload(FileIO.java:92) +\tat core.file.Validatable.validate(Validatable.java:26)" + ); + } +} diff --git a/src/error_tracking/sourcemaps.rs b/src/error_tracking/sourcemaps.rs index 0dc6b05..7b390cd 100644 --- a/src/error_tracking/sourcemaps.rs +++ b/src/error_tracking/sourcemaps.rs @@ -1,7 +1,9 @@ +use crate::error_tracking::proguard::ProguardMapping; use aes_gcm::{Aes256Gcm, KeyInit, Nonce, aead::Aead}; use aws_sdk_s3::Client; use moka::future::Cache; use sourcemap::SourceMap; +use sqlx::PgPool; use std::sync::Arc; use std::time::Duration; use tracing::warn; @@ -11,13 +13,17 @@ const NONCE_LEN: usize = 12; const TAG_LEN: usize = 16; const MAP_CACHE_CAPACITY: u64 = 512; const MAP_CACHE_TTL: Duration = Duration::from_secs(600); +const BUILD_CACHE_CAPACITY: u64 = 2048; #[derive(Clone)] pub struct SourcemapResolver { client: Client, + db: PgPool, bucket: Arc, crypto: Arc, maps: Cache>>, + proguard_maps: Cache>>, + known_builds: Cache, } #[derive(Debug, Clone)] @@ -47,7 +53,7 @@ struct SourcemapCrypto { } impl SourcemapResolver { - pub fn from_env() -> Option { + pub fn from_env(db: PgPool) -> Option { let bucket = std::env::var("SOURCEMAPS_S3_BUCKET").ok()?; let endpoint = std::env::var("SOURCEMAPS_S3_ENDPOINT").ok()?; let access_key_id = std::env::var("SOURCEMAPS_S3_ACCESS_KEY_ID").ok()?; @@ -83,12 +89,21 @@ impl SourcemapResolver { Some(Self { client, + db, bucket: bucket.into(), crypto, maps: Cache::builder() .max_capacity(MAP_CACHE_CAPACITY) .time_to_idle(MAP_CACHE_TTL) .build(), + proguard_maps: Cache::builder() + .max_capacity(MAP_CACHE_CAPACITY) + .time_to_idle(MAP_CACHE_TTL) + .build(), + known_builds: Cache::builder() + .max_capacity(BUILD_CACHE_CAPACITY) + .time_to_idle(MAP_CACHE_TTL) + .build(), }) } @@ -101,6 +116,9 @@ impl SourcemapResolver { if build_id.is_empty() || stacktrace.is_empty() { return None; } + if !self.build_exists(project_id, build_id).await { + return None; + } let mut mapped_any = false; let mut mapped_stacktrace = String::with_capacity(stacktrace.len()); @@ -130,6 +148,56 @@ impl SourcemapResolver { }) } + pub async fn apply_r8( + &self, + project_id: Uuid, + build_id: &str, + stacktrace: &str, + ) -> Option { + if build_id.is_empty() || stacktrace.is_empty() { + return None; + } + if !self.build_exists(project_id, build_id).await { + return None; + } + + let mapping = self.load_proguard_mapping(project_id, build_id).await?; + let mapped_stacktrace = mapping.retrace(stacktrace); + if mapped_stacktrace == stacktrace { + return None; + } + + Some(MappedStacktrace { + stacktrace: mapped_stacktrace, + mapping_used: format!("r8:{build_id}"), + }) + } + + async fn build_exists(&self, project_id: Uuid, build_id: &str) -> bool { + let key = build_cache_key(project_id, build_id); + self.known_builds + .get_with(key, async move { + sqlx::query_scalar::<_, bool>( + r#" + SELECT EXISTS( + SELECT 1 + FROM project_build_ids + WHERE project_id = $1 AND build_id = $2 + ) + "#, + ) + .bind(project_id) + .bind(build_id) + .fetch_one(&self.db) + .await + .map_err(|error| { + warn!(%project_id, build_id, %error, "Failed to check sourcemap build id"); + }) + .unwrap_or(false) + }) + .await + } + async fn apply_frame( &self, project_id: Uuid, @@ -220,6 +288,111 @@ impl SourcemapResolver { .ok()?; Some(Arc::new(map)) } + + async fn load_proguard_mapping( + &self, + project_id: Uuid, + build_id: &str, + ) -> Option> { + let prefix = crate::error_tracking::proguard::s3_prefix(project_id, build_id); + self.proguard_maps + .get_with(prefix.clone(), async move { + self.fetch_proguard_mapping(&prefix).await + }) + .await + } + + async fn fetch_proguard_mapping(&self, prefix: &str) -> Option> { + let mut keys = self + .list_keys(prefix) + .await + .map_err(|error| { + warn!(prefix, %error, "Failed to list proguard mappings"); + }) + .ok()?; + keys.sort_unstable(); + if keys.is_empty() { + return None; + } + + let mut contents = Vec::with_capacity(keys.len()); + for key in keys { + contents.push(self.fetch_bytes(&key).await?); + } + + ProguardMapping::parse_many_bytes(&contents) + .map(Arc::new) + .map_err(|()| { + warn!(prefix, "Failed to parse proguard mappings"); + }) + .ok() + } + + async fn list_keys(&self, prefix: &str) -> Result, aws_sdk_s3::Error> { + let mut keys = Vec::new(); + let mut continuation_token: Option = None; + + loop { + let mut req = self + .client + .list_objects_v2() + .bucket(self.bucket.as_ref()) + .prefix(prefix); + + if let Some(token) = &continuation_token { + req = req.continuation_token(token); + } + + let resp = req.send().await?; + for object in resp.contents() { + if let Some(key) = object.key() { + keys.push(key.to_string()); + } + } + + if resp.is_truncated() != Some(true) { + break; + } + continuation_token = resp.next_continuation_token().map(Into::into); + } + + Ok(keys) + } + + async fn fetch_bytes(&self, key: &str) -> Option> { + let response = self + .client + .get_object() + .bucket(self.bucket.as_ref()) + .key(key) + .send() + .await + .map_err(|error| { + warn!(key, %error, "Failed to fetch mapping object"); + }) + .ok()?; + let encrypted = response + .body + .collect() + .await + .map_err(|error| { + warn!(key, %error, "Failed to read mapping object"); + }) + .ok()? + .to_vec(); + let compressed = self + .crypto + .decrypt(&encrypted) + .map_err(|()| { + warn!(key, "Failed to decrypt mapping object"); + }) + .ok()?; + zstd::stream::decode_all(compressed.as_slice()) + .map_err(|error| { + warn!(key, %error, "Failed to decompress mapping object"); + }) + .ok() + } } impl SourcemapCrypto { @@ -370,6 +543,15 @@ fn s3_key(project_id: Uuid, build_id: &str, file_name: &str) -> String { key } +fn build_cache_key(project_id: Uuid, build_id: &str) -> String { + let mut key = String::with_capacity(36 + 1 + build_id.len()); + use std::fmt::Write; + let _ = write!(key, "{project_id}"); + key.push('/'); + key.push_str(build_id); + key +} + #[cfg(test)] mod tests { use super::{normalize_file_name, parse_javascript_frame, s3_key}; diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index 9bed8ee..a9c6811 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -158,10 +158,6 @@ pub async fn enrich_with_sourcemap( resolver: Option<&SourcemapResolver>, mut row: ErrorOccurrenceV3Row, ) -> ErrorOccurrenceV3Row { - if row.platform != "web" || row.runtime != "browser" { - return row; - } - let Some(resolver) = resolver else { return row; }; @@ -170,12 +166,29 @@ pub async fn enrich_with_sourcemap( return row; } - if let Some(mapped) = resolver - .apply_javascript(row.project_id, build_id, &row.stacktrace) - .await - { - row.group_hash = - fingerprint::group_hash(&row.error_type, &row.error_message, &mapped.stacktrace); + let mapped = if row.platform == "web" && row.runtime == "browser" { + resolver + .apply_javascript(row.project_id, build_id, &row.stacktrace) + .await + } else if row.runtime == "java" { + resolver + .apply_r8(row.project_id, build_id, &row.stacktrace) + .await + } else { + None + }; + + if let Some(mapped) = mapped { + if row.runtime == "java" { + row.group_hash = java_fingerprint::group_hash( + &row.error_type, + &row.error_message, + &mapped.stacktrace, + ); + } else { + row.group_hash = + fingerprint::group_hash(&row.error_type, &row.error_message, &mapped.stacktrace); + } row.exact_hash = fingerprint::exact_hash(&row.error_type, &row.error_message, &mapped.stacktrace); row.mapped_stacktrace = Some(mapped.stacktrace); diff --git a/src/main.rs b/src/main.rs index 3b5b482..2842e88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -111,7 +111,7 @@ async fn main() { polar_client, &backup_path, backup_store_enabled, - error_tracking::sourcemaps::SourcemapResolver::from_env().map(Arc::new), + error_tracking::sourcemaps::SourcemapResolver::from_env(pool.clone()).map(Arc::new), ); let replay_storage = match replay_storage::ReplayStorage::from_env() { Ok(Some(storage)) => { From 531eb7f902cd464a883065c36646f87ec6814fea Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 10:27:04 +0200 Subject: [PATCH 05/17] context --- src/error_tracking/v3.rs | 93 ++++++++++++++++++++++++++++++++++------ src/handler/collect.rs | 10 +++-- src/handler/error.rs | 10 ++--- src/handler/mod.rs | 33 ++++++++------ src/handler/web.rs | 17 ++++---- src/models.rs | 44 +++++++++++++++++-- 6 files changed, 159 insertions(+), 48 deletions(-) diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index a9c6811..2863d62 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -3,7 +3,7 @@ use crate::error_tracking::{fingerprint, java_fingerprint}; use crate::models::ErrorTracking; use crate::tinybird::{ErrorOccurrenceV3Row, ModsEventRow, WebEventRow}; use chrono::Utc; -use serde_json::Value; +use serde_json::{Map, Value}; use std::collections::HashMap; use uuid::Uuid; @@ -15,7 +15,7 @@ pub struct WebOccurrenceInput<'a> { pub window_id: Option<&'a str>, pub sdk_name: Option<&'a str>, pub sdk_version: Option<&'a str>, - pub context: &'a str, + pub context: &'a Value, } pub struct ModsOccurrenceInput<'a> { @@ -24,7 +24,7 @@ pub struct ModsOccurrenceInput<'a> { pub server_id: &'a str, pub session_id: Option<&'a str>, pub sdk_version: Option<&'a str>, - pub context: &'a str, + pub context: &'a Value, } pub struct ErrorOnlyOccurrenceInput<'a> { @@ -33,7 +33,7 @@ pub struct ErrorOnlyOccurrenceInput<'a> { pub session_id: Option<&'a str>, pub sdk_name: Option<&'a str>, pub sdk_version: Option<&'a str>, - pub context: &'a str, + pub context: &'a Value, } pub fn build_web_occurrence( @@ -112,7 +112,7 @@ struct OccurrenceInput<'a> { runtime: &'a str, sdk_name: Option<&'a str>, sdk_version: Option<&'a str>, - context: &'a str, + context: &'a Value, group_hash: fn(&str, &str, &str) -> String, } @@ -150,7 +150,7 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO runtime: input.runtime.to_string(), sdk_name: input.sdk_name.unwrap_or_default().to_string(), sdk_version: input.sdk_version.unwrap_or_default().to_string(), - context: input.context.to_string(), + context: occurrence_context(input.context, error.context.as_ref()), } } @@ -198,7 +198,7 @@ pub async fn enrich_with_sourcemap( row } -pub fn web_context(row: &WebEventRow, custom: &HashMap) -> String { +pub fn web_context(row: &WebEventRow, custom: &HashMap) -> Value { let mut context = match serde_json::to_value(row) { Ok(Value::Object(context)) => context, _ => serde_json::Map::new(), @@ -211,10 +211,10 @@ pub fn web_context(row: &WebEventRow, custom: &HashMap) -> String ); } - serde_json::to_string(&context).unwrap_or_else(|_| "{}".to_string()) + Value::Object(context) } -pub fn mods_context(row: &ModsEventRow, custom: &HashMap) -> String { +pub fn mods_context(row: &ModsEventRow, custom: &HashMap) -> Value { let mut context = match serde_json::to_value(row) { Ok(Value::Object(context)) => context, _ => serde_json::Map::new(), @@ -227,20 +227,68 @@ pub fn mods_context(row: &ModsEventRow, custom: &HashMap) -> Stri ); } - serde_json::to_string(&context).unwrap_or_else(|_| "{}".to_string()) + Value::Object(context) +} + +pub fn empty_context() -> Value { + Value::Object(Map::new()) +} + +pub fn request_context(provided: Option, fallback: impl FnOnce() -> Value) -> Value { + provided.unwrap_or_else(fallback) +} + +pub fn occurrence_context(base_context: &Value, error_context: Option<&Value>) -> String { + let Some(error_context) = error_context else { + return serialize_context(base_context); + }; + + let merged = merge_context_values(base_context.clone(), error_context.clone()); + serialize_context(&merged) +} + +fn serialize_context(context: &Value) -> String { + serde_json::to_string(context).unwrap_or_else(|_| "{}".to_string()) +} + +fn merge_context_values(base_context: Value, error_context: Value) -> Value { + match (base_context, error_context) { + (Value::Object(mut base), Value::Object(error)) => { + for (key, value) in error { + base.insert(key, value); + } + Value::Object(base) + } + (Value::Object(mut base), error) => { + base.insert("error".to_string(), error); + Value::Object(base) + } + (base, Value::Object(mut error)) => { + if !matches!(base, Value::Object(ref object) if object.is_empty()) { + error.insert("request".to_string(), base); + } + Value::Object(error) + } + (base, error) => { + let mut context = Map::new(); + context.insert("request".to_string(), base); + context.insert("error".to_string(), error); + Value::Object(context) + } + } } #[cfg(test)] mod tests { - use super::{ModsOccurrenceInput, build_mods_occurrence}; + use super::{ModsOccurrenceInput, build_mods_occurrence, empty_context, occurrence_context}; use crate::error_tracking::java_fingerprint; use crate::models::{Error, ErrorTracking}; + use serde_json::json; use uuid::Uuid; #[test] fn mods_occurrences_use_java_group_hash() { let error = ErrorTracking { - hash: "legacy-client-hash".to_string(), error: Error { error: "java.lang.RuntimeException".to_string(), message: Some("Failed for player 123".to_string()), @@ -252,6 +300,7 @@ mod tests { count: None, session_id: None, build_id: None, + context: None, handled: None, }; @@ -262,7 +311,7 @@ mod tests { server_id: "server-id", session_id: None, sdk_version: None, - context: "{}", + context: &empty_context(), }, &error, ); @@ -276,4 +325,22 @@ mod tests { ) ); } + + #[test] + fn occurrence_context_merges_error_context_over_base_context() { + let context = occurrence_context( + &json!({"page":"/checkout","plan":"pro"}), + Some(&json!({"plan":"enterprise","component":"pay-button"})), + ); + + let parsed: serde_json::Value = serde_json::from_str(&context).unwrap(); + assert_eq!( + parsed, + json!({ + "page": "/checkout", + "plan": "enterprise", + "component": "pay-button" + }) + ); + } } diff --git a/src/handler/collect.rs b/src/handler/collect.rs index f285d66..1e8a34a 100644 --- a/src/handler/collect.rs +++ b/src/handler/collect.rs @@ -4,7 +4,9 @@ use super::{ insert_error_occurrence_v3, insert_mods_event, load_project_context, success_response, }; use crate::batch_queue::{FailedRequest, RequestType, TrackingContext}; -use crate::error_tracking::v3::{ModsOccurrenceInput, build_mods_occurrence, mods_context}; +use crate::error_tracking::v3::{ + ModsOccurrenceInput, build_mods_occurrence, mods_context, request_context, +}; use crate::models::{AppState, Request}; use crate::validation::validate_and_filter_payload; use axum::body::Bytes; @@ -72,6 +74,7 @@ pub async fn collect( id, mut data, errors, + context, session_id, } = req; @@ -105,7 +108,7 @@ pub async fn collect( ); let error_v3_context = ctx .error_tracking_enabled - .then(|| mods_context(&event_row, &valid_custom)); + .then(|| request_context(context, || mods_context(&event_row, &valid_custom))); if let Err(e) = insert_mods_event( &state.batch_queue, @@ -121,12 +124,11 @@ pub async fn collect( return success_response(warnings); } - if let Some(errors) = errors + if let (Some(errors), Some(error_v3_context)) = (errors, error_v3_context.as_ref()) && !errors.is_empty() { let fallback_identity = server_id.to_string(); let sdk_version = event_row.plugin_version.as_deref(); - let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); for mut error in errors { if error.session_id.is_none() { error.session_id = session_id.clone(); diff --git a/src/handler/error.rs b/src/handler/error.rs index 9a019dc..7960ab1 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -3,7 +3,9 @@ use super::{ load_project_context, success_response, }; use crate::batch_queue::TrackingContext; -use crate::error_tracking::v3::{ErrorOnlyOccurrenceInput, build_error_only_occurrence}; +use crate::error_tracking::v3::{ + ErrorOnlyOccurrenceInput, build_error_only_occurrence, empty_context, request_context, +}; use crate::models::{AppState, ErrorTracking}; use axum::Json; use axum::extract::State; @@ -60,9 +62,7 @@ pub async fn error( organization_id: ctx.organization_id.as_deref().map(Into::into), }; - let context = payload - .context - .map(|v| serde_json::to_string(&v).unwrap_or_else(|_| "{}".to_string())); + let context = request_context(payload.context, empty_context); for mut error in payload.errors { if error.session_id.is_none() { @@ -79,7 +79,7 @@ pub async fn error( session_id: error.session_id.as_deref(), sdk_name: payload.sdk_name.as_deref(), sdk_version: payload.sdk_version.as_deref(), - context: context.as_deref().unwrap_or("{}"), + context: &context, }, &error, ); diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 4c4dccc..6dbfda7 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -574,6 +574,7 @@ async fn process_collect_request( id, mut data, errors, + context, session_id, } = req; @@ -600,21 +601,24 @@ async fn process_collect_request( &valid_custom, ); - let error_v3_context = ctx - .error_tracking_enabled - .then(|| crate::error_tracking::v3::mods_context(&event_row, &valid_custom)); + let error_v3_context = ctx.error_tracking_enabled.then(|| { + crate::error_tracking::v3::request_context(context, || { + crate::error_tracking::v3::mods_context(&event_row, &valid_custom) + }) + }); insert_mods_event(batch_queue, event_row.clone(), Some(tracking_ctx.clone())) .await .map_err(|_| "Failed to queue event".to_string())?; - if ctx.error_tracking_enabled - && let Some(errors) = errors - && !errors.is_empty() + if let (true, Some(errors), Some(error_v3_context)) = ( + ctx.error_tracking_enabled, + errors, + error_v3_context.as_ref(), + ) && !errors.is_empty() { let fallback_identity = server_id.to_string(); let sdk_version = event_row.plugin_version.as_deref(); - let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); for mut error in errors { if error.session_id.is_none() { error.session_id = session_id.clone(); @@ -734,11 +738,9 @@ async fn process_web_request( ); let should_process_errors = ctx.error_tracking_enabled && has_errors; let error_v3_context = should_process_errors.then(|| { - parsed - .context - .as_ref() - .map(|value| serde_json::to_string(value).unwrap_or_else(|_| "{}".to_string())) - .unwrap_or_else(|| crate::error_tracking::v3::web_context(&event_row, &valid_custom)) + crate::error_tracking::v3::request_context(parsed.context, || { + crate::error_tracking::v3::web_context(&event_row, &valid_custom) + }) }); if let Some(session_id) = parsed.session_id.as_deref() @@ -766,8 +768,11 @@ async fn process_web_request( .await .map_err(|_| "Failed to queue event".to_string())?; - if should_process_errors && let Some(errors) = parsed.errors { - let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); + if let (true, Some(errors), Some(error_v3_context)) = ( + should_process_errors, + parsed.errors, + error_v3_context.as_ref(), + ) { // The browser SDK sends this as `buildId`; the Tinybird v3 schema stores it as `release`. let release = parsed.build_id.as_deref(); for mut error in errors { diff --git a/src/handler/web.rs b/src/handler/web.rs index 86f89b8..b3c6201 100644 --- a/src/handler/web.rs +++ b/src/handler/web.rs @@ -5,7 +5,9 @@ use super::{ validate_hostname, }; use crate::batch_queue::{FailedRequest, RequestType, TrackingContext}; -use crate::error_tracking::v3::{WebOccurrenceInput, build_web_occurrence, web_context}; +use crate::error_tracking::v3::{ + WebOccurrenceInput, build_web_occurrence, request_context, web_context, +}; use crate::identity::resolve_person_for_distinct_id; use crate::models::{AppState, ErrorTracking}; use crate::utils::debounce::should_debounce; @@ -203,12 +205,8 @@ pub async fn web( &valid_custom, ); let should_process_errors = ctx.error_tracking_enabled && HAS_ERRORS(&errors); - let error_v3_context = should_process_errors.then(|| { - context - .as_ref() - .map(|value| serde_json::to_string(value).unwrap_or_else(|_| "{}".to_string())) - .unwrap_or_else(|| web_context(&event_row, &valid_custom)) - }); + let error_v3_context = should_process_errors + .then(|| request_context(context, || web_context(&event_row, &valid_custom))); if let Some(session_id) = session_id.as_deref() && let Some(replay_storage) = state.replay_storage.as_deref() @@ -238,8 +236,9 @@ pub async fn web( } } - if should_process_errors && let Some(error_list) = errors { - let error_v3_context = error_v3_context.as_deref().unwrap_or("{}"); + if let (true, Some(error_list), Some(error_v3_context)) = + (should_process_errors, errors, error_v3_context.as_ref()) + { // The browser SDK sends this as `buildId`; the Tinybird v3 schema stores it as `release`. let release = build_id.as_deref(); for mut error in error_list { diff --git a/src/models.rs b/src/models.rs index 9b180c9..fc8b077 100644 --- a/src/models.rs +++ b/src/models.rs @@ -37,7 +37,6 @@ pub struct Error { #[derive(Debug, Serialize, Deserialize)] pub struct ErrorTracking { - pub hash: String, #[serde(flatten)] pub error: Error, #[serde(default)] @@ -46,6 +45,8 @@ pub struct ErrorTracking { pub session_id: Option, #[serde(default, rename = "buildId")] pub build_id: Option, + #[serde(default)] + pub context: Option, pub handled: Option, } @@ -56,6 +57,8 @@ pub struct Request { pub id: RequestIdentifier, pub data: HashMap, pub errors: Option>, + #[serde(default)] + pub context: Option, #[serde(default, rename = "sessionId")] pub session_id: Option, } @@ -100,7 +103,6 @@ mod tests { }, "errors": [ { - "hash": "err_3d39cc9f28fb81e8b7064481c7deb8c0bb349cb0877558cc73b677c1fb9a704d", "error": "Error", "message": "Uncaught Error: Render error", "stack": ["line1", "line2"], @@ -129,11 +131,47 @@ mod tests { "hash": "err_3d39cc9f28fb81e8b7064481c7deb8c0bb349cb0877558cc73b677c1fb9a704d", "error": "Error", "message": "Uncaught Error: Render error", + "context": { "component": "checkout" }, "handled": true }"#; let result = serde_json::from_str::(json); assert!(result.is_ok(), "Failed to parse: {:?}", result.err()); - assert_eq!(result.unwrap().handled, Some(true)); + let error = result.unwrap(); + assert_eq!(error.handled, Some(true)); + assert_eq!( + error.context, + Some(serde_json::json!({ "component": "checkout" })) + ); + } + + #[test] + fn test_collect_request_parses_root_context() { + let json = r#"{ + "server_id": "f2a2b1b2-4d73-49f5-9daa-73ba95e4076d", + "data": {}, + "context": { "region": "eu" }, + "errors": [ + { + "error": "Error", + "message": "Render error", + "context": { "component": "checkout" } + } + ] + }"#; + + let result = serde_json::from_str::(json); + assert!(result.is_ok(), "Failed to parse: {:?}", result.err()); + + let request = result.unwrap(); + assert_eq!(request.context, Some(serde_json::json!({ "region": "eu" }))); + assert_eq!( + request + .errors + .as_ref() + .and_then(|errors| errors.first()) + .and_then(|error| error.context.as_ref()), + Some(&serde_json::json!({ "component": "checkout" })) + ); } } From 54b5439460510e205aec130e2d91fc5c9159ea94 Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 18:48:37 +0200 Subject: [PATCH 06/17] fix: session_id is web only --- src/error_tracking/v3.rs | 1 + src/handler/collect.rs | 9 ++------- src/handler/mod.rs | 8 ++------ src/models.rs | 8 ++++---- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index 2863d62..a06ab39 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -302,6 +302,7 @@ mod tests { build_id: None, context: None, handled: None, + sdk_version: None, }; let row = build_mods_occurrence( diff --git a/src/handler/collect.rs b/src/handler/collect.rs index 1e8a34a..ec580bd 100644 --- a/src/handler/collect.rs +++ b/src/handler/collect.rs @@ -75,7 +75,6 @@ pub async fn collect( mut data, errors, context, - session_id, } = req; let server_id = match id.value().parse::() { @@ -128,18 +127,14 @@ pub async fn collect( && !errors.is_empty() { let fallback_identity = server_id.to_string(); - let sdk_version = event_row.plugin_version.as_deref(); - for mut error in errors { - if error.session_id.is_none() { - error.session_id = session_id.clone(); - } + for error in errors { let occurrence = build_mods_occurrence( &ModsOccurrenceInput { project_id: ctx.project_id, release: error.build_id.as_deref(), server_id: fallback_identity.as_str(), session_id: error.session_id.as_deref(), - sdk_version, + sdk_version: error.sdk_version.as_deref(), context: error_v3_context, }, &error, diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 6dbfda7..561a4fc 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -575,7 +575,6 @@ async fn process_collect_request( mut data, errors, context, - session_id, } = req; let server_id = id @@ -619,16 +618,13 @@ async fn process_collect_request( { let fallback_identity = server_id.to_string(); let sdk_version = event_row.plugin_version.as_deref(); - for mut error in errors { - if error.session_id.is_none() { - error.session_id = session_id.clone(); - } + for error in errors { let occurrence = crate::error_tracking::v3::build_mods_occurrence( &crate::error_tracking::v3::ModsOccurrenceInput { project_id: ctx.project_id, release: error.build_id.as_deref(), server_id: fallback_identity.as_str(), - session_id: error.session_id.as_deref(), + session_id: None, sdk_version, context: error_v3_context, }, diff --git a/src/models.rs b/src/models.rs index fc8b077..923cb0c 100644 --- a/src/models.rs +++ b/src/models.rs @@ -41,12 +41,14 @@ pub struct ErrorTracking { pub error: Error, #[serde(default)] pub count: Option, - #[serde(default, rename = "sessionId")] - pub session_id: Option, #[serde(default, rename = "buildId")] pub build_id: Option, #[serde(default)] pub context: Option, + #[serde(default, rename = "sdkVersion")] + pub sdk_version: Option, + #[serde(default, skip_deserializing)] + pub session_id: Option, pub handled: Option, } @@ -59,8 +61,6 @@ pub struct Request { pub errors: Option>, #[serde(default)] pub context: Option, - #[serde(default, rename = "sessionId")] - pub session_id: Option, } #[derive(Debug, Serialize, Deserialize)] From 175b04023ad5178ff1a4bd634eace24be005978f Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 18:57:35 +0200 Subject: [PATCH 07/17] fix: flat custom context --- src/error_tracking/v3.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index a06ab39..a5795bf 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -204,11 +204,8 @@ pub fn web_context(row: &WebEventRow, custom: &HashMap) -> Value _ => serde_json::Map::new(), }; - if !custom.is_empty() { - context.insert( - "custom".to_string(), - Value::Object(custom.iter().map(|(k, v)| (k.clone(), v.clone())).collect()), - ); + for (key, value) in custom { + context.insert(key.clone(), value.clone()); } Value::Object(context) @@ -220,11 +217,8 @@ pub fn mods_context(row: &ModsEventRow, custom: &HashMap) -> Valu _ => serde_json::Map::new(), }; - if !custom.is_empty() { - context.insert( - "custom".to_string(), - Value::Object(custom.iter().map(|(k, v)| (k.clone(), v.clone())).collect()), - ); + for (key, value) in custom { + context.insert(key.clone(), value.clone()); } Value::Object(context) From b31323d5da495dc4163e5038f9567cd1a565a3b1 Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 19:09:58 +0200 Subject: [PATCH 08/17] fix: allow plugin_name --- src/handler/collect.rs | 1 + src/handler/mod.rs | 1 + src/models.rs | 3 +++ 3 files changed, 5 insertions(+) diff --git a/src/handler/collect.rs b/src/handler/collect.rs index ec580bd..12f01ff 100644 --- a/src/handler/collect.rs +++ b/src/handler/collect.rs @@ -75,6 +75,7 @@ pub async fn collect( mut data, errors, context, + project_name: _, } = req; let server_id = match id.value().parse::() { diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 561a4fc..e855a06 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -575,6 +575,7 @@ async fn process_collect_request( mut data, errors, context, + project_name: _, } = req; let server_id = id diff --git a/src/models.rs b/src/models.rs index 923cb0c..36ce28e 100644 --- a/src/models.rs +++ b/src/models.rs @@ -61,6 +61,9 @@ pub struct Request { pub errors: Option>, #[serde(default)] pub context: Option, + // TODO: handle project_name once project-level routing is supported. + #[serde(default)] + pub project_name: Option, } #[derive(Debug, Serialize, Deserialize)] From 859c5f6b524ac957267558749328a15004d8c0d4 Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 19:15:57 +0200 Subject: [PATCH 09/17] fix: allow kek stram idk --- src/handler/error.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/handler/error.rs b/src/handler/error.rs index 7960ab1..191b9fe 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -7,7 +7,7 @@ use crate::error_tracking::v3::{ ErrorOnlyOccurrenceInput, build_error_only_occurrence, empty_context, request_context, }; use crate::models::{AppState, ErrorTracking}; -use axum::Json; +use axum::body::Bytes; use axum::extract::State; use axum::http::{HeaderMap, StatusCode}; use axum::response::IntoResponse; @@ -35,13 +35,18 @@ pub(crate) struct ErrorRequest { pub async fn error( State(state): State, headers: HeaderMap, - Json(payload): Json, + body: Bytes, ) -> impl IntoResponse { let token = match get_authorization(&headers) { Some(t) => t, None => return error_response(StatusCode::UNAUTHORIZED, "Unauthorized"), }; + let payload: ErrorRequest = match serde_json::from_slice(&body) { + Ok(payload) => payload, + Err(_) => return error_response(StatusCode::BAD_REQUEST, "Invalid JSON"), + }; + let ctx = match load_project_context(&state.pool, &token).await { Ok(ctx) => ctx, Err(e) => return e, From 0a3de7e9f8790dc4a83e17048e5d4de234232b6a Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 19:21:24 +0200 Subject: [PATCH 10/17] fix: allow project_name in error --- src/handler/error.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/handler/error.rs b/src/handler/error.rs index 191b9fe..91b695d 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -26,6 +26,9 @@ pub(crate) struct ErrorRequest { build_id: Option, #[serde(default)] context: Option, + // TODO: handle project_name once project-level routing is supported. + #[serde(default)] + project_name: Option, #[serde(default, alias = "sdk_name")] sdk_name: Option, #[serde(default, alias = "sdk_version")] @@ -68,6 +71,7 @@ pub async fn error( }; let context = request_context(payload.context, empty_context); + let _project_name = payload.project_name; for mut error in payload.errors { if error.session_id.is_none() { From f38ce3316cc38d4b1564347e1ed73e9d887c4f79 Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 19:37:06 +0200 Subject: [PATCH 11/17] fix: stuff --- src/batch_queue/mod.rs | 2 -- src/error_tracking/v3.rs | 26 ++++++++------------------ src/handler/error.rs | 3 +++ src/tinybird.rs | 2 -- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 5c5a686..6bfef13 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -884,8 +884,6 @@ mod tests { user_id: String::new(), session_id: String::new(), window_id: String::new(), - platform: "web".to_string(), - runtime: "browser".to_string(), sdk_name: String::new(), sdk_version: String::new(), context: "{}".to_string(), diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index a5795bf..78f4f28 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -30,6 +30,7 @@ pub struct ModsOccurrenceInput<'a> { pub struct ErrorOnlyOccurrenceInput<'a> { pub project_id: Uuid, pub release: Option<&'a str>, + pub identifier: Option<&'a str>, pub session_id: Option<&'a str>, pub sdk_name: Option<&'a str>, pub sdk_version: Option<&'a str>, @@ -47,8 +48,6 @@ pub fn build_web_occurrence( user_id: input.user_id, session_id: input.session_id, window_id: input.window_id, - platform: "web", - runtime: "browser", sdk_name: input.sdk_name, sdk_version: input.sdk_version, context: input.context, @@ -69,8 +68,6 @@ pub fn build_mods_occurrence( user_id: Some(input.server_id), session_id: input.session_id, window_id: None, - platform: "minecraft-plugin", - runtime: "java", sdk_name: Some("minecraft-plugin"), sdk_version: input.sdk_version, context: input.context, @@ -88,11 +85,9 @@ pub fn build_error_only_occurrence( OccurrenceInput { project_id: input.project_id, release: input.release, - user_id: None, + user_id: input.identifier, session_id: input.session_id, window_id: None, - platform: "web", - runtime: "browser", sdk_name: input.sdk_name, sdk_version: input.sdk_version, context: input.context, @@ -108,8 +103,6 @@ struct OccurrenceInput<'a> { user_id: Option<&'a str>, session_id: Option<&'a str>, window_id: Option<&'a str>, - platform: &'a str, - runtime: &'a str, sdk_name: Option<&'a str>, sdk_version: Option<&'a str>, context: &'a Value, @@ -146,8 +139,6 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO user_id: input.user_id.unwrap_or_default().to_string(), session_id: input.session_id.unwrap_or_default().to_string(), window_id: input.window_id.unwrap_or_default().to_string(), - platform: input.platform.to_string(), - runtime: input.runtime.to_string(), sdk_name: input.sdk_name.unwrap_or_default().to_string(), sdk_version: input.sdk_version.unwrap_or_default().to_string(), context: occurrence_context(input.context, error.context.as_ref()), @@ -166,20 +157,19 @@ pub async fn enrich_with_sourcemap( return row; } - let mapped = if row.platform == "web" && row.runtime == "browser" { - resolver - .apply_javascript(row.project_id, build_id, &row.stacktrace) - .await - } else if row.runtime == "java" { + let is_java = row.sdk_name == "minecraft-plugin"; + let mapped = if is_java { resolver .apply_r8(row.project_id, build_id, &row.stacktrace) .await } else { - None + resolver + .apply_javascript(row.project_id, build_id, &row.stacktrace) + .await }; if let Some(mapped) = mapped { - if row.runtime == "java" { + if is_java { row.group_hash = java_fingerprint::group_hash( &row.error_type, &row.error_message, diff --git a/src/handler/error.rs b/src/handler/error.rs index 91b695d..8a77a72 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -21,6 +21,8 @@ use tracing::warn; pub(crate) struct ErrorRequest { errors: Vec, #[serde(default)] + identifier: Option, + #[serde(default)] session_id: Option, #[serde(default)] build_id: Option, @@ -85,6 +87,7 @@ pub async fn error( &ErrorOnlyOccurrenceInput { project_id: ctx.project_id, release: error.build_id.as_deref(), + identifier: payload.identifier.as_deref(), session_id: error.session_id.as_deref(), sdk_name: payload.sdk_name.as_deref(), sdk_version: payload.sdk_version.as_deref(), diff --git a/src/tinybird.rs b/src/tinybird.rs index 567a96d..407f381 100644 --- a/src/tinybird.rs +++ b/src/tinybird.rs @@ -87,8 +87,6 @@ pub struct ErrorOccurrenceV3Row { pub user_id: String, pub session_id: String, pub window_id: String, - pub platform: String, - pub runtime: String, pub sdk_name: String, pub sdk_version: String, pub context: String, From e441ce336eee9020742d118568edd9ae9581762c Mon Sep 17 00:00:00 2001 From: Luca Date: Sat, 30 May 2026 19:51:00 +0200 Subject: [PATCH 12/17] fix naming --- src/batch_queue/mod.rs | 2 +- src/error_tracking/v3.rs | 2 +- src/tinybird.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 6bfef13..248347c 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -881,7 +881,7 @@ mod tests { stacktrace: "line1".to_string(), mapped_stacktrace: None, mapping_used: None, - user_id: String::new(), + identifier: String::new(), session_id: String::new(), window_id: String::new(), sdk_name: String::new(), diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index 78f4f28..634a9c2 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -136,7 +136,7 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO stacktrace, mapped_stacktrace: None, mapping_used: None, - user_id: input.user_id.unwrap_or_default().to_string(), + identifier: input.user_id.unwrap_or_default().to_string(), session_id: input.session_id.unwrap_or_default().to_string(), window_id: input.window_id.unwrap_or_default().to_string(), sdk_name: input.sdk_name.unwrap_or_default().to_string(), diff --git a/src/tinybird.rs b/src/tinybird.rs index 407f381..23eebce 100644 --- a/src/tinybird.rs +++ b/src/tinybird.rs @@ -84,7 +84,7 @@ pub struct ErrorOccurrenceV3Row { pub mapped_stacktrace: Option, #[serde(skip_serializing_if = "Option::is_none")] pub mapping_used: Option, - pub user_id: String, + pub identifier: String, pub session_id: String, pub window_id: String, pub sdk_name: String, From 5e98ec7db1b8caf9349fb60e744a57aaa8e946ff Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 31 May 2026 10:49:59 +0200 Subject: [PATCH 13/17] fix: support count --- src/batch_queue/mod.rs | 1 + src/error_tracking/v3.rs | 7 ++++++- src/tinybird.rs | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 248347c..7f66ac3 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -886,6 +886,7 @@ mod tests { window_id: String::new(), sdk_name: String::new(), sdk_version: String::new(), + count: 1, context: "{}".to_string(), }), tracking: None, diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index 634a9c2..c3683b4 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -141,6 +141,10 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO window_id: input.window_id.unwrap_or_default().to_string(), sdk_name: input.sdk_name.unwrap_or_default().to_string(), sdk_version: input.sdk_version.unwrap_or_default().to_string(), + count: error + .count + .and_then(|count| count.try_into().ok()) + .unwrap_or(1), context: occurrence_context(input.context, error.context.as_ref()), } } @@ -281,7 +285,7 @@ mod tests { ]), cause: None, }, - count: None, + count: Some(3), session_id: None, build_id: None, context: None, @@ -309,6 +313,7 @@ mod tests { "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)" ) ); + assert_eq!(row.count, 3); } #[test] diff --git a/src/tinybird.rs b/src/tinybird.rs index 23eebce..a94fa5c 100644 --- a/src/tinybird.rs +++ b/src/tinybird.rs @@ -89,6 +89,7 @@ pub struct ErrorOccurrenceV3Row { pub window_id: String, pub sdk_name: String, pub sdk_version: String, + pub count: u32, pub context: String, } From 133b935617eab3f6ce632f0666b3ed6729db88e1 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 31 May 2026 17:03:10 +0200 Subject: [PATCH 14/17] exclude java internals --- src/error_tracking/java_fingerprint.rs | 30 +++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/src/error_tracking/java_fingerprint.rs b/src/error_tracking/java_fingerprint.rs index 1914ae7..57d6b6c 100644 --- a/src/error_tracking/java_fingerprint.rs +++ b/src/error_tracking/java_fingerprint.rs @@ -25,6 +25,7 @@ static LAMBDA_RE: LazyLock = LazyLock::new(|| { }); static WHITESPACE_RE: LazyLock = LazyLock::new(|| Regex::new(r"\s+").expect("valid whitespace regex")); +const JAVA_INTERNAL_FRAME_PREFIXES: &[&str] = &["java.", "javax.", "sun.", "com.sun.", "jdk."]; pub fn group_hash(error_type: &str, message: &str, stacktrace: &str) -> String { let normalized = normalize_for_grouping(error_type, message, stacktrace); @@ -39,7 +40,7 @@ fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> for line in stacktrace.lines().take(80) { let normalized = normalize_piece(line); - if normalized.is_empty() { + if normalized.is_empty() || is_java_internal_frame(&normalized) { continue; } out.push('\n'); @@ -63,6 +64,14 @@ fn normalize_piece(input: &str) -> String { value.trim().to_string() } +fn is_java_internal_frame(line: &str) -> bool { + let frame = line.strip_prefix("at ").unwrap_or(line); + + JAVA_INTERNAL_FRAME_PREFIXES + .iter() + .any(|prefix| frame.starts_with(prefix)) +} + #[cfg(test)] mod tests { use super::{group_hash, normalize_piece}; @@ -94,4 +103,23 @@ mod tests { assert_eq!(a, b); } + + #[test] + fn group_hash_ignores_java_internal_frames() { + let app_frame = "\tat com.example.Plugin.handle(Plugin.java:42)"; + let with_internals = [ + "\tat java.base/java.lang.Thread.run(Thread.java:840)", + "\tat javax.servlet.Filter.doFilter(Filter.java:10)", + "\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)", + "\tat com.sun.proxy.$Proxy1.invoke(Unknown Source)", + "\tat jdk.proxy2.$Proxy2.run(Unknown Source)", + app_frame, + ] + .join("\n"); + + let a = group_hash("RuntimeException", "Failed", app_frame); + let b = group_hash("RuntimeException", "Failed", &with_internals); + + assert_eq!(a, b); + } } From a430096f20e811ee21e01138fe662f706e84e230 Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 31 May 2026 17:13:11 +0200 Subject: [PATCH 15/17] exclude more frames --- src/error_tracking/java_fingerprint.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/error_tracking/java_fingerprint.rs b/src/error_tracking/java_fingerprint.rs index 57d6b6c..912b784 100644 --- a/src/error_tracking/java_fingerprint.rs +++ b/src/error_tracking/java_fingerprint.rs @@ -40,7 +40,7 @@ fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> for line in stacktrace.lines().take(80) { let normalized = normalize_piece(line); - if normalized.is_empty() || is_java_internal_frame(&normalized) { + if normalized.is_empty() || should_ignore_frame(&normalized) { continue; } out.push('\n'); @@ -64,6 +64,10 @@ fn normalize_piece(input: &str) -> String { value.trim().to_string() } +fn should_ignore_frame(line: &str) -> bool { + is_java_internal_frame(line) || line.trim_start().starts_with("...") +} + fn is_java_internal_frame(line: &str) -> bool { let frame = line.strip_prefix("at ").unwrap_or(line); @@ -122,4 +126,15 @@ mod tests { assert_eq!(a, b); } + + #[test] + fn group_hash_ignores_java_cause_elision() { + let app_frame = "\tat com.example.Plugin.handle(Plugin.java:42)"; + let with_elision = [app_frame, "\t... 23 more"].join("\n"); + + let a = group_hash("RuntimeException", "Failed", app_frame); + let b = group_hash("RuntimeException", "Failed", &with_elision); + + assert_eq!(a, b); + } } From c50ecb0cb6f2f0875cdf9c770cc01ab6831bd26b Mon Sep 17 00:00:00 2001 From: Luca Date: Sun, 31 May 2026 17:38:56 +0200 Subject: [PATCH 16/17] proper lang --- src/batch_queue/mod.rs | 45 +++++++++++++++++------- src/error_tracking/v3.rs | 75 ++++++++++++++++++++++++++-------------- src/handler/collect.rs | 1 + src/handler/error.rs | 21 ++++++++--- src/handler/mod.rs | 24 +++++++++---- src/handler/web.rs | 1 + 6 files changed, 119 insertions(+), 48 deletions(-) diff --git a/src/batch_queue/mod.rs b/src/batch_queue/mod.rs index 7f66ac3..1972fa2 100644 --- a/src/batch_queue/mod.rs +++ b/src/batch_queue/mod.rs @@ -2,6 +2,7 @@ mod backup_store; pub use backup_store::BackupStore; use crate::error_tracking::sourcemaps::SourcemapResolver; +use crate::error_tracking::v3::ErrorLanguage; use crate::polar::{PolarClient, UsageCounts}; use crate::tinybird::{ ErrorOccurrenceV3Row, ModsEventRow, ReplayRow, TinybirdClient, WebEventRow, WebVitalRow, @@ -79,6 +80,8 @@ pub enum QueuedEvent { }, ErrorOccurrenceV3 { row: Box, + #[serde(default)] + language: ErrorLanguage, #[serde(skip_serializing_if = "Option::is_none")] tracking: Option, }, @@ -112,7 +115,7 @@ const INITIAL_BATCH_CAPACITY: usize = 64; struct InMemoryBatch { web_events: Vec<(WebEventRow, Option)>, mods_events: Vec<(ModsEventRow, Option)>, - error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, + error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, ErrorLanguage, Option)>, web_vitals: Vec<(WebVitalRow, Option)>, replays: Vec<(ReplayRow, Option)>, } @@ -150,9 +153,11 @@ impl InMemoryBatch { match event { QueuedEvent::WebEvent { row, tracking } => self.web_events.push((*row, tracking)), QueuedEvent::ModsEvent { row, tracking } => self.mods_events.push((row, tracking)), - QueuedEvent::ErrorOccurrenceV3 { row, tracking } => { - self.error_occurrences_v3.push((*row, tracking)) - } + QueuedEvent::ErrorOccurrenceV3 { + row, + language, + tracking, + } => self.error_occurrences_v3.push((*row, language, tracking)), QueuedEvent::WebVital { row, tracking } => self.web_vitals.push((row, tracking)), QueuedEvent::Replay { row, tracking } => self.replays.push((row, tracking)), } @@ -176,8 +181,9 @@ impl InMemoryBatch { result.extend( self.error_occurrences_v3 .into_iter() - .map(|(row, tracking)| QueuedEvent::ErrorOccurrenceV3 { + .map(|(row, language, tracking)| QueuedEvent::ErrorOccurrenceV3 { row: Box::new(row), + language, tracking, }), ); @@ -224,7 +230,19 @@ impl InMemoryBatch { count_usage!(&self.web_events, events); count_usage!(&self.mods_events, events); - count_usage!(&self.error_occurrences_v3, error_tracking); + for (_, _, ctx) in &self.error_occurrences_v3 { + if let Some(ctx) = ctx { + usage + .entry(Arc::clone(&ctx.owner_id)) + .or_insert_with(|| OwnerUsage { + counts: UsageCounts::default(), + token: Arc::clone(&ctx.token), + org: ctx.organization_id.as_ref().map(Arc::clone), + }) + .counts + .error_tracking += 1; + } + } count_usage!(&self.web_vitals, web_vitals); for (row, ctx) in &self.replays { if let Some(ctx) = ctx { @@ -251,7 +269,8 @@ impl InMemoryBatch { struct BatchSendResult { failed_web_events: Vec<(WebEventRow, Option)>, failed_mods_events: Vec<(ModsEventRow, Option)>, - failed_error_occurrences_v3: Vec<(ErrorOccurrenceV3Row, Option)>, + failed_error_occurrences_v3: + Vec<(ErrorOccurrenceV3Row, ErrorLanguage, Option)>, failed_web_vitals: Vec<(WebVitalRow, Option)>, failed_replays: Vec<(ReplayRow, Option)>, had_permanent_failure: bool, @@ -564,7 +583,7 @@ impl BatchQueue { let mods_event_rows: Vec<_> = mods_events.iter().map(|(e, _)| e).collect(); let error_occurrences_v3 = self.enrich_error_occurrences_v3(error_occurrences_v3).await; let error_occurrence_v3_rows: Vec<_> = - error_occurrences_v3.iter().map(|(e, _)| e).collect(); + error_occurrences_v3.iter().map(|(e, _, _)| e).collect(); let web_vital_rows: Vec<_> = web_vitals.iter().map(|(e, _)| e).collect(); let replay_rows: Vec<_> = replays.iter().map(|(e, _)| e).collect(); @@ -664,17 +683,18 @@ impl BatchQueue { async fn enrich_error_occurrences_v3( &self, - rows: Vec<(ErrorOccurrenceV3Row, Option)>, - ) -> Vec<(ErrorOccurrenceV3Row, Option)> { + rows: Vec<(ErrorOccurrenceV3Row, ErrorLanguage, Option)>, + ) -> Vec<(ErrorOccurrenceV3Row, ErrorLanguage, Option)> { if rows.is_empty() || self.sourcemaps.is_none() { return rows; } let resolver = self.sourcemaps.as_deref(); let mut enriched = Vec::with_capacity(rows.len()); - for (row, tracking) in rows { + for (row, language, tracking) in rows { enriched.push(( - crate::error_tracking::v3::enrich_with_sourcemap(resolver, row).await, + crate::error_tracking::v3::enrich_with_sourcemap(resolver, row, language).await, + language, tracking, )); } @@ -889,6 +909,7 @@ mod tests { count: 1, context: "{}".to_string(), }), + language: ErrorLanguage::Java, tracking: None, } } diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index c3683b4..fba4fef 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -7,6 +7,30 @@ use serde_json::{Map, Value}; use std::collections::HashMap; use uuid::Uuid; +#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize, Default)] +pub enum ErrorLanguage { + #[default] + Java, + Javascript, +} + +impl ErrorLanguage { + pub fn parse_optional(value: Option<&str>) -> Result { + match value.map(str::trim).filter(|value| !value.is_empty()) { + None | Some("java") => Ok(Self::Java), + Some("javascript") => Ok(Self::Javascript), + Some(_) => Err("Unsupported language. Expected java or javascript"), + } + } + + fn group_hash(self) -> fn(&str, &str, &str) -> String { + match self { + Self::Java => java_fingerprint::group_hash, + Self::Javascript => fingerprint::group_hash, + } + } +} + pub struct WebOccurrenceInput<'a> { pub project_id: Uuid, pub release: Option<&'a str>, @@ -34,6 +58,7 @@ pub struct ErrorOnlyOccurrenceInput<'a> { pub session_id: Option<&'a str>, pub sdk_name: Option<&'a str>, pub sdk_version: Option<&'a str>, + pub language: ErrorLanguage, pub context: &'a Value, } @@ -51,7 +76,7 @@ pub fn build_web_occurrence( sdk_name: input.sdk_name, sdk_version: input.sdk_version, context: input.context, - group_hash: fingerprint::group_hash, + language: ErrorLanguage::Javascript, }, error, ) @@ -71,7 +96,7 @@ pub fn build_mods_occurrence( sdk_name: Some("minecraft-plugin"), sdk_version: input.sdk_version, context: input.context, - group_hash: java_fingerprint::group_hash, + language: ErrorLanguage::Java, }, error, ) @@ -91,7 +116,7 @@ pub fn build_error_only_occurrence( sdk_name: input.sdk_name, sdk_version: input.sdk_version, context: input.context, - group_hash: fingerprint::group_hash, + language: input.language, }, error, ) @@ -106,7 +131,7 @@ struct OccurrenceInput<'a> { sdk_name: Option<&'a str>, sdk_version: Option<&'a str>, context: &'a Value, - group_hash: fn(&str, &str, &str) -> String, + language: ErrorLanguage, } fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorOccurrenceV3Row { @@ -128,7 +153,7 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO // release behavior are verified in production data. environment: "prod".to_string(), release: input.release.unwrap_or_default().to_string(), - group_hash: (input.group_hash)(&error_type, &error_message, source_stack), + group_hash: (input.language.group_hash())(&error_type, &error_message, source_stack), exact_hash: fingerprint::exact_hash(&error_type, &error_message, source_stack), error_type, error_message, @@ -152,6 +177,7 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO pub async fn enrich_with_sourcemap( resolver: Option<&SourcemapResolver>, mut row: ErrorOccurrenceV3Row, + language: ErrorLanguage, ) -> ErrorOccurrenceV3Row { let Some(resolver) = resolver else { return row; @@ -161,28 +187,22 @@ pub async fn enrich_with_sourcemap( return row; } - let is_java = row.sdk_name == "minecraft-plugin"; - let mapped = if is_java { - resolver - .apply_r8(row.project_id, build_id, &row.stacktrace) - .await - } else { - resolver - .apply_javascript(row.project_id, build_id, &row.stacktrace) - .await + let mapped = match language { + ErrorLanguage::Java => { + resolver + .apply_r8(row.project_id, build_id, &row.stacktrace) + .await + } + ErrorLanguage::Javascript => { + resolver + .apply_javascript(row.project_id, build_id, &row.stacktrace) + .await + } }; if let Some(mapped) = mapped { - if is_java { - row.group_hash = java_fingerprint::group_hash( - &row.error_type, - &row.error_message, - &mapped.stacktrace, - ); - } else { - row.group_hash = - fingerprint::group_hash(&row.error_type, &row.error_message, &mapped.stacktrace); - } + row.group_hash = + (language.group_hash())(&row.error_type, &row.error_message, &mapped.stacktrace); row.exact_hash = fingerprint::exact_hash(&row.error_type, &row.error_message, &mapped.stacktrace); row.mapped_stacktrace = Some(mapped.stacktrace); @@ -268,8 +288,11 @@ fn merge_context_values(base_context: Value, error_context: Value) -> Value { #[cfg(test)] mod tests { - use super::{ModsOccurrenceInput, build_mods_occurrence, empty_context, occurrence_context}; - use crate::error_tracking::java_fingerprint; + use super::{ + ErrorLanguage, ErrorOnlyOccurrenceInput, ModsOccurrenceInput, build_error_only_occurrence, + build_mods_occurrence, empty_context, occurrence_context, + }; + use crate::error_tracking::{fingerprint, java_fingerprint}; use crate::models::{Error, ErrorTracking}; use serde_json::json; use uuid::Uuid; diff --git a/src/handler/collect.rs b/src/handler/collect.rs index 12f01ff..ca693ed 100644 --- a/src/handler/collect.rs +++ b/src/handler/collect.rs @@ -143,6 +143,7 @@ pub async fn collect( if let Err(e) = insert_error_occurrence_v3( &state.batch_queue, occurrence, + crate::error_tracking::v3::ErrorLanguage::Java, Some(tracking_ctx.clone()), ) .await diff --git a/src/handler/error.rs b/src/handler/error.rs index 8a77a72..f15b611 100644 --- a/src/handler/error.rs +++ b/src/handler/error.rs @@ -4,7 +4,8 @@ use super::{ }; use crate::batch_queue::TrackingContext; use crate::error_tracking::v3::{ - ErrorOnlyOccurrenceInput, build_error_only_occurrence, empty_context, request_context, + ErrorLanguage, ErrorOnlyOccurrenceInput, build_error_only_occurrence, empty_context, + request_context, }; use crate::models::{AppState, ErrorTracking}; use axum::body::Bytes; @@ -35,6 +36,8 @@ pub(crate) struct ErrorRequest { sdk_name: Option, #[serde(default, alias = "sdk_version")] sdk_version: Option, + #[serde(default)] + language: Option, } pub async fn error( @@ -66,6 +69,11 @@ pub async fn error( return error_response(StatusCode::FORBIDDEN, "Error tracking is not enabled"); } + let language = match ErrorLanguage::parse_optional(payload.language.as_deref()) { + Ok(language) => language, + Err(message) => return error_response(StatusCode::BAD_REQUEST, message), + }; + let tracking_ctx = TrackingContext { owner_id: ctx.billing_customer_id.as_str().into(), token: token.into(), @@ -91,13 +99,18 @@ pub async fn error( session_id: error.session_id.as_deref(), sdk_name: payload.sdk_name.as_deref(), sdk_version: payload.sdk_version.as_deref(), + language, context: &context, }, &error, ); - if let Err(e) = - insert_error_occurrence_v3(&state.batch_queue, occurrence, Some(tracking_ctx.clone())) - .await + if let Err(e) = insert_error_occurrence_v3( + &state.batch_queue, + occurrence, + language, + Some(tracking_ctx.clone()), + ) + .await { return e; } diff --git a/src/handler/mod.rs b/src/handler/mod.rs index e855a06..858ead3 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -523,11 +523,13 @@ pub async fn insert_mods_event( pub async fn insert_error_occurrence_v3( batch_queue: &BatchQueue, row: ErrorOccurrenceV3Row, + language: crate::error_tracking::v3::ErrorLanguage, tracking: Option, ) -> Result<(), HandlerResponse> { batch_queue .queue_event(QueuedEvent::ErrorOccurrenceV3 { row: Box::new(row), + language, tracking, }) .await @@ -631,9 +633,14 @@ async fn process_collect_request( }, &error, ); - insert_error_occurrence_v3(batch_queue, occurrence, Some(tracking_ctx.clone())) - .await - .map_err(|_| "Failed to queue error".to_string())?; + insert_error_occurrence_v3( + batch_queue, + occurrence, + crate::error_tracking::v3::ErrorLanguage::Java, + Some(tracking_ctx.clone()), + ) + .await + .map_err(|_| "Failed to queue error".to_string())?; } } @@ -789,9 +796,14 @@ async fn process_web_request( }, &error, ); - insert_error_occurrence_v3(batch_queue, occurrence, Some(tracking_ctx.clone())) - .await - .map_err(|_| "Failed to queue error occurrence".to_string())?; + insert_error_occurrence_v3( + batch_queue, + occurrence, + crate::error_tracking::v3::ErrorLanguage::Javascript, + Some(tracking_ctx.clone()), + ) + .await + .map_err(|_| "Failed to queue error occurrence".to_string())?; } if let Some(session_id) = parsed.session_id.as_deref() diff --git a/src/handler/web.rs b/src/handler/web.rs index b3c6201..a4f837a 100644 --- a/src/handler/web.rs +++ b/src/handler/web.rs @@ -261,6 +261,7 @@ pub async fn web( if let Err(e) = insert_error_occurrence_v3( &state.batch_queue, occurrence, + crate::error_tracking::v3::ErrorLanguage::Javascript, Some(tracking_ctx.clone()), ) .await From 54432c39e787fdca3aa5d676aab247ee409fd27b Mon Sep 17 00:00:00 2001 From: Luca Date: Mon, 1 Jun 2026 08:42:07 +0200 Subject: [PATCH 17/17] remove message from hash --- src/error_tracking/fingerprint.rs | 22 ++++++---------------- src/error_tracking/java_fingerprint.rs | 18 +++++++----------- src/error_tracking/v3.rs | 15 +++++---------- 3 files changed, 18 insertions(+), 37 deletions(-) diff --git a/src/error_tracking/fingerprint.rs b/src/error_tracking/fingerprint.rs index 317d1d1..62ba1db 100644 --- a/src/error_tracking/fingerprint.rs +++ b/src/error_tracking/fingerprint.rs @@ -30,16 +30,14 @@ pub fn exact_hash(error_type: &str, message: &str, stacktrace: &str) -> String { ]) } -pub fn group_hash(error_type: &str, message: &str, stacktrace: &str) -> String { - let normalized = normalize_for_grouping(error_type, message, stacktrace); +pub fn group_hash(error_type: &str, stacktrace: &str) -> String { + let normalized = normalize_for_grouping(error_type, stacktrace); sha256_hex(&[normalized.as_bytes()]) } -fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> String { +fn normalize_for_grouping(error_type: &str, stacktrace: &str) -> String { let mut out = String::new(); out.push_str(&normalize_piece(error_type)); - out.push('\n'); - out.push_str(&normalize_piece(message)); for line in stacktrace.lines().take(50) { let normalized = normalize_piece(line); @@ -149,17 +147,9 @@ mod tests { } #[test] - fn group_hash_ignores_line_column_and_quoted_message_values() { - let a = group_hash( - "TypeError", - "Cannot read property 'name' of user 123", - " at render (/app/static/chunk.js:10:20)", - ); - let b = group_hash( - "TypeError", - "Cannot read property 'email' of user 456", - " at render (/app/static/chunk.js:99:1)", - ); + fn group_hash_ignores_line_columns() { + let a = group_hash("TypeError", " at render (/app/static/chunk.js:10:20)"); + let b = group_hash("TypeError", " at render (/app/static/chunk.js:99:1)"); assert_eq!(a, b); } diff --git a/src/error_tracking/java_fingerprint.rs b/src/error_tracking/java_fingerprint.rs index 912b784..0f0f584 100644 --- a/src/error_tracking/java_fingerprint.rs +++ b/src/error_tracking/java_fingerprint.rs @@ -27,16 +27,14 @@ static WHITESPACE_RE: LazyLock = LazyLock::new(|| Regex::new(r"\s+").expect("valid whitespace regex")); const JAVA_INTERNAL_FRAME_PREFIXES: &[&str] = &["java.", "javax.", "sun.", "com.sun.", "jdk."]; -pub fn group_hash(error_type: &str, message: &str, stacktrace: &str) -> String { - let normalized = normalize_for_grouping(error_type, message, stacktrace); +pub fn group_hash(error_type: &str, stacktrace: &str) -> String { + let normalized = normalize_for_grouping(error_type, stacktrace); sha256_hex(&[normalized.as_bytes()]) } -fn normalize_for_grouping(error_type: &str, message: &str, stacktrace: &str) -> String { +fn normalize_for_grouping(error_type: &str, stacktrace: &str) -> String { let mut out = String::new(); out.push_str(&normalize_piece(error_type)); - out.push('\n'); - out.push_str(&normalize_piece(message)); for line in stacktrace.lines().take(80) { let normalized = normalize_piece(line); @@ -96,12 +94,10 @@ mod tests { fn group_hash_ignores_jar_versions_and_line_numbers() { let a = group_hash( "java.lang.RuntimeException", - "Failed for player 123", "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)", ); let b = group_hash( "java.lang.RuntimeException", - "Failed for player 456", "\tat plugin-9.9.9.jar//com.example.Plugin.handle(Plugin.java:99)", ); @@ -121,8 +117,8 @@ mod tests { ] .join("\n"); - let a = group_hash("RuntimeException", "Failed", app_frame); - let b = group_hash("RuntimeException", "Failed", &with_internals); + let a = group_hash("RuntimeException", app_frame); + let b = group_hash("RuntimeException", &with_internals); assert_eq!(a, b); } @@ -132,8 +128,8 @@ mod tests { let app_frame = "\tat com.example.Plugin.handle(Plugin.java:42)"; let with_elision = [app_frame, "\t... 23 more"].join("\n"); - let a = group_hash("RuntimeException", "Failed", app_frame); - let b = group_hash("RuntimeException", "Failed", &with_elision); + let a = group_hash("RuntimeException", app_frame); + let b = group_hash("RuntimeException", &with_elision); assert_eq!(a, b); } diff --git a/src/error_tracking/v3.rs b/src/error_tracking/v3.rs index fba4fef..1c4afdf 100644 --- a/src/error_tracking/v3.rs +++ b/src/error_tracking/v3.rs @@ -23,7 +23,7 @@ impl ErrorLanguage { } } - fn group_hash(self) -> fn(&str, &str, &str) -> String { + fn group_hash(self) -> fn(&str, &str) -> String { match self { Self::Java => java_fingerprint::group_hash, Self::Javascript => fingerprint::group_hash, @@ -153,7 +153,7 @@ fn build_occurrence(input: OccurrenceInput<'_>, error: &ErrorTracking) -> ErrorO // release behavior are verified in production data. environment: "prod".to_string(), release: input.release.unwrap_or_default().to_string(), - group_hash: (input.language.group_hash())(&error_type, &error_message, source_stack), + group_hash: (input.language.group_hash())(&error_type, source_stack), exact_hash: fingerprint::exact_hash(&error_type, &error_message, source_stack), error_type, error_message, @@ -201,8 +201,7 @@ pub async fn enrich_with_sourcemap( }; if let Some(mapped) = mapped { - row.group_hash = - (language.group_hash())(&row.error_type, &row.error_message, &mapped.stacktrace); + row.group_hash = (language.group_hash())(&row.error_type, &mapped.stacktrace); row.exact_hash = fingerprint::exact_hash(&row.error_type, &row.error_message, &mapped.stacktrace); row.mapped_stacktrace = Some(mapped.stacktrace); @@ -288,11 +287,8 @@ fn merge_context_values(base_context: Value, error_context: Value) -> Value { #[cfg(test)] mod tests { - use super::{ - ErrorLanguage, ErrorOnlyOccurrenceInput, ModsOccurrenceInput, build_error_only_occurrence, - build_mods_occurrence, empty_context, occurrence_context, - }; - use crate::error_tracking::{fingerprint, java_fingerprint}; + use super::{ModsOccurrenceInput, build_mods_occurrence, empty_context, occurrence_context}; + use crate::error_tracking::java_fingerprint; use crate::models::{Error, ErrorTracking}; use serde_json::json; use uuid::Uuid; @@ -332,7 +328,6 @@ mod tests { row.group_hash, java_fingerprint::group_hash( "java.lang.RuntimeException", - "Failed for player 123", "\tat plugin-1.2.3.jar//com.example.Plugin.handle(Plugin.java:42)" ) );