From 40012051992d26f1febf1ffe35f5cdd2e33306c4 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 17:04:50 -0700 Subject: [PATCH 1/4] perf(eap-items): make sentry.timestamp range filters index-prunable `sentry.timestamp` is a normalized column that attribute_key_to_expression maps to `CAST(timestamp, 'Float64')`. Wrapping the primary-key/partition column in a CAST stops ClickHouse from using it for granule and partition pruning, so client-supplied range filters on sentry.timestamp degenerate into per-row scans on top of the mandatory time-range condition already applied on the raw column. Rewrite range comparisons (<, <=, >, >=) on sentry.timestamp to compare the raw DateTime `timestamp` column against toDateTime(value) so the condition is index- and partition-prunable and folds with the mandatory time bounds. This mirrors the pattern the trace-item-table resolver already uses to enable optimize_read_in_order. EQUALS/IN and SELECTing sentry.timestamp are unchanged. Co-Authored-By: Claude Opus 4.8 (1M context) --- snuba/web/rpc/common/common.py | 21 ++++++++++++ tests/web/rpc/test_common.py | 59 ++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py index 1286ed9a42..78d857e34b 100644 --- a/snuba/web/rpc/common/common.py +++ b/snuba/web/rpc/common/common.py @@ -13,6 +13,7 @@ from snuba import settings, state from snuba.protos.common import ( + COLUMN_PREFIX, MalformedAttributeException, type_array_to_membership_array_expression, ) @@ -660,6 +661,26 @@ def trace_item_filters_to_expression( else: v_expression = _attribute_value_to_expression(v) + # `sentry.timestamp` is a normalized column that `attribute_key_to_expression` + # maps to `CAST(timestamp, 'Float64')`. Wrapping the primary-key/partition + # column in a CAST prevents ClickHouse from using it for granule and partition + # pruning, so range filters on it scan far more data than necessary. It also + # duplicates the mandatory time-range condition (timestamp_in_range_condition) + # that is already applied on the raw column. For range comparisons, compare + # against the raw DateTime `timestamp` column instead so the condition is + # index- and partition-prunable and folds with the mandatory time bounds. + if k.name == f"{COLUMN_PREFIX}timestamp" and not (v.is_null or value_type == "val_null"): + raw_timestamp = column("timestamp") + raw_value = f.toDateTime(v_expression) + if op == ComparisonFilter.OP_LESS_THAN: + return f.less(raw_timestamp, raw_value) + if op == ComparisonFilter.OP_LESS_THAN_OR_EQUALS: + return f.lessOrEquals(raw_timestamp, raw_value) + if op == ComparisonFilter.OP_GREATER_THAN: + return f.greater(raw_timestamp, raw_value) + if op == ComparisonFilter.OP_GREATER_THAN_OR_EQUALS: + return f.greaterOrEquals(raw_timestamp, raw_value) + if op == ComparisonFilter.OP_EQUALS: _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index ae18b6a5be..6a64ec8a09 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -295,6 +295,65 @@ def test_exists_filter_on_non_coalesced_attribute_unchanged(self) -> None: assert expr.function_name == "mapContains" +class TestSentryTimestampFilter: + """Range filters on `sentry.timestamp` must target the raw DateTime `timestamp` + column (index- and partition-prunable) rather than CAST(timestamp, 'Float64').""" + + @staticmethod + def _range_filter(op: "ComparisonFilter.Op.ValueType") -> TraceItemFilter: + return TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.Type.TYPE_DOUBLE, name="sentry.timestamp"), + op=op, + value=AttributeValue(val_double=1781040732), + ) + ) + + @pytest.mark.parametrize( + "op,expected_function", + [ + (ComparisonFilter.OP_LESS_THAN, "less"), + (ComparisonFilter.OP_LESS_THAN_OR_EQUALS, "lessOrEquals"), + (ComparisonFilter.OP_GREATER_THAN, "greater"), + (ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, "greaterOrEquals"), + ], + ) + def test_range_filter_uses_raw_timestamp_column( + self, op: "ComparisonFilter.Op.ValueType", expected_function: str + ) -> None: + expr = trace_item_filters_to_expression(self._range_filter(op), attribute_key_to_expression) + assert isinstance(expr, FunctionCall) + assert expr.function_name == expected_function + + # LHS is the bare `timestamp` column, not a CAST. + from snuba.query.expressions import Column as SnubaColumn + + lhs = expr.parameters[0] + assert isinstance(lhs, SnubaColumn) + assert lhs.column_name == "timestamp" + + # RHS is toDateTime(value) so it compares against a DateTime, enabling + # primary-key and partition (toMonday(timestamp)) pruning. + rhs = expr.parameters[1] + assert isinstance(rhs, FunctionCall) + assert rhs.function_name == "toDateTime" + + def test_equals_filter_unchanged(self) -> None: + """Non-range comparisons keep the existing CAST-based behavior.""" + expr = trace_item_filters_to_expression( + self._range_filter(ComparisonFilter.OP_EQUALS), attribute_key_to_expression + ) + # equals path wraps in the null-aware OR; the LHS of the equals is still the CAST. + assert isinstance(expr, FunctionCall) + assert expr.function_name == "or" + equals_expr = expr.parameters[0] + assert isinstance(equals_expr, FunctionCall) + assert equals_expr.function_name == "equals" + cast_expr = equals_expr.parameters[0] + assert isinstance(cast_expr, FunctionCall) + assert cast_expr.function_name == "cast" + + @pytest.mark.redis_db @pytest.mark.clickhouse_db def test_convert_rpc_exception_to_proto_packs_details() -> None: From 2053538224f6bb934691856b49b223bb7959b93b Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 17:51:58 -0700 Subject: [PATCH 2/4] perf(eap-items): dedupe timestamp conditions identical to the time-range bound Builds on the sentry.timestamp range-filter rewrite: emit the rewritten bound using the same canonical toDateTime('YYYY-MM-DD HH:MM:SS') form as the mandatory time-range condition, so a client filter whose bounds equal the request window produces a byte-identical expression. treeify_or_and_conditions now drops structurally-identical conjuncts from the top-level WHERE AND (A AND A == A) before re-nesting. Nothing downstream in the query pipeline dedupes conditions (the ClickHouse formatter joins AND args verbatim), so without this the duplicate bound would reach ClickHouse. Because every RPC endpoint treeifies, they all get the dedupe. Distinct ranges are left untouched -- the AND of them yields the tightest window -- and the pass is a no-op (no rebuild) when there are no duplicates. Co-Authored-By: Claude Opus 4.8 (1M context) --- snuba/web/rpc/common/common.py | 79 ++++++-- tests/web/rpc/test_common.py | 52 ++++++ ..._endpoint_time_series_timestamp_pruning.py | 171 ++++++++++++++++++ 3 files changed, 287 insertions(+), 15 deletions(-) create mode 100644 tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_timestamp_pruning.py diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py index 78d857e34b..56ea7c4f59 100644 --- a/snuba/web/rpc/common/common.py +++ b/snuba/web/rpc/common/common.py @@ -301,7 +301,12 @@ def treeify_or_and_conditions(query: Query) -> None: Note: does not apply to the conditions of a from_clause subquery (the nested one) this is bc transform_expressions is not implemented for composite queries + + This also removes structurally-identical conjuncts from the top-level WHERE AND + (`A AND A` == `A`); see dedupe_and_conditions. Nothing downstream in the query + pipeline dedupes conditions, so without this the duplicate would reach ClickHouse. """ + dedupe_and_conditions(query) def transform(exp: Expression) -> Expression: if not isinstance(exp, FunctionCall): @@ -317,6 +322,42 @@ def transform(exp: Expression) -> Expression: query.transform_expressions(transform) +def dedupe_and_conditions(query: Query) -> None: + """Remove structurally-identical conjuncts from the query's top-level AND. + + ``A AND A`` is equivalent to ``A``, so dropping exact duplicates never changes + results. The motivating case: when a client sends a ``sentry.timestamp`` range + filter whose bounds equal the mandatory time-range condition, both now produce + byte-identical expressions (see timestamp_seconds_to_datetime_literal) and + collapse to a single condition. Distinct ranges are left untouched (the AND of + them naturally yields the tightest window). + + Conditions nested inside OR/NOT subtrees are treated as opaque leaves -- we only + flatten the top-level AND. The query is left unchanged unless a duplicate is + actually found, so existing queries keep their exact condition tree. + """ + condition = query.get_condition() + if condition is None: + return + + def flatten_and(exp: Expression) -> list[Expression]: + if isinstance(exp, FunctionCall) and exp.function_name == "and": + flattened: list[Expression] = [] + for param in exp.parameters: + flattened.extend(flatten_and(param)) + return flattened + return [exp] + + conjuncts = flatten_and(condition) + deduped: list[Expression] = [] + for conjunct in conjuncts: + if conjunct not in deduped: + deduped.append(conjunct) + + if len(deduped) != len(conjuncts): + query.set_ast_condition(combine_and_conditions(deduped)) + + def add_existence_check_to_subscriptable_references(query: Query) -> None: def transform(exp: Expression) -> Expression: if not isinstance(exp, SubscriptableReference): @@ -668,10 +709,18 @@ def trace_item_filters_to_expression( # duplicates the mandatory time-range condition (timestamp_in_range_condition) # that is already applied on the raw column. For range comparisons, compare # against the raw DateTime `timestamp` column instead so the condition is - # index- and partition-prunable and folds with the mandatory time bounds. - if k.name == f"{COLUMN_PREFIX}timestamp" and not (v.is_null or value_type == "val_null"): + # index- and partition-prunable. We reuse timestamp_seconds_to_datetime_literal + # so a bound equal to the mandatory range is byte-identical to it and gets + # collapsed by dedupe_timestamp_conditions. + if k.name == f"{COLUMN_PREFIX}timestamp" and value_type in ( + "val_int", + "val_float", + "val_double", + ): + scalar_value = _scalar_value(v) + assert isinstance(scalar_value, (int, float)) raw_timestamp = column("timestamp") - raw_value = f.toDateTime(v_expression) + raw_value = timestamp_seconds_to_datetime_literal(int(scalar_value)) if op == ComparisonFilter.OP_LESS_THAN: return f.less(raw_timestamp, raw_value) if op == ComparisonFilter.OP_LESS_THAN_OR_EQUALS: @@ -853,20 +902,20 @@ def project_id_and_org_conditions(meta: RequestMeta) -> Expression: ) +def timestamp_seconds_to_datetime_literal(ts_seconds: int) -> Expression: + """Build the canonical ``toDateTime('YYYY-MM-DD HH:MM:SS')`` expression for a unix + timestamp. The mandatory time-range bounds and rewritten ``sentry.timestamp`` range + filters share this form so equal bounds produce structurally identical expressions + (which lets dedupe_timestamp_conditions collapse the duplicates).""" + return f.toDateTime( + datetime.fromtimestamp(ts_seconds, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + ) + + def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression: return and_cond( - f.less( - column("timestamp"), - f.toDateTime( - datetime.fromtimestamp(end_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - ), - ), - f.greaterOrEquals( - column("timestamp"), - f.toDateTime( - datetime.fromtimestamp(start_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") - ), - ), + f.less(column("timestamp"), timestamp_seconds_to_datetime_literal(end_ts)), + f.greaterOrEquals(column("timestamp"), timestamp_seconds_to_datetime_literal(start_ts)), ) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index 6a64ec8a09..a83697e5c5 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -31,14 +31,19 @@ from snuba.datasets.storages.factory import get_storage from snuba.datasets.storages.storage_key import StorageKey from snuba.protos.common import ATTRIBUTES_TO_COALESCE +from snuba.query.dsl import Functions as f +from snuba.query.dsl import and_cond, column from snuba.query.expressions import FunctionCall, Lambda, Literal +from snuba.query.logical import Query from snuba.web.rpc.common.common import ( _any_attribute_filter_to_expression, attribute_key_to_expression, + dedupe_and_conditions, next_monday, prev_monday, process_arrays, trace_item_filters_to_expression, + treeify_or_and_conditions, use_sampling_factor, ) from snuba.web.rpc.common.exceptions import ( @@ -354,6 +359,53 @@ def test_equals_filter_unchanged(self) -> None: assert cast_expr.function_name == "cast" +class TestDedupeAndConditions: + """dedupe_and_conditions removes structurally-identical top-level AND conjuncts. + treeify_or_and_conditions runs it, so every endpoint that treeifies gets it.""" + + @staticmethod + def _conjuncts(condition: object) -> list[FunctionCall]: + out: list[FunctionCall] = [] + if isinstance(condition, FunctionCall) and condition.function_name == "and": + for param in condition.parameters: + out.extend(TestDedupeAndConditions._conjuncts(param)) + elif isinstance(condition, FunctionCall): + out.append(condition) + return out + + def test_duplicate_conjunct_removed(self) -> None: + a = f.greaterOrEquals(column("timestamp"), f.toDateTime("2026-06-09 21:30:00")) + b = f.equals(column("project_id"), 1) + query = Query(from_clause=None, condition=and_cond(a, b, a)) + + dedupe_and_conditions(query) + + conjuncts = self._conjuncts(query.get_condition()) + assert len(conjuncts) == 2 + assert a in conjuncts and b in conjuncts + + def test_no_duplicates_leaves_condition_untouched(self) -> None: + a = f.greaterOrEquals(column("timestamp"), f.toDateTime("2026-06-09 21:30:00")) + b = f.less(column("timestamp"), f.toDateTime("2026-06-10 21:40:00")) + original = and_cond(a, b) + query = Query(from_clause=None, condition=original) + + dedupe_and_conditions(query) + + # Unchanged object identity: we only rebuild when a duplicate is found. + assert query.get_condition() is original + + def test_treeify_also_dedupes(self) -> None: + a = f.equals(column("project_id"), 1) + b = f.equals(column("organization_id"), 2) + query = Query(from_clause=None, condition=and_cond(a, b, a)) + + treeify_or_and_conditions(query) + + conjuncts = self._conjuncts(query.get_condition()) + assert len(conjuncts) == 2 + + @pytest.mark.redis_db @pytest.mark.clickhouse_db def test_convert_rpc_exception_to_proto_packs_details() -> None: diff --git a/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_timestamp_pruning.py b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_timestamp_pruning.py new file mode 100644 index 0000000000..abdc21efd4 --- /dev/null +++ b/tests/web/rpc/v1/test_endpoint_time_series/test_endpoint_time_series_timestamp_pruning.py @@ -0,0 +1,171 @@ +"""Tests that `sentry.timestamp` range filters on the TimeSeries endpoint are +emitted against the raw `timestamp` DateTime column (never CAST to Float64) and +that conditions identical to the mandatory time-range bound are deduped. + +These build the query AST directly via `build_query` so we can assert on the +generated WHERE clause without needing a live ClickHouse. +""" + +from google.protobuf.timestamp_pb2 import Timestamp +from sentry_protos.snuba.v1.endpoint_time_series_pb2 import TimeSeriesRequest +from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType +from sentry_protos.snuba.v1.trace_item_attribute_pb2 import ( + AttributeAggregation, + AttributeKey, + AttributeValue, + Function, +) +from sentry_protos.snuba.v1.trace_item_filter_pb2 import ( + AndFilter, + ComparisonFilter, + TraceItemFilter, +) + +from snuba.query.expressions import Column, Expression, FunctionCall +from snuba.web.rpc.v1.resolvers.R_eap_items.resolver_time_series import build_query + +# Aligned to a 600s boundary so the meta window matches what the time-series +# bucketing produces; values are arbitrary fixed unix seconds. +START_TS = 1781040600 +END_TS = 1781127600 + +_RANGE_OPS = {"less", "lessOrEquals", "greater", "greaterOrEquals"} + + +def _flatten_and(exp: Expression | None) -> list[Expression]: + if isinstance(exp, FunctionCall) and exp.function_name == "and": + out: list[Expression] = [] + for param in exp.parameters: + out.extend(_flatten_and(param)) + return out + return [] if exp is None else [exp] + + +def _is_raw_timestamp_column(exp: Expression) -> bool: + return isinstance(exp, Column) and exp.column_name == "timestamp" + + +def _timestamp_range_conditions(condition: Expression | None) -> list[FunctionCall]: + """All top-level range comparisons whose left side is the raw `timestamp` column.""" + conditions = [] + for conjunct in _flatten_and(condition): + if ( + isinstance(conjunct, FunctionCall) + and conjunct.function_name in _RANGE_OPS + and conjunct.parameters + and _is_raw_timestamp_column(conjunct.parameters[0]) + ): + conditions.append(conjunct) + return conditions + + +def _contains_cast_of_timestamp(exp: Expression | None) -> bool: + """True if any `cast(timestamp, ...)` (the Float64 form) appears in the tree.""" + if isinstance(exp, FunctionCall): + if ( + exp.function_name == "cast" + and exp.parameters + and _is_raw_timestamp_column(exp.parameters[0]) + ): + return True + return any(_contains_cast_of_timestamp(p) for p in exp.parameters) + return False + + +def _make_request(*timestamp_filters: TraceItemFilter) -> TimeSeriesRequest: + request_filter = None + if len(timestamp_filters) == 1: + request_filter = timestamp_filters[0] + elif len(timestamp_filters) > 1: + request_filter = TraceItemFilter(and_filter=AndFilter(filters=list(timestamp_filters))) + + return TimeSeriesRequest( + meta=RequestMeta( + project_ids=[1], + organization_id=1, + cogs_category="something", + referrer="something", + start_timestamp=Timestamp(seconds=START_TS), + end_timestamp=Timestamp(seconds=END_TS), + trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, + ), + aggregations=[ + AttributeAggregation( + aggregate=Function.FUNCTION_COUNT, + key=AttributeKey(type=AttributeKey.TYPE_FLOAT, name="sentry.duration"), + label="count", + ), + ], + granularity_secs=600, + filter=request_filter or TraceItemFilter(), + ) + + +def _ts_filter(op: "ComparisonFilter.Op.ValueType", seconds: int) -> TraceItemFilter: + return TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.TYPE_DOUBLE, name="sentry.timestamp"), + op=op, + value=AttributeValue(val_double=seconds), + ) + ) + + +def test_no_timestamp_filter_keeps_only_mandatory_bounds() -> None: + query = build_query(_make_request()) + conds = _timestamp_range_conditions(query.get_condition()) + # Just the mandatory lower + upper bound from the request time window. + assert len(conds) == 2 + assert not _contains_cast_of_timestamp(query.get_condition()) + + +def test_identical_range_is_deduped_to_one() -> None: + # A sentry.timestamp filter whose bounds exactly match the meta time window. + # Using `>=` for the lower bound and `<` for the upper bound mirrors the + # mandatory condition, so the two pairs are byte-identical and collapse. + query = build_query( + _make_request( + _ts_filter(ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, START_TS), + _ts_filter(ComparisonFilter.OP_LESS_THAN, END_TS), + ) + ) + conds = _timestamp_range_conditions(query.get_condition()) + # The duplicate lower/upper bounds are pruned -> only one of each remains. + assert len(conds) == 2 + functions = sorted(c.function_name for c in conds) + assert functions == ["greaterOrEquals", "less"] + assert not _contains_cast_of_timestamp(query.get_condition()) + + +def test_different_range_keeps_both_bounds_as_intersection() -> None: + # A tighter window than the meta range: both the meta bounds and the filter + # bounds survive; ClickHouse's AND yields the smaller (intersected) window. + query = build_query( + _make_request( + _ts_filter(ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, START_TS + 600), + _ts_filter(ComparisonFilter.OP_LESS_THAN, END_TS - 600), + ) + ) + conds = _timestamp_range_conditions(query.get_condition()) + # 2 mandatory + 2 distinct filter bounds, none collapsed. + assert len(conds) == 4 + assert not _contains_cast_of_timestamp(query.get_condition()) + + +def test_more_than_two_distinct_bounds_all_kept_and_not_float() -> None: + # Three distinct sentry.timestamp bounds (none equal to a meta bound) plus the + # two mandatory bounds: all are kept, and crucially none use CAST(.. Float64). + query = build_query( + _make_request( + _ts_filter(ComparisonFilter.OP_GREATER_THAN, START_TS + 100), + _ts_filter(ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, START_TS + 600), + _ts_filter(ComparisonFilter.OP_LESS_THAN, END_TS - 600), + ) + ) + condition = query.get_condition() + conds = _timestamp_range_conditions(condition) + assert len(conds) == 5 + # Every timestamp comparison compares the raw DateTime column, not a Float cast. + assert not _contains_cast_of_timestamp(condition) + for cond in conds: + assert _is_raw_timestamp_column(cond.parameters[0]) From 9e85f74a34c4de9f0106b01b183b095748c31e1c Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 12 Jun 2026 17:41:24 -0700 Subject: [PATCH 3/4] perf(eap-items): order by raw timestamp column for sentry.timestamp sorts When a client orders by sentry.timestamp outside the first-position single-project no-groupby fast path, _convert_order_by mapped it through attribute_key_to_expression, which emits CAST(timestamp, ). That cast stops ClickHouse from sorting against the primary-key timestamp column, the same way the CAST defeated index pruning for range filters. Order by the raw DateTime timestamp column instead. The cast is monotonic, so the result order is identical, but ClickHouse can now read in primary-key order. This covers timestamp ordering as a secondary key, alongside group bys, or across multiple projects. Co-Authored-By: Claude Opus 4.8 Agent transcript: https://claudescope.sentry.dev/share/_OVFmpNByU-pVFTu3TbDsraFzPaFx3DrJX8_H8CH0xk --- .../R_eap_items/resolver_trace_item_table.py | 12 +++++++++++- .../test_endpoint_trace_item_table.py | 18 ++++++++---------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py b/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py index 8d7260fa57..5f7215568c 100644 --- a/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py +++ b/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py @@ -271,10 +271,20 @@ def _convert_order_by( ] ) elif x.column.HasField("key"): + # `sentry.timestamp` maps to `CAST(timestamp, 'Float64')`; the cast stops + # ClickHouse from sorting against the primary-key `timestamp` column. The + # cast is monotonic, so ordering by the raw DateTime column yields the same + # order while staying read-in-order friendly. (The i == 0 / single-project / + # no-groupby branch above already expands to the full table sort key; this + # covers `sentry.timestamp` ordering anywhere else.) + if x.column.key.name == "sentry.timestamp": + expression: Expression = snuba_column("timestamp") + else: + expression = attribute_key_to_expression(x.column.key) res.append( OrderBy( direction=direction, - expression=attribute_key_to_expression(x.column.key), + expression=expression, ) ) elif x.column.HasField("conditional_aggregation"): diff --git a/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py b/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py index db37dda848..6ff1263827 100644 --- a/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py +++ b/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py @@ -4196,14 +4196,13 @@ def test_build_query_with_order_by_optimization_disabled_because_multiproject() request = _apply_labels_to_columns(request) query = build_query(request) + # The full sort-key expansion is disabled (multi-project), but ordering by + # sentry.timestamp still targets the raw `timestamp` column rather than a CAST so + # ClickHouse can read in primary-key order. assert query.get_orderby() == [ OrderBy( direction=OrderByDirection.DESC, - expression=f.cast( - snuba_column("timestamp"), - "String", - alias="sentry.timestamp_TYPE_STRING", - ), + expression=snuba_column("timestamp"), ), ] @@ -4239,14 +4238,13 @@ def test_build_query_with_order_by_optimization_disabled_because_groupby() -> No request = _apply_labels_to_columns(request) query = build_query(request) + # The full sort-key expansion is disabled (group by present), but ordering by + # sentry.timestamp still targets the raw `timestamp` column rather than a CAST so + # ClickHouse can read in primary-key order. assert query.get_orderby() == [ OrderBy( direction=OrderByDirection.DESC, - expression=f.cast( - snuba_column("timestamp"), - "String", - alias="sentry.timestamp_TYPE_STRING", - ), + expression=snuba_column("timestamp"), ), ] From 7e28b08ca9cb0efaf8b33f7be9cd8f6111477a5f Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 16 Jun 2026 10:56:03 -0700 Subject: [PATCH 4/4] fix(eap-items): Round fractional sentry.timestamp bounds to preserve filter semantics The raw `timestamp` column is a second-resolution DateTime, so truncating a fractional bound with int() shifted range boundaries versus the original CAST(timestamp, 'Float64') comparison. Round per-operator instead (ceil for =, floor for <=/>) so integer bounds stay byte-identical (dedupe still collapses them) and fractional bounds match the original semantics exactly. Also reuse the existing DATETIME_FORMAT constant. Co-Authored-By: Claude Opus 4.8 (1M context) --- snuba/web/rpc/common/common.py | 31 +++++++++++++++++++++++++------ tests/web/rpc/test_common.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py index 56ea7c4f59..d4bfe1562a 100644 --- a/snuba/web/rpc/common/common.py +++ b/snuba/web/rpc/common/common.py @@ -1,4 +1,5 @@ import json +import math from datetime import datetime, timedelta, timezone from typing import Any, Callable, TypeVar, cast @@ -12,6 +13,7 @@ ) from snuba import settings, state +from snuba.clickhouse import DATETIME_FORMAT from snuba.protos.common import ( COLUMN_PREFIX, MalformedAttributeException, @@ -720,15 +722,32 @@ def trace_item_filters_to_expression( scalar_value = _scalar_value(v) assert isinstance(scalar_value, (int, float)) raw_timestamp = column("timestamp") - raw_value = timestamp_seconds_to_datetime_literal(int(scalar_value)) + # `timestamp` is a second-resolution DateTime, so a fractional bound must be + # rounded to the integer second that preserves the original + # `CAST(timestamp, 'Float64') OP value` result: `<`/`>=` round up (ceil) and + # `<=`/`>` round down (floor). Integer bounds are left unchanged, so the + # rewritten mandatory-range bounds stay byte-identical and + # dedupe_timestamp_conditions can still collapse them. if op == ComparisonFilter.OP_LESS_THAN: - return f.less(raw_timestamp, raw_value) + return f.less( + raw_timestamp, + timestamp_seconds_to_datetime_literal(math.ceil(scalar_value)), + ) if op == ComparisonFilter.OP_LESS_THAN_OR_EQUALS: - return f.lessOrEquals(raw_timestamp, raw_value) + return f.lessOrEquals( + raw_timestamp, + timestamp_seconds_to_datetime_literal(math.floor(scalar_value)), + ) if op == ComparisonFilter.OP_GREATER_THAN: - return f.greater(raw_timestamp, raw_value) + return f.greater( + raw_timestamp, + timestamp_seconds_to_datetime_literal(math.floor(scalar_value)), + ) if op == ComparisonFilter.OP_GREATER_THAN_OR_EQUALS: - return f.greaterOrEquals(raw_timestamp, raw_value) + return f.greaterOrEquals( + raw_timestamp, + timestamp_seconds_to_datetime_literal(math.ceil(scalar_value)), + ) if op == ComparisonFilter.OP_EQUALS: _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter) @@ -908,7 +927,7 @@ def timestamp_seconds_to_datetime_literal(ts_seconds: int) -> Expression: filters share this form so equal bounds produce structurally identical expressions (which lets dedupe_timestamp_conditions collapse the duplicates).""" return f.toDateTime( - datetime.fromtimestamp(ts_seconds, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + datetime.fromtimestamp(ts_seconds, tz=timezone.utc).strftime(DATETIME_FORMAT) ) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index a83697e5c5..208afb7aa1 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -343,6 +343,40 @@ def test_range_filter_uses_raw_timestamp_column( assert isinstance(rhs, FunctionCall) assert rhs.function_name == "toDateTime" + @pytest.mark.parametrize( + "op,expected_second", + [ + # `timestamp` is second-resolution, so fractional bounds round to the + # integer second that preserves `CAST(timestamp, 'Float64') OP value`: + # `<`/`>=` round up, `<=`/`>` round down. + (ComparisonFilter.OP_LESS_THAN, 1781040733), + (ComparisonFilter.OP_LESS_THAN_OR_EQUALS, 1781040732), + (ComparisonFilter.OP_GREATER_THAN, 1781040732), + (ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, 1781040733), + ], + ) + def test_fractional_range_filter_rounds_to_preserve_semantics( + self, op: "ComparisonFilter.Op.ValueType", expected_second: int + ) -> None: + fractional = TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.Type.TYPE_DOUBLE, name="sentry.timestamp"), + op=op, + value=AttributeValue(val_double=1781040732.7), + ) + ) + expr = trace_item_filters_to_expression(fractional, attribute_key_to_expression) + assert isinstance(expr, FunctionCall) + rhs = expr.parameters[1] + assert isinstance(rhs, FunctionCall) + assert rhs.function_name == "toDateTime" + literal = rhs.parameters[0] + assert isinstance(literal, Literal) + expected = datetime.fromtimestamp(expected_second, tz=timezone.utc).strftime( + "%Y-%m-%d %H:%M:%S" + ) + assert literal.value == expected + def test_equals_filter_unchanged(self) -> None: """Non-range comparisons keep the existing CAST-based behavior.""" expr = trace_item_filters_to_expression(