getsentry · phacops · Jun 16, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 13, 2026
@@ -1,4 +1,5 @@
 import json
+import math
 from datetime import datetime, timedelta, timezone
 from typing import Any, Callable, TypeVar, cast
 
@@ -12,8 +13,10 @@
 )
 
 from snuba import settings, state
+from snuba.clickhouse import DATETIME_FORMAT
 from snuba.protos.common import (
     ATTRIBUTES_TO_COALESCE,
+    COLUMN_PREFIX,
     PROTO_TYPE_TO_ATTRIBUTE_COLUMN,
     PROTO_TYPE_TO_CLICKHOUSE_TYPE,
     MalformedAttributeException,
@@ -237,7 +240,12 @@ def treeify_or_and_conditions(query: Query) -> None:
 
     Note: does not apply to the conditions of a from_clause subquery (the nested one)
         this is bc transform_expressions is not implemented for composite queries
+
+    This also removes structurally-identical conjuncts from the top-level WHERE AND
+    (`A AND A` == `A`); see dedupe_and_conditions. Nothing downstream in the query
+    pipeline dedupes conditions, so without this the duplicate would reach ClickHouse.
     """
+    dedupe_and_conditions(query)
 
     def transform(exp: Expression) -> Expression:
         if not isinstance(exp, FunctionCall):
@@ -253,6 +261,42 @@ def transform(exp: Expression) -> Expression:
     query.transform_expressions(transform)
 
 
+def dedupe_and_conditions(query: Query) -> None:
+    """Remove structurally-identical conjuncts from the query's top-level AND.
+
+    ``A AND A`` is equivalent to ``A``, so dropping exact duplicates never changes
+    results. The motivating case: when a client sends a ``sentry.timestamp`` range
+    filter whose bounds equal the mandatory time-range condition, both now produce
+    byte-identical expressions (see timestamp_seconds_to_datetime_literal) and
+    collapse to a single condition. Distinct ranges are left untouched (the AND of
+    them naturally yields the tightest window).
+
+    Conditions nested inside OR/NOT subtrees are treated as opaque leaves -- we only
+    flatten the top-level AND. The query is left unchanged unless a duplicate is
+    actually found, so existing queries keep their exact condition tree.
+    """
+    condition = query.get_condition()
+    if condition is None:
+        return
+
+    def flatten_and(exp: Expression) -> list[Expression]:
+        if isinstance(exp, FunctionCall) and exp.function_name == "and":
+            flattened: list[Expression] = []
+            for param in exp.parameters:
+                flattened.extend(flatten_and(param))
+            return flattened
+        return [exp]
+
+    conjuncts = flatten_and(condition)
+    deduped: list[Expression] = []
+    for conjunct in conjuncts:
+        if conjunct not in deduped:
+            deduped.append(conjunct)
+
+    if len(deduped) != len(conjuncts):
+        query.set_ast_condition(combine_and_conditions(deduped))
+
+
 # Map column -> the Nullable type its values resolve to, used to emit a typed
 # NULL as the `if(...)` else-branch below. A bare, untyped NULL default trips
 # the ClickHouse "new analyzer": it canonicalizes the very same `if(...)`
@@ -725,6 +769,51 @@ def trace_item_filters_to_expression(
         else:
             v_expression = _attribute_value_to_expression(v)
 
+        # `sentry.timestamp` is a normalized column that `attribute_key_to_expression`
+        # maps to `CAST(timestamp, 'Float64')`. Wrapping the primary-key/partition
+        # column in a CAST prevents ClickHouse from using it for granule and partition
+        # pruning, so range filters on it scan far more data than necessary. It also
+        # duplicates the mandatory time-range condition (timestamp_in_range_condition)
+        # that is already applied on the raw column. For range comparisons, compare
+        # against the raw DateTime `timestamp` column instead so the condition is
+        # index- and partition-prunable. We reuse timestamp_seconds_to_datetime_literal
+        # so a bound equal to the mandatory range is byte-identical to it and gets
+        # collapsed by dedupe_timestamp_conditions.
+        if k.name == f"{COLUMN_PREFIX}timestamp" and value_type in (
+            "val_int",
+            "val_float",
+            "val_double",
+        ):
+            scalar_value = _scalar_value(v)
+            assert isinstance(scalar_value, (int, float))
+            raw_timestamp = column("timestamp")
+            # `timestamp` is a second-resolution DateTime, so a fractional bound must be
+            # rounded to the integer second that preserves the original
+            # `CAST(timestamp, 'Float64') OP value` result: `<`/`>=` round up (ceil) and
+            # `<=`/`>` round down (floor). Integer bounds are left unchanged, so the
+            # rewritten mandatory-range bounds stay byte-identical and
+            # dedupe_timestamp_conditions can still collapse them.
+            if op == ComparisonFilter.OP_LESS_THAN:
+                return f.less(
+                    raw_timestamp,
+                    timestamp_seconds_to_datetime_literal(math.ceil(scalar_value)),
+                )
+            if op == ComparisonFilter.OP_LESS_THAN_OR_EQUALS:
+                return f.lessOrEquals(
+                    raw_timestamp,
+                    timestamp_seconds_to_datetime_literal(math.floor(scalar_value)),
+                )
+            if op == ComparisonFilter.OP_GREATER_THAN:
+                return f.greater(
+                    raw_timestamp,
+                    timestamp_seconds_to_datetime_literal(math.floor(scalar_value)),
+                )
+            if op == ComparisonFilter.OP_GREATER_THAN_OR_EQUALS:
+                return f.greaterOrEquals(
+                    raw_timestamp,
+                    timestamp_seconds_to_datetime_literal(math.ceil(scalar_value)),
+                )
+
         if op == ComparisonFilter.OP_EQUALS:
             _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)
 
@@ -955,20 +1044,20 @@ def project_id_and_org_conditions(meta: RequestMeta) -> Expression:
     )
 
 
+def timestamp_seconds_to_datetime_literal(ts_seconds: int) -> Expression:
+    """Build the canonical ``toDateTime('YYYY-MM-DD HH:MM:SS')`` expression for a unix
+    timestamp. The mandatory time-range bounds and rewritten ``sentry.timestamp`` range
+    filters share this form so equal bounds produce structurally identical expressions
+    (which lets dedupe_timestamp_conditions collapse the duplicates)."""
+    return f.toDateTime(
+        datetime.fromtimestamp(ts_seconds, tz=timezone.utc).strftime(DATETIME_FORMAT)
+    )
+
+
 def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression:
     return and_cond(
-        f.less(
-            column("timestamp"),
-            f.toDateTime(
-                datetime.fromtimestamp(end_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
-            ),
-        ),
-        f.greaterOrEquals(
-            column("timestamp"),
-            f.toDateTime(
-                datetime.fromtimestamp(start_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
-            ),
-        ),
+        f.less(column("timestamp"), timestamp_seconds_to_datetime_literal(end_ts)),
+        f.greaterOrEquals(column("timestamp"), timestamp_seconds_to_datetime_literal(start_ts)),
     )
 
 

@@ -296,10 +296,20 @@ def _convert_order_by(
                 ]
             )
         elif x.column.HasField("key"):
+            # `sentry.timestamp` maps to `CAST(timestamp, 'Float64')`; the cast stops
+            # ClickHouse from sorting against the primary-key `timestamp` column. The
+            # cast is monotonic, so ordering by the raw DateTime column yields the same
+            # order while staying read-in-order friendly. (The i == 0 / single-project /
+            # no-groupby branch above already expands to the full table sort key; this
+            # covers `sentry.timestamp` ordering anywhere else.)
+            if x.column.key.name == "sentry.timestamp":
+                expression: Expression = snuba_column("timestamp")
+            else:
+                expression = attribute_key_to_expression(x.column.key)
             res.append(
                 OrderBy(
                     direction=direction,
-                    expression=attribute_key_to_expression(x.column.key),
+                    expression=expression,
                 )
             )
         elif x.column.HasField("conditional_aggregation"):

@@ -32,19 +32,24 @@
 from snuba.datasets.storages.factory import get_storage
 from snuba.datasets.storages.storage_key import StorageKey
 from snuba.protos.common import ATTRIBUTES_TO_COALESCE
+from snuba.query.dsl import Functions as f
+from snuba.query.dsl import and_cond, column
 from snuba.query.expressions import (
     Expression,
     FunctionCall,
     Lambda,
     Literal,
     SubscriptableReference,
 )
+from snuba.query.logical import Query
 from snuba.web.rpc.common.common import (
     _any_attribute_filter_to_expression,
     attribute_key_to_expression,
+    dedupe_and_conditions,
     next_monday,
     prev_monday,
     trace_item_filters_to_expression,
+    treeify_or_and_conditions,
     use_sampling_factor,
 )
 from snuba.web.rpc.common.exceptions import (
@@ -269,6 +274,146 @@ def test_exists_filter_on_non_coalesced_attribute_unchanged(self) -> None:
         assert expr.function_name == "mapContains"
 
 
+class TestSentryTimestampFilter:
+    """Range filters on `sentry.timestamp` must target the raw DateTime `timestamp`
+    column (index- and partition-prunable) rather than CAST(timestamp, 'Float64')."""
+
+    @staticmethod
+    def _range_filter(op: "ComparisonFilter.Op.ValueType") -> TraceItemFilter:
+        return TraceItemFilter(
+            comparison_filter=ComparisonFilter(
+                key=AttributeKey(type=AttributeKey.Type.TYPE_DOUBLE, name="sentry.timestamp"),
+                op=op,
+                value=AttributeValue(val_double=1781040732),
+            )
+        )
+
+    @pytest.mark.parametrize(
+        "op,expected_function",
+        [
+            (ComparisonFilter.OP_LESS_THAN, "less"),
+            (ComparisonFilter.OP_LESS_THAN_OR_EQUALS, "lessOrEquals"),
+            (ComparisonFilter.OP_GREATER_THAN, "greater"),
+            (ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, "greaterOrEquals"),
+        ],
+    )
+    def test_range_filter_uses_raw_timestamp_column(
+        self, op: "ComparisonFilter.Op.ValueType", expected_function: str
+    ) -> None:
+        expr = trace_item_filters_to_expression(self._range_filter(op), attribute_key_to_expression)
+        assert isinstance(expr, FunctionCall)
+        assert expr.function_name == expected_function
+
+        # LHS is the bare `timestamp` column, not a CAST.
+        from snuba.query.expressions import Column as SnubaColumn
+
+        lhs = expr.parameters[0]
+        assert isinstance(lhs, SnubaColumn)
+        assert lhs.column_name == "timestamp"
+
+        # RHS is toDateTime(value) so it compares against a DateTime, enabling
+        # primary-key and partition (toMonday(timestamp)) pruning.
+        rhs = expr.parameters[1]
+        assert isinstance(rhs, FunctionCall)
+        assert rhs.function_name == "toDateTime"
+
+    @pytest.mark.parametrize(
+        "op,expected_second",
+        [
+            # `timestamp` is second-resolution, so fractional bounds round to the
+            # integer second that preserves `CAST(timestamp, 'Float64') OP value`:
+            # `<`/`>=` round up, `<=`/`>` round down.
+            (ComparisonFilter.OP_LESS_THAN, 1781040733),
+            (ComparisonFilter.OP_LESS_THAN_OR_EQUALS, 1781040732),
+            (ComparisonFilter.OP_GREATER_THAN, 1781040732),
+            (ComparisonFilter.OP_GREATER_THAN_OR_EQUALS, 1781040733),
+        ],
+    )
+    def test_fractional_range_filter_rounds_to_preserve_semantics(
+        self, op: "ComparisonFilter.Op.ValueType", expected_second: int
+    ) -> None:
+        fractional = TraceItemFilter(
+            comparison_filter=ComparisonFilter(
+                key=AttributeKey(type=AttributeKey.Type.TYPE_DOUBLE, name="sentry.timestamp"),
+                op=op,
+                value=AttributeValue(val_double=1781040732.7),
+            )
+        )
+        expr = trace_item_filters_to_expression(fractional, attribute_key_to_expression)
+        assert isinstance(expr, FunctionCall)
+        rhs = expr.parameters[1]
+        assert isinstance(rhs, FunctionCall)
+        assert rhs.function_name == "toDateTime"
+        literal = rhs.parameters[0]
+        assert isinstance(literal, Literal)
+        expected = datetime.fromtimestamp(expected_second, tz=timezone.utc).strftime(
+            "%Y-%m-%d %H:%M:%S"
+        )
+        assert literal.value == expected
+
+    def test_equals_filter_unchanged(self) -> None:
+        """Non-range comparisons keep the existing CAST-based behavior."""
+        expr = trace_item_filters_to_expression(
+            self._range_filter(ComparisonFilter.OP_EQUALS), attribute_key_to_expression
+        )
+        # equals path wraps in the null-aware OR; the LHS of the equals is still the CAST.
+        assert isinstance(expr, FunctionCall)
+        assert expr.function_name == "or"
+        equals_expr = expr.parameters[0]
+        assert isinstance(equals_expr, FunctionCall)
+        assert equals_expr.function_name == "equals"
+        cast_expr = equals_expr.parameters[0]
+        assert isinstance(cast_expr, FunctionCall)
+        assert cast_expr.function_name == "cast"
+
+
+class TestDedupeAndConditions:
+    """dedupe_and_conditions removes structurally-identical top-level AND conjuncts.
+    treeify_or_and_conditions runs it, so every endpoint that treeifies gets it."""
+
+    @staticmethod
+    def _conjuncts(condition: object) -> list[FunctionCall]:
+        out: list[FunctionCall] = []
+        if isinstance(condition, FunctionCall) and condition.function_name == "and":
+            for param in condition.parameters:
+                out.extend(TestDedupeAndConditions._conjuncts(param))
+        elif isinstance(condition, FunctionCall):
+            out.append(condition)
+        return out
+
+    def test_duplicate_conjunct_removed(self) -> None:
+        a = f.greaterOrEquals(column("timestamp"), f.toDateTime("2026-06-09 21:30:00"))
+        b = f.equals(column("project_id"), 1)
+        query = Query(from_clause=None, condition=and_cond(a, b, a))
+
+        dedupe_and_conditions(query)
+
+        conjuncts = self._conjuncts(query.get_condition())
+        assert len(conjuncts) == 2
+        assert a in conjuncts and b in conjuncts
+
+    def test_no_duplicates_leaves_condition_untouched(self) -> None:
+        a = f.greaterOrEquals(column("timestamp"), f.toDateTime("2026-06-09 21:30:00"))
+        b = f.less(column("timestamp"), f.toDateTime("2026-06-10 21:40:00"))
+        original = and_cond(a, b)
+        query = Query(from_clause=None, condition=original)
+
+        dedupe_and_conditions(query)
+
+        # Unchanged object identity: we only rebuild when a duplicate is found.
+        assert query.get_condition() is original
+
+    def test_treeify_also_dedupes(self) -> None:
+        a = f.equals(column("project_id"), 1)
+        b = f.equals(column("organization_id"), 2)
+        query = Query(from_clause=None, condition=and_cond(a, b, a))
+
+        treeify_or_and_conditions(query)
+
+        conjuncts = self._conjuncts(query.get_condition())
+        assert len(conjuncts) == 2
+
+
 class TestAnalyzerSafeFilters:
     """Per-key filters on map-backed attributes are built from a NULL-free
     (mapContains, arrayElement) pair instead of the legacy