getsentry · phacops · Jun 16, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
@@ -13,6 +13,9 @@
 
 from snuba import settings, state
 from snuba.protos.common import (
+    ATTRIBUTES_TO_COALESCE,
+    PROTO_TYPE_TO_ATTRIBUTE_COLUMN,
+    PROTO_TYPE_TO_CLICKHOUSE_TYPE,
     MalformedAttributeException,
     type_array_to_membership_array_expression,
 )
@@ -24,6 +27,7 @@
 from snuba.query.dsl import Functions as f
 from snuba.query.dsl import (
     and_cond,
+    arrayElement,
     column,
     in_cond,
     literal,
@@ -289,6 +293,94 @@ def transform(exp: Expression) -> Expression:
     query.transform_expressions(transform)
 
 
+def _contains_subscriptable_reference(exp: Expression) -> bool:
+    """True if ``exp`` contains a ``SubscriptableReference`` (a map lookup) — the
+    map-backed keys we route through ``_map_backed_operands``."""
+    found = False
+
+    def visit(node: Expression) -> Expression:
+        nonlocal found
+        if isinstance(node, SubscriptableReference):
+            found = True
+        return node
+
+    exp.transform(visit)
+    return found
+
+
+def _map_backed_operands(k: AttributeKey) -> tuple[Expression, Expression]:
+    """Build ``(value, exists)`` for a map-backed key directly, NULL-free.
+
+    The legacy ``if(mapContains, arrayElement, NULL)`` form puts a NULL constant
+    into the predicate; the new ClickHouse analyzer canonicalizes that NULL
+    inconsistently between an aggregate's projection name and its computed block
+    (notably inside ``in(...)``), so the column isn't found ("Code: 10 ... Not
+    found column ... in block"). We write ``mapContains`` + ``arrayElement``
+    directly, so no NULL appears:
+
+        value  = arrayElement(k)                                  # single key
+        value  = multiIf(mapContains(k1), arrayElement(k1), ...,  # coalesced:
+                         arrayElement(kn))                         # first present
+        exists = mapContains(k1) OR ... OR mapContains(kn)
+
+    Callers build ``cmp(value, v)``, wrapped in ``and(exists, ...)`` only when the
+    literal could be the column default (see ``_comparison_can_match_column_default``)
+    — then ``exists``, not the value, distinguishes a missing key from a stored
+    empty value, since ``arrayElement`` reads both as the '' / 0 default. The
+    ``multiIf`` else is only reached when all keys are absent.
+
+    Built without aliases: conditions don't need them, and an alias here would
+    collide with the SELECT clause's existence ``if(...)`` for the same attribute
+    (same alias, different expression). Only valid for map-backed keys
+    (string/int/float); booleans / normalized columns / arrays take the legacy
+    path, and SELECT keeps its own ``coalesce(...)`` representation untouched.
+    """
+    col_name = PROTO_TYPE_TO_ATTRIBUTE_COLUMN[k.type]
+    names = [k.name] + list(ATTRIBUTES_TO_COALESCE.get(k.name, ()))
+
+    def _value(name: str) -> Expression:
+        elem = arrayElement(None, column(col_name), literal(name))
+        # ints live in the float map and surface as Int64, so they need a cast.
+        if k.type == AttributeKey.Type.TYPE_INT:
+            return f.cast(elem, f"Nullable({PROTO_TYPE_TO_CLICKHOUSE_TYPE[k.type]})")
+        return elem
+
+    values = [_value(name) for name in names]
+    existences = [f.mapContains(column(col_name), literal(name)) for name in names]
+
+    exists = combine_or_conditions(existences) if len(existences) > 1 else existences[0]
+    if len(values) == 1:
+        value: Expression = values[0]
+    else:
+        args: list[Expression] = []
+        for cond, val in zip(existences[:-1], values[:-1]):
+            args.extend((cond, val))
+        args.append(values[-1])
+        value = f.multiIf(*args)
+    return value, exists
+
+
+def _analyzer_safe_in_expression(
+    k: AttributeKey,
+    v_expression: Expression,
+    *,
+    negated: bool,
+    ignore_case: bool = False,
+    guard: bool = True,
+) -> Expression:
+    """``IN`` / ``NOT IN`` as ``[not] in(value, set)``, wrapped in
+    ``and(exists, ...)`` only when ``guard`` is set (see ``_map_backed_operands``
+    and ``_comparison_can_match_column_default``). The value lists carry only
+    scalars, so the set never contains NULL — the legacy ``has(set, NULL)`` branch
+    was always ``false``."""
+    value, exists = _map_backed_operands(k)
+    if ignore_case:
+        value = f.lower(value)
+    membership = in_cond(value, v_expression)
+    present = and_cond(exists, membership) if guard else membership
+    return not_cond(present) if negated else present
+
+
 def _scalar_value(v: AttributeValue) -> bool | str | int | float | None:
     """Extract a Python scalar from an AttributeValue proto."""
     match v.WhichOneof("value"):
@@ -302,12 +394,29 @@ def _scalar_value(v: AttributeValue) -> bool | str | int | float | None:
             return v.val_double
         case "val_int":
             return v.val_int
-        case None:
+        case "val_null" | None:
             return None
         case other:
             raise NotImplementedError(f"not a scalar AttributeValue type: {other}")
 
 
+def _comparison_can_match_column_default(
+    attr_type: AttributeKey.Type.ValueType, v: AttributeValue, value_type: str
+) -> bool:
+    """True if any compared literal is the column default ('' / 0), which an
+    absent key also reads as — so the ``mapContains`` guard is needed to avoid
+    matching absent keys. When no literal is the default the guard is dropped (the
+    simplest form). LIKE/NOT_LIKE always guard; null comparisons are separate."""
+    default: str | int = "" if attr_type == AttributeKey.Type.TYPE_STRING else 0
+    if value_type == "val_array":
+        scalars: list[Any] = [_scalar_value(x) for x in v.val_array.values]
+    elif value_type in ("val_str_array", "val_int_array", "val_float_array", "val_double_array"):
+        scalars = list(getattr(v, value_type).values)
+    else:
+        scalars = [_scalar_value(v)]
+    return any(s == default for s in scalars)
+
+
 def _attribute_value_to_expression(v: AttributeValue) -> Expression:
     """Convert an AttributeValue proto to a Snuba Expression."""
     value_type = v.WhichOneof("value")
@@ -610,7 +719,8 @@ def trace_item_filters_to_expression(
         if value_type is None:
             raise BadSnubaRPCRequestException("comparison does not have a right hand side")
 
-        if v.is_null or value_type == "val_null":
+        v_is_null = v.is_null or value_type == "val_null"
+        if v_is_null:
             v_expression: Expression = literal(None)
         else:
             v_expression = _attribute_value_to_expression(v)
@@ -622,6 +732,21 @@ def trace_item_filters_to_expression(
                 return _type_array_includes_scalar_expression(
                     k_expression, v, item_filter.comparison_filter.ignore_case
                 )
+            if _contains_subscriptable_reference(k_expression):
+                # Map-backed: NULL-free (exists, value) form (see _map_backed_operands).
+                value, exists = _map_backed_operands(k)
+                if v_is_null:  # `attr = null` <=> key absent
+                    return not_cond(exists)
+                lhs, rhs = (
+                    (f.lower(value), f.lower(v_expression))
+                    if item_filter.comparison_filter.ignore_case
+                    else (value, v_expression)
+                )
+                cmp = f.equals(lhs, rhs)
+                # mapContains guard only needed when '' / 0 could match an absent key.
+                if _comparison_can_match_column_default(k.type, v, value_type):
+                    return and_cond(exists, cmp)
+                return cmp
             else:
                 expr = (
                     f.equals(f.lower(k_expression), f.lower(v_expression))
@@ -642,6 +767,19 @@ def trace_item_filters_to_expression(
                         k_expression, v, item_filter.comparison_filter.ignore_case
                     )
                 )
+            if _contains_subscriptable_reference(k_expression):
+                # Negation of OP_EQUALS; an absent key is "not equal".
+                value, exists = _map_backed_operands(k)
+                if v_is_null:  # `attr != null` <=> key present
+                    return exists
+                lhs, rhs = (
+                    (f.lower(value), f.lower(v_expression))
+                    if item_filter.comparison_filter.ignore_case
+                    else (value, v_expression)
+                )
+                if _comparison_can_match_column_default(k.type, v, value_type):
+                    return not_cond(and_cond(exists, f.equals(lhs, rhs)))
+                return f.notEquals(lhs, rhs)
             else:
                 expr = (
                     f.notEquals(f.lower(k_expression), f.lower(v_expression))
@@ -670,6 +808,9 @@ def trace_item_filters_to_expression(
                     "the LIKE comparison is only supported on string and array keys"
                 )
             comparison_function = f.ilike if item_filter.comparison_filter.ignore_case else f.like
+            if _contains_subscriptable_reference(k_expression):
+                value, exists = _map_backed_operands(k)
+                return and_cond(exists, comparison_function(value, v_expression))
             return comparison_function(k_expression, v_expression)
         if op == ComparisonFilter.OP_NOT_LIKE:
             if k.type == AttributeKey.Type.TYPE_ARRAY:
@@ -688,6 +829,11 @@ def trace_item_filters_to_expression(
                 raise BadSnubaRPCRequestException(
                     "the NOT LIKE comparison is only supported on string and array keys"
                 )
+            if _contains_subscriptable_reference(k_expression):
+                # Negation of OP_LIKE; an absent key is "not like".
+                like_fn = f.ilike if item_filter.comparison_filter.ignore_case else f.like
+                value, exists = _map_backed_operands(k)
+                return not_cond(and_cond(exists, like_fn(value, v_expression)))
             comparison_function = (
                 f.notILike if item_filter.comparison_filter.ignore_case else f.notLike
             )
@@ -706,8 +852,8 @@ def trace_item_filters_to_expression(
             return f.greaterOrEquals(k_expression, v_expression)
         if op == ComparisonFilter.OP_IN:
             _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)
-            if item_filter.comparison_filter.ignore_case:
-                k_expression = f.lower(k_expression)
+            ignore_case = item_filter.comparison_filter.ignore_case
+            if ignore_case:
                 if value_type == "val_str_array":
                     v_expression = literals_array(
                         None,
@@ -718,19 +864,30 @@ def trace_item_filters_to_expression(
                         None,
                         [literal(elem.val_str.lower()) for elem in v.val_array.values],
                     )
-            expr = in_cond(k_expression, v_expression)
             # note: v_expression must be an array
             # we redefine the way in works for nulls
             # now null in ['hi', null] is true
+            if _contains_subscriptable_reference(k_expression):
+                # Map-backed: keep the existence if(...) out of in() (see helper).
+                return _analyzer_safe_in_expression(
+                    k,
+                    v_expression,
+                    negated=False,
+                    ignore_case=ignore_case,
+                    guard=_comparison_can_match_column_default(k.type, v, value_type),
+                )
+            if ignore_case:
+                k_expression = f.lower(k_expression)
+            expr = in_cond(k_expression, v_expression)
             expr_with_null = or_cond(
                 expr,
                 and_cond(f.isNull(k_expression), f.has(v_expression, literal(None))),
             )
             return expr_with_null
         if op == ComparisonFilter.OP_NOT_IN:
             _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)
-            if item_filter.comparison_filter.ignore_case:
-                k_expression = f.lower(k_expression)
+            ignore_case = item_filter.comparison_filter.ignore_case
+            if ignore_case:
                 if value_type == "val_str_array":
                     v_expression = literals_array(
                         None,
@@ -741,10 +898,21 @@ def trace_item_filters_to_expression(
                         None,
                         [literal(elem.val_str.lower()) for elem in v.val_array.values],
                     )
-            expr = not_cond(in_cond(k_expression, v_expression))
             # note: v_expression must be an array
             # we redefine the way not in works for nulls
             # now null not in ['hi'] is true
+            if _contains_subscriptable_reference(k_expression):
+                # Map-backed: keep the existence if(...) out of in() (see helper).
+                return _analyzer_safe_in_expression(
+                    k,
+                    v_expression,
+                    negated=True,
+                    ignore_case=ignore_case,
+                    guard=_comparison_can_match_column_default(k.type, v, value_type),
+                )
+            if ignore_case:
+                k_expression = f.lower(k_expression)
+            expr = not_cond(in_cond(k_expression, v_expression))
             expr_with_null = or_cond(
                 expr,
                 and_cond(

@@ -37,12 +37,18 @@
 from snuba.query.dsl import Functions as f
 from snuba.query.dsl import and_cond, if_cond, in_cond, literal, literals_array, or_cond
 from snuba.query.dsl import column as snuba_column
+from snuba.query.expressions import (
+    Column as ColumnExpr,
+)
 from snuba.query.expressions import (
     DangerousRawSQL,
     Expression,
     FunctionCall,
     SubscriptableReference,
 )
+from snuba.query.expressions import (
+    Literal as LiteralExpr,
+)
 from snuba.query.logical import Query
 from snuba.query.query_settings import HTTPQuerySettings
 from snuba.request import Request as SnubaRequest
@@ -52,6 +58,7 @@
     add_existence_check_to_subscriptable_references,
     attribute_key_to_expression,
     base_conditions_and,
+    get_field_existence_expression,
     timestamp_in_range_condition,
     trace_item_filters_to_expression,
     treeify_or_and_conditions,
@@ -137,31 +144,49 @@ def _apply_virtual_columns(
     mapped_column_to_context = {c.to_column_name: c for c in virtual_column_contexts}
 
     def transform_expressions(expression: Expression) -> Expression:
-        # virtual columns will show up as `attr_str[virtual_column_name]` or `attr_num[virtual_column_name]`
-        if not isinstance(expression, SubscriptableReference):
+        # An attribute read appears as attributes_string[key] (SELECT / group by) or
+        # as arrayElement / mapContains over attributes_string — the forms map-backed
+        # filters build directly (see _map_backed_operands). For a virtual column the
+        # value forms map to the value transform and the mapContains existence guard
+        # maps to the backing column, so filters resolve against from_column_name.
+        is_existence = False
+        if isinstance(expression, SubscriptableReference):
+            if expression.column.column_name != "attributes_string":
+                return expression
+            key = str(expression.key.value)
+        elif (
+            isinstance(expression, FunctionCall)
+            and expression.function_name in ("arrayElement", "mapContains")
+            and isinstance(expression.parameters[0], ColumnExpr)
+            and expression.parameters[0].column_name == "attributes_string"
+            and isinstance(expression.parameters[1], LiteralExpr)
+        ):
+            key = str(expression.parameters[1].value)
+            is_existence = expression.function_name == "mapContains"
+        else:
             return expression
 
-        if expression.column.column_name != "attributes_string":
+        context = mapped_column_to_context.get(key)
+        if context is None:
             return expression
-        context = mapped_column_to_context.get(str(expression.key.value))
-        if context:
-            attribute_expression = attribute_key_to_expression(
-                AttributeKey(
-                    name=context.from_column_name,
-                    type=NORMALIZED_COLUMNS_EAP_ITEMS.get(
-                        context.from_column_name, [AttributeKey.TYPE_STRING]
-                    )[0],
-                )
-            )
-            return f.transform(
-                f.CAST(f.ifNull(attribute_expression, literal("")), "String"),
-                literals_array(None, [literal(k) for k in context.value_map.keys()]),
-                literals_array(None, [literal(v) for v in context.value_map.values()]),
-                literal(context.default_value if context.default_value != "" else "unknown"),
-                alias=context.to_column_name,
-            )
 
-        return expression
+        from_column = attribute_key_to_expression(
+            AttributeKey(
+                name=context.from_column_name,
+                type=NORMALIZED_COLUMNS_EAP_ITEMS.get(
+                    context.from_column_name, [AttributeKey.TYPE_STRING]
+                )[0],
+            )
+        )
+        if is_existence:
+            return get_field_existence_expression(from_column)
+        return f.transform(
+            f.CAST(f.ifNull(from_column, literal("")), "String"),
+            literals_array(None, [literal(k) for k in context.value_map.keys()]),
+            literals_array(None, [literal(v) for v in context.value_map.values()]),
+            literal(context.default_value if context.default_value != "" else "unknown"),
+            alias=context.to_column_name,
+        )
 
     query.transform_expressions(transform_expressions)