diff --git a/snuba/web/rpc/common/common.py b/snuba/web/rpc/common/common.py index ff0349ef27..2ad3044d62 100644 --- a/snuba/web/rpc/common/common.py +++ b/snuba/web/rpc/common/common.py @@ -13,6 +13,9 @@ from snuba import settings, state from snuba.protos.common import ( + ATTRIBUTES_TO_COALESCE, + PROTO_TYPE_TO_ATTRIBUTE_COLUMN, + PROTO_TYPE_TO_CLICKHOUSE_TYPE, MalformedAttributeException, type_array_to_membership_array_expression, ) @@ -24,6 +27,7 @@ from snuba.query.dsl import Functions as f from snuba.query.dsl import ( and_cond, + arrayElement, column, in_cond, literal, @@ -289,6 +293,94 @@ def transform(exp: Expression) -> Expression: query.transform_expressions(transform) +def _contains_subscriptable_reference(exp: Expression) -> bool: + """True if ``exp`` contains a ``SubscriptableReference`` (a map lookup) — the + map-backed keys we route through ``_map_backed_operands``.""" + found = False + + def visit(node: Expression) -> Expression: + nonlocal found + if isinstance(node, SubscriptableReference): + found = True + return node + + exp.transform(visit) + return found + + +def _map_backed_operands(k: AttributeKey) -> tuple[Expression, Expression]: + """Build ``(value, exists)`` for a map-backed key directly, NULL-free. + + The legacy ``if(mapContains, arrayElement, NULL)`` form puts a NULL constant + into the predicate; the new ClickHouse analyzer canonicalizes that NULL + inconsistently between an aggregate's projection name and its computed block + (notably inside ``in(...)``), so the column isn't found ("Code: 10 ... Not + found column ... in block"). We write ``mapContains`` + ``arrayElement`` + directly, so no NULL appears: + + value = arrayElement(k) # single key + value = multiIf(mapContains(k1), arrayElement(k1), ..., # coalesced: + arrayElement(kn)) # first present + exists = mapContains(k1) OR ... OR mapContains(kn) + + Callers build ``cmp(value, v)``, wrapped in ``and(exists, ...)`` only when the + literal could be the column default (see ``_comparison_can_match_column_default``) + — then ``exists``, not the value, distinguishes a missing key from a stored + empty value, since ``arrayElement`` reads both as the '' / 0 default. The + ``multiIf`` else is only reached when all keys are absent. + + Built without aliases: conditions don't need them, and an alias here would + collide with the SELECT clause's existence ``if(...)`` for the same attribute + (same alias, different expression). Only valid for map-backed keys + (string/int/float); booleans / normalized columns / arrays take the legacy + path, and SELECT keeps its own ``coalesce(...)`` representation untouched. + """ + col_name = PROTO_TYPE_TO_ATTRIBUTE_COLUMN[k.type] + names = [k.name] + list(ATTRIBUTES_TO_COALESCE.get(k.name, ())) + + def _value(name: str) -> Expression: + elem = arrayElement(None, column(col_name), literal(name)) + # ints live in the float map and surface as Int64, so they need a cast. + if k.type == AttributeKey.Type.TYPE_INT: + return f.cast(elem, f"Nullable({PROTO_TYPE_TO_CLICKHOUSE_TYPE[k.type]})") + return elem + + values = [_value(name) for name in names] + existences = [f.mapContains(column(col_name), literal(name)) for name in names] + + exists = combine_or_conditions(existences) if len(existences) > 1 else existences[0] + if len(values) == 1: + value: Expression = values[0] + else: + args: list[Expression] = [] + for cond, val in zip(existences[:-1], values[:-1]): + args.extend((cond, val)) + args.append(values[-1]) + value = f.multiIf(*args) + return value, exists + + +def _analyzer_safe_in_expression( + k: AttributeKey, + v_expression: Expression, + *, + negated: bool, + ignore_case: bool = False, + guard: bool = True, +) -> Expression: + """``IN`` / ``NOT IN`` as ``[not] in(value, set)``, wrapped in + ``and(exists, ...)`` only when ``guard`` is set (see ``_map_backed_operands`` + and ``_comparison_can_match_column_default``). The value lists carry only + scalars, so the set never contains NULL — the legacy ``has(set, NULL)`` branch + was always ``false``.""" + value, exists = _map_backed_operands(k) + if ignore_case: + value = f.lower(value) + membership = in_cond(value, v_expression) + present = and_cond(exists, membership) if guard else membership + return not_cond(present) if negated else present + + def _scalar_value(v: AttributeValue) -> bool | str | int | float | None: """Extract a Python scalar from an AttributeValue proto.""" match v.WhichOneof("value"): @@ -302,12 +394,29 @@ def _scalar_value(v: AttributeValue) -> bool | str | int | float | None: return v.val_double case "val_int": return v.val_int - case None: + case "val_null" | None: return None case other: raise NotImplementedError(f"not a scalar AttributeValue type: {other}") +def _comparison_can_match_column_default( + attr_type: AttributeKey.Type.ValueType, v: AttributeValue, value_type: str +) -> bool: + """True if any compared literal is the column default ('' / 0), which an + absent key also reads as — so the ``mapContains`` guard is needed to avoid + matching absent keys. When no literal is the default the guard is dropped (the + simplest form). LIKE/NOT_LIKE always guard; null comparisons are separate.""" + default: str | int = "" if attr_type == AttributeKey.Type.TYPE_STRING else 0 + if value_type == "val_array": + scalars: list[Any] = [_scalar_value(x) for x in v.val_array.values] + elif value_type in ("val_str_array", "val_int_array", "val_float_array", "val_double_array"): + scalars = list(getattr(v, value_type).values) + else: + scalars = [_scalar_value(v)] + return any(s == default for s in scalars) + + def _attribute_value_to_expression(v: AttributeValue) -> Expression: """Convert an AttributeValue proto to a Snuba Expression.""" value_type = v.WhichOneof("value") @@ -610,7 +719,8 @@ def trace_item_filters_to_expression( if value_type is None: raise BadSnubaRPCRequestException("comparison does not have a right hand side") - if v.is_null or value_type == "val_null": + v_is_null = v.is_null or value_type == "val_null" + if v_is_null: v_expression: Expression = literal(None) else: v_expression = _attribute_value_to_expression(v) @@ -622,6 +732,21 @@ def trace_item_filters_to_expression( return _type_array_includes_scalar_expression( k_expression, v, item_filter.comparison_filter.ignore_case ) + if _contains_subscriptable_reference(k_expression): + # Map-backed: NULL-free (exists, value) form (see _map_backed_operands). + value, exists = _map_backed_operands(k) + if v_is_null: # `attr = null` <=> key absent + return not_cond(exists) + lhs, rhs = ( + (f.lower(value), f.lower(v_expression)) + if item_filter.comparison_filter.ignore_case + else (value, v_expression) + ) + cmp = f.equals(lhs, rhs) + # mapContains guard only needed when '' / 0 could match an absent key. + if _comparison_can_match_column_default(k.type, v, value_type): + return and_cond(exists, cmp) + return cmp else: expr = ( f.equals(f.lower(k_expression), f.lower(v_expression)) @@ -642,6 +767,19 @@ def trace_item_filters_to_expression( k_expression, v, item_filter.comparison_filter.ignore_case ) ) + if _contains_subscriptable_reference(k_expression): + # Negation of OP_EQUALS; an absent key is "not equal". + value, exists = _map_backed_operands(k) + if v_is_null: # `attr != null` <=> key present + return exists + lhs, rhs = ( + (f.lower(value), f.lower(v_expression)) + if item_filter.comparison_filter.ignore_case + else (value, v_expression) + ) + if _comparison_can_match_column_default(k.type, v, value_type): + return not_cond(and_cond(exists, f.equals(lhs, rhs))) + return f.notEquals(lhs, rhs) else: expr = ( f.notEquals(f.lower(k_expression), f.lower(v_expression)) @@ -670,6 +808,9 @@ def trace_item_filters_to_expression( "the LIKE comparison is only supported on string and array keys" ) comparison_function = f.ilike if item_filter.comparison_filter.ignore_case else f.like + if _contains_subscriptable_reference(k_expression): + value, exists = _map_backed_operands(k) + return and_cond(exists, comparison_function(value, v_expression)) return comparison_function(k_expression, v_expression) if op == ComparisonFilter.OP_NOT_LIKE: if k.type == AttributeKey.Type.TYPE_ARRAY: @@ -688,6 +829,11 @@ def trace_item_filters_to_expression( raise BadSnubaRPCRequestException( "the NOT LIKE comparison is only supported on string and array keys" ) + if _contains_subscriptable_reference(k_expression): + # Negation of OP_LIKE; an absent key is "not like". + like_fn = f.ilike if item_filter.comparison_filter.ignore_case else f.like + value, exists = _map_backed_operands(k) + return not_cond(and_cond(exists, like_fn(value, v_expression))) comparison_function = ( f.notILike if item_filter.comparison_filter.ignore_case else f.notLike ) @@ -706,8 +852,8 @@ def trace_item_filters_to_expression( return f.greaterOrEquals(k_expression, v_expression) if op == ComparisonFilter.OP_IN: _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter) - if item_filter.comparison_filter.ignore_case: - k_expression = f.lower(k_expression) + ignore_case = item_filter.comparison_filter.ignore_case + if ignore_case: if value_type == "val_str_array": v_expression = literals_array( None, @@ -718,10 +864,21 @@ def trace_item_filters_to_expression( None, [literal(elem.val_str.lower()) for elem in v.val_array.values], ) - expr = in_cond(k_expression, v_expression) # note: v_expression must be an array # we redefine the way in works for nulls # now null in ['hi', null] is true + if _contains_subscriptable_reference(k_expression): + # Map-backed: keep the existence if(...) out of in() (see helper). + return _analyzer_safe_in_expression( + k, + v_expression, + negated=False, + ignore_case=ignore_case, + guard=_comparison_can_match_column_default(k.type, v, value_type), + ) + if ignore_case: + k_expression = f.lower(k_expression) + expr = in_cond(k_expression, v_expression) expr_with_null = or_cond( expr, and_cond(f.isNull(k_expression), f.has(v_expression, literal(None))), @@ -729,8 +886,8 @@ def trace_item_filters_to_expression( return expr_with_null if op == ComparisonFilter.OP_NOT_IN: _check_non_string_values_cannot_ignore_case(item_filter.comparison_filter) - if item_filter.comparison_filter.ignore_case: - k_expression = f.lower(k_expression) + ignore_case = item_filter.comparison_filter.ignore_case + if ignore_case: if value_type == "val_str_array": v_expression = literals_array( None, @@ -741,10 +898,21 @@ def trace_item_filters_to_expression( None, [literal(elem.val_str.lower()) for elem in v.val_array.values], ) - expr = not_cond(in_cond(k_expression, v_expression)) # note: v_expression must be an array # we redefine the way not in works for nulls # now null not in ['hi'] is true + if _contains_subscriptable_reference(k_expression): + # Map-backed: keep the existence if(...) out of in() (see helper). + return _analyzer_safe_in_expression( + k, + v_expression, + negated=True, + ignore_case=ignore_case, + guard=_comparison_can_match_column_default(k.type, v, value_type), + ) + if ignore_case: + k_expression = f.lower(k_expression) + expr = not_cond(in_cond(k_expression, v_expression)) expr_with_null = or_cond( expr, and_cond( diff --git a/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py b/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py index 8d7260fa57..7fdf7ff4a5 100644 --- a/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py +++ b/snuba/web/rpc/v1/resolvers/R_eap_items/resolver_trace_item_table.py @@ -37,12 +37,18 @@ from snuba.query.dsl import Functions as f from snuba.query.dsl import and_cond, if_cond, in_cond, literal, literals_array, or_cond from snuba.query.dsl import column as snuba_column +from snuba.query.expressions import ( + Column as ColumnExpr, +) from snuba.query.expressions import ( DangerousRawSQL, Expression, FunctionCall, SubscriptableReference, ) +from snuba.query.expressions import ( + Literal as LiteralExpr, +) from snuba.query.logical import Query from snuba.query.query_settings import HTTPQuerySettings from snuba.request import Request as SnubaRequest @@ -52,6 +58,7 @@ add_existence_check_to_subscriptable_references, attribute_key_to_expression, base_conditions_and, + get_field_existence_expression, timestamp_in_range_condition, trace_item_filters_to_expression, treeify_or_and_conditions, @@ -137,31 +144,49 @@ def _apply_virtual_columns( mapped_column_to_context = {c.to_column_name: c for c in virtual_column_contexts} def transform_expressions(expression: Expression) -> Expression: - # virtual columns will show up as `attr_str[virtual_column_name]` or `attr_num[virtual_column_name]` - if not isinstance(expression, SubscriptableReference): + # An attribute read appears as attributes_string[key] (SELECT / group by) or + # as arrayElement / mapContains over attributes_string — the forms map-backed + # filters build directly (see _map_backed_operands). For a virtual column the + # value forms map to the value transform and the mapContains existence guard + # maps to the backing column, so filters resolve against from_column_name. + is_existence = False + if isinstance(expression, SubscriptableReference): + if expression.column.column_name != "attributes_string": + return expression + key = str(expression.key.value) + elif ( + isinstance(expression, FunctionCall) + and expression.function_name in ("arrayElement", "mapContains") + and isinstance(expression.parameters[0], ColumnExpr) + and expression.parameters[0].column_name == "attributes_string" + and isinstance(expression.parameters[1], LiteralExpr) + ): + key = str(expression.parameters[1].value) + is_existence = expression.function_name == "mapContains" + else: return expression - if expression.column.column_name != "attributes_string": + context = mapped_column_to_context.get(key) + if context is None: return expression - context = mapped_column_to_context.get(str(expression.key.value)) - if context: - attribute_expression = attribute_key_to_expression( - AttributeKey( - name=context.from_column_name, - type=NORMALIZED_COLUMNS_EAP_ITEMS.get( - context.from_column_name, [AttributeKey.TYPE_STRING] - )[0], - ) - ) - return f.transform( - f.CAST(f.ifNull(attribute_expression, literal("")), "String"), - literals_array(None, [literal(k) for k in context.value_map.keys()]), - literals_array(None, [literal(v) for v in context.value_map.values()]), - literal(context.default_value if context.default_value != "" else "unknown"), - alias=context.to_column_name, - ) - return expression + from_column = attribute_key_to_expression( + AttributeKey( + name=context.from_column_name, + type=NORMALIZED_COLUMNS_EAP_ITEMS.get( + context.from_column_name, [AttributeKey.TYPE_STRING] + )[0], + ) + ) + if is_existence: + return get_field_existence_expression(from_column) + return f.transform( + f.CAST(f.ifNull(from_column, literal("")), "String"), + literals_array(None, [literal(k) for k in context.value_map.keys()]), + literals_array(None, [literal(v) for v in context.value_map.values()]), + literal(context.default_value if context.default_value != "" else "unknown"), + alias=context.to_column_name, + ) query.transform_expressions(transform_expressions) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index fe018e7756..5f4e88e056 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -20,6 +20,7 @@ StrArray, ) from sentry_protos.snuba.v1.trace_item_filter_pb2 import ( + AndFilter, AnyAttributeFilter, ComparisonFilter, ExistsFilter, @@ -31,7 +32,13 @@ from snuba.datasets.storages.factory import get_storage from snuba.datasets.storages.storage_key import StorageKey from snuba.protos.common import ATTRIBUTES_TO_COALESCE -from snuba.query.expressions import FunctionCall, Lambda, Literal +from snuba.query.expressions import ( + Expression, + FunctionCall, + Lambda, + Literal, + SubscriptableReference, +) from snuba.web.rpc.common.common import ( _any_attribute_filter_to_expression, attribute_key_to_expression, @@ -262,6 +269,172 @@ def test_exists_filter_on_non_coalesced_attribute_unchanged(self) -> None: assert expr.function_name == "mapContains" +class TestAnalyzerSafeFilters: + """Per-key filters on map-backed attributes are built from a NULL-free + (mapContains, arrayElement) pair instead of the legacy + if(mapContains, arrayElement, NULL) + isNull(...) idiom, so the new ClickHouse + analyzer names the aggregate column consistently (SNUBA-B62/B6C/A13). The + mapContains guard preserves the absent-vs-empty distinction. + """ + + @staticmethod + def _walk(expr: Expression) -> list[Expression]: + nodes: list[Expression] = [] + + def visit(node: Expression) -> Expression: + nodes.append(node) + return node + + expr.transform(visit) + return nodes + + def _fn_names(self, expr: Expression) -> set[str]: + return {n.function_name for n in self._walk(expr) if isinstance(n, FunctionCall)} + + def _assert_clean(self, expr: Expression) -> None: + # No NULL literal, no leftover SubscriptableReference, no foldable if(...). + for n in self._walk(expr): + assert not (isinstance(n, Literal) and n.value is None) + assert not isinstance(n, SubscriptableReference) + assert not (isinstance(n, FunctionCall) and n.function_name == "if") + + def _build( + self, + op: ComparisonFilter.Op.ValueType, + *, + name: str = "some.custom.tag", + value: str | None = None, + values: list[str] | None = None, + is_null: bool = False, + ignore_case: bool = False, + ) -> Expression: + if values is not None: + av = AttributeValue(val_str_array=StrArray(values=values)) + elif is_null: + av = AttributeValue(val_null=True) + else: + av = AttributeValue(val_str=value or "") + item_filter = TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.Type.TYPE_STRING, name=name), + op=op, + value=av, + ignore_case=ignore_case, + ) + ) + return trace_item_filters_to_expression(item_filter, attribute_key_to_expression) + + # --- pruned forms: literal != column default, so no mapContains guard --- + + def test_in_pruned(self) -> None: + expr = self._build(ComparisonFilter.OP_IN, values=["a", "b"]) + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "in" + assert "mapContains" not in self._fn_names(expr) + + def test_not_in_pruned(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_IN, values=["a", "b"]) + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "not" + names = self._fn_names(expr) + assert {"not", "in", "arrayElement"} <= names + assert "mapContains" not in names and "has" not in names + + def test_equals_pruned(self) -> None: + expr = self._build(ComparisonFilter.OP_EQUALS, value="ok") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "equals" + assert "mapContains" not in self._fn_names(expr) + + def test_not_equals_pruned(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_EQUALS, value="ok") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "notEquals" + assert "mapContains" not in self._fn_names(expr) + + def test_coalesced_pruned_uses_multi_if_no_outer_guard(self) -> None: + # Coalesced keys resolve via a NULL-free multiIf over per-key mapContains + # (never coalesce(if(..., NULL), ...)); with a non-default literal the + # outer existence OR-guard is dropped too. + assert "transaction" in ATTRIBUTES_TO_COALESCE + expr = self._build(ComparisonFilter.OP_EQUALS, name="transaction", value="t") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "equals" + names = self._fn_names(expr) + assert "multiIf" in names and "coalesce" not in names + assert "or" not in names # no outer exists guard + + def test_in_ignore_case_lowers_array_element(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_IN, values=["A", "B"], ignore_case=True) + self._assert_clean(expr) + (in_call,) = [ + n for n in self._walk(expr) if isinstance(n, FunctionCall) and n.function_name == "in" + ] + operand = in_call.parameters[0] + assert isinstance(operand, FunctionCall) and operand.function_name == "lower" + inner = operand.parameters[0] + assert isinstance(inner, FunctionCall) and inner.function_name == "arrayElement" + + # --- guarded forms: literal IS the column default, so mapContains is kept --- + + def test_equals_default_value_keeps_guard(self) -> None: + expr = self._build(ComparisonFilter.OP_EQUALS, value="") # '' could match an absent key + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "and" + assert {"and", "mapContains", "equals", "arrayElement"} <= self._fn_names(expr) + + def test_in_with_default_value_keeps_guard(self) -> None: + expr = self._build(ComparisonFilter.OP_IN, values=["", "x"]) + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "and" + assert {"and", "in", "mapContains"} <= self._fn_names(expr) + + def test_coalesced_default_value_keeps_guard(self) -> None: + expr = self._build(ComparisonFilter.OP_EQUALS, name="transaction", value="") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "and" + assert {"and", "or", "equals", "multiIf", "mapContains"} <= self._fn_names(expr) + + # --- always-guarded forms: null comparisons and LIKE/NOT_LIKE --- + + def test_equals_null_is_not_map_contains(self) -> None: + expr = self._build(ComparisonFilter.OP_EQUALS, is_null=True) # attr = null <=> absent + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "not" + assert self._fn_names(expr) == {"not", "mapContains"} + + def test_not_equals_null_is_map_contains(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_EQUALS, is_null=True) # attr != null <=> present + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "mapContains" + + def test_in_null_value_builds_without_raising(self) -> None: + # null isn't the column default, so the guard helper must treat it as a + # non-default scalar rather than raising (see _scalar_value / val_null). + expr = self._build(ComparisonFilter.OP_IN, is_null=True) + assert "in" in self._fn_names(expr) + + def test_not_in_null_value_builds_without_raising(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_IN, is_null=True) + assert {"not", "in"} <= self._fn_names(expr) + + def test_like_keeps_guard(self) -> None: + expr = self._build(ComparisonFilter.OP_LIKE, value="%ok%") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "and" + assert {"and", "mapContains", "like", "arrayElement"} <= self._fn_names(expr) + + def test_not_like_keeps_guard_and_negates_positive_like(self) -> None: + expr = self._build(ComparisonFilter.OP_NOT_LIKE, value="%ok%") + self._assert_clean(expr) + assert isinstance(expr, FunctionCall) and expr.function_name == "not" + names = self._fn_names(expr) + assert {"not", "and", "mapContains", "like", "arrayElement"} <= names + assert "notLike" not in names # positive like under the negation + ilike = self._build(ComparisonFilter.OP_NOT_LIKE, value="%ok%", ignore_case=True) + assert "ilike" in self._fn_names(ilike) and "notILike" not in self._fn_names(ilike) + + @pytest.mark.redis_db @pytest.mark.clickhouse_db def test_convert_rpc_exception_to_proto_packs_details() -> None: @@ -568,3 +741,144 @@ def test_no_match_returns_empty(self) -> None: ) ) assert colors == [] + + +@pytest.mark.eap +@pytest.mark.redis_db +class TestEmptyVsAbsentComparison: + """End-to-end: the migrated per-key comparisons (EQUALS / NOT_EQUALS / LIKE / + NOT_LIKE) must keep distinguishing a *stored empty value* from an *absent + key*, since arrayElement reads both as '' and only mapContains tells them + apart. Three spans, by the attribute under test: + - present, value "ok" (color "red") + - present, empty "" (color "blue") + - absent (color "green") + + A unique per-run batch tag (present on all three rows and AND-ed into every + query) isolates these rows, so the absent-key cases aren't polluted by other + rows in the shared table (which also lack the attribute under test). + """ + + # Custom attribute names not present in gen_item_message's default set + # (which injects e.g. status="ok"), so we fully control present/empty/absent. + ATTR = "test.empty_vs_absent.status" + BATCH_ATTR = "test.empty_vs_absent.batch" + + @pytest.fixture(autouse=True) + def setup(self, eap: None, redis_db: None) -> None: + self.batch = f"batch-{uuid.uuid4().hex}" + self.base_time = datetime.now(tz=timezone.utc).replace( + minute=0, second=0, microsecond=0 + ) - timedelta(hours=1) + self.start_ts = Timestamp(seconds=int((self.base_time - timedelta(hours=1)).timestamp())) + self.end_ts = Timestamp(seconds=int((self.base_time + timedelta(hours=2)).timestamp())) + batch = AnyValue(string_value=self.batch) + messages = [ + gen_item_message( + start_timestamp=self.base_time, + attributes={ + self.BATCH_ATTR: batch, + self.ATTR: AnyValue(string_value="ok"), + "color": AnyValue(string_value="red"), + }, + ), + gen_item_message( + start_timestamp=self.base_time + timedelta(minutes=1), + attributes={ + self.BATCH_ATTR: batch, + self.ATTR: AnyValue(string_value=""), + "color": AnyValue(string_value="blue"), + }, + ), + gen_item_message( + start_timestamp=self.base_time + timedelta(minutes=2), + attributes={ + self.BATCH_ATTR: batch, + "color": AnyValue(string_value="green"), + }, + ), + ] + storage = get_storage(StorageKey("eap_items")) + write_raw_unprocessed_events(storage, messages) # type: ignore + + def _execute( + self, + op: ComparisonFilter.Op.ValueType, + *, + value: str | None = None, + is_null: bool = False, + ) -> list[str]: + av = AttributeValue(val_null=True) if is_null else AttributeValue(val_str=value or "") + filt = TraceItemFilter( + and_filter=AndFilter( + filters=[ + TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.TYPE_STRING, name=self.BATCH_ATTR), + op=ComparisonFilter.OP_EQUALS, + value=AttributeValue(val_str=self.batch), + ) + ), + TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.TYPE_STRING, name=self.ATTR), + op=op, + value=av, + ) + ), + ] + ) + ) + message = TraceItemTableRequest( + meta=RequestMeta( + project_ids=[1], + organization_id=1, + cogs_category="test", + referrer="test", + start_timestamp=self.start_ts, + end_timestamp=self.end_ts, + request_id=uuid.uuid4().hex, + trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, + ), + filter=filt, + columns=[Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color"))], + order_by=[ + TraceItemTableRequest.OrderBy( + column=Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color")) + ) + ], + limit=100, + ) + response = EndpointTraceItemTable().execute(message) + if not response.column_values: + return [] + return sorted(r.val_str for r in response.column_values[0].results) + + def test_equals_empty_matches_only_stored_empty(self) -> None: + # The critical case: `status = ''` must match the stored empty, NOT the + # absent key (arrayElement reads both as ''). + assert self._execute(ComparisonFilter.OP_EQUALS, value="") == ["blue"] + + def test_equals_value_matches_only_that_value(self) -> None: + assert self._execute(ComparisonFilter.OP_EQUALS, value="ok") == ["red"] + + def test_equals_null_matches_only_absent(self) -> None: + assert self._execute(ComparisonFilter.OP_EQUALS, is_null=True) == ["green"] + + def test_not_equals_value_includes_empty_and_absent(self) -> None: + assert self._execute(ComparisonFilter.OP_NOT_EQUALS, value="ok") == ["blue", "green"] + + def test_not_equals_empty_includes_value_and_absent(self) -> None: + assert self._execute(ComparisonFilter.OP_NOT_EQUALS, value="") == ["green", "red"] + + def test_not_equals_null_matches_only_present(self) -> None: + assert self._execute(ComparisonFilter.OP_NOT_EQUALS, is_null=True) == ["blue", "red"] + + def test_like_wildcard_matches_present_not_absent(self) -> None: + # '%' matches both present rows (incl. the stored empty) but never the + # absent key. + assert self._execute(ComparisonFilter.OP_LIKE, value="%") == ["blue", "red"] + + def test_not_like_wildcard_matches_only_absent(self) -> None: + # Present rows all `like '%'`, so only the absent key survives NOT LIKE. + assert self._execute(ComparisonFilter.OP_NOT_LIKE, value="%") == ["green"] diff --git a/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py b/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py index db37dda848..d4630afc8d 100644 --- a/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py +++ b/tests/web/rpc/v1/test_endpoint_trace_item_table/test_endpoint_trace_item_table.py @@ -3268,6 +3268,92 @@ def test_virtual_column_with_missing_attribute(self) -> None: AttributeValue(val_str="a") for _ in range(10) ] + [AttributeValue(val_str="default") for _ in range(5)] + def test_virtual_column_filter_uses_value_map(self) -> None: + # Regression: a filter on a virtual column must compare against the mapped + # value. Map-backed filters build arrayElement(attributes_string, key) + # directly (see _map_backed_operands), so _apply_virtual_columns has to + # rewrite that form too, not only the SubscriptableReference used by + # SELECT / group by — otherwise the filter matches the raw key/value and + # returns nothing. + span_ts = BASE_TIME - timedelta(minutes=1) + write_eap_item(span_ts, {"device.class": "1"}, 3) + write_eap_item(span_ts, {"device.class": "2"}, 5) + message = TraceItemTableRequest( + meta=RequestMeta( + project_ids=[1], + organization_id=1, + cogs_category="something", + referrer="something", + start_timestamp=START_TIMESTAMP, + end_timestamp=END_TIMESTAMP, + trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, + ), + columns=[ + Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="device.class.label")), + ], + filter=TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="device.class.label"), + op=ComparisonFilter.OP_EQUALS, + value=AttributeValue(val_str="low"), + ) + ), + virtual_column_contexts=[ + VirtualColumnContext( + from_column_name="device.class", + to_column_name="device.class.label", + value_map={"1": "low", "2": "medium"}, + default_value="Unknown", + ), + ], + ) + response = EndpointTraceItemTable().execute(message) + assert response.column_values[0].results == [ + AttributeValue(val_str="low") for _ in range(3) + ] + + def test_virtual_column_like_filter_uses_backing_existence(self) -> None: + # LIKE (and null / default-value guards) build a mapContains existence + # check on the request key; for a virtual column that key is absent in + # storage, so _apply_virtual_columns must rewrite the existence guard to + # the backing column too — otherwise it matches nothing. + span_ts = BASE_TIME - timedelta(minutes=1) + write_eap_item(span_ts, {"device.class": "1"}, 3) + write_eap_item(span_ts, {"device.class": "2"}, 5) + message = TraceItemTableRequest( + meta=RequestMeta( + project_ids=[1], + organization_id=1, + cogs_category="something", + referrer="something", + start_timestamp=START_TIMESTAMP, + end_timestamp=END_TIMESTAMP, + trace_item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, + ), + columns=[ + Column(key=AttributeKey(type=AttributeKey.TYPE_STRING, name="device.class.label")), + ], + filter=TraceItemFilter( + comparison_filter=ComparisonFilter( + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="device.class.label"), + op=ComparisonFilter.OP_LIKE, + value=AttributeValue(val_str="%low%"), + ) + ), + virtual_column_contexts=[ + VirtualColumnContext( + from_column_name="device.class", + to_column_name="device.class.label", + value_map={"1": "low", "2": "medium"}, + default_value="Unknown", + ), + ], + ) + response = EndpointTraceItemTable().execute(message) + assert response.column_values[0].results == [ + AttributeValue(val_str="low") for _ in range(3) + ] + def test_normal_mode_end_to_end(self) -> None: items_storage = get_storage(StorageKey("eap_items")) msg_timestamp = BASE_TIME - timedelta(minutes=1)