From 8c705f513d0c668ed807fdd6bd7f244f8a25ec82 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 11:18:47 -0700 Subject: [PATCH 1/9] feat(eap-items): add trace_metric_name column Add a `trace_metric_name` LowCardinality(String) column to the eap_items_1 tables (local, dist, and the downsampled 8/64/512 tables) to store the trace metric name. This sets up a future bloom_filter index on the column. Co-Authored-By: Claude Opus 4.8 (1M context) Agent transcript: https://claudescope.sentry.dev/share/NYTrkkz-0_0mKttPxUNymU3l-z042ptNDua5SgEa-Wk --- .../0057_add_trace_metric_name_column.py | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py new file mode 100644 index 0000000000..af9ebd2670 --- /dev/null +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py @@ -0,0 +1,101 @@ +from snuba.clickhouse.columns import Column, String +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget + +storage_set = StorageSetKey.EVENTS_ANALYTICS_PLATFORM +table_name_prefix = "eap_items_1" +new_columns = [ + Column( + "trace_metric_name", + String( + Modifiers( + low_cardinality=True, + codecs=[ + "ZSTD(1)", + ], + ), + ), + ), +] +after = "attributes_array" +sampling_weights = [8, 8**2, 8**3] + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + + def forwards_ops(self) -> list[operations.SqlOperation]: + ops: list[operations.SqlOperation] = [ + operations.AddColumn( + storage_set=storage_set, + table_name=f"{table_name_prefix}_{suffix}", + column=new_column, + after=after, + target=target, + ) + for suffix, target in [ + ("local", OperationTarget.LOCAL), + ("dist", OperationTarget.DISTRIBUTED), + ] + for new_column in new_columns + ] + + for sampling_weight in sampling_weights: + downsampled_table_prefix = f"eap_items_1_downsample_{sampling_weight}" + + ops.extend( + [ + operations.AddColumn( + storage_set=storage_set, + table_name=f"{downsampled_table_prefix}_{suffix}", + column=new_column, + after=after, + target=target, + ) + for suffix, target in [ + ("local", OperationTarget.LOCAL), + ("dist", OperationTarget.DISTRIBUTED), + ] + for new_column in new_columns + ] + ) + + return ops + + def backwards_ops(self) -> list[operations.SqlOperation]: + ops: list[operations.SqlOperation] = [ + operations.DropColumn( + storage_set=storage_set, + table_name=f"{table_name_prefix}_{suffix}", + column_name=new_column.name, + target=target, + ) + for suffix, target in [ + ("dist", OperationTarget.DISTRIBUTED), + ("local", OperationTarget.LOCAL), + ] + for new_column in new_columns + ] + + for sampling_weight in sampling_weights: + downsampled_table_prefix = f"eap_items_1_downsample_{sampling_weight}" + + ops.extend( + [ + operations.DropColumn( + storage_set=storage_set, + table_name=f"{downsampled_table_prefix}_{suffix}", + column_name=new_column.name, + target=target, + ) + for suffix, target in [ + ("dist", OperationTarget.DISTRIBUTED), + ("local", OperationTarget.LOCAL), + ] + for new_column in new_columns + ] + ) + + return ops From bfc476621c110c62c10829956ef0b5a9e0756ceb Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 11:28:58 -0700 Subject: [PATCH 2/9] fix(eap-items): add trace_metric_name to read-only dist tables The _dist_ro tables are created via `CREATE TABLE ... AS` and do not inherit schema changes from their source tables, so the column must be added explicitly for the read path to see it. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0057_add_trace_metric_name_column.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py index af9ebd2670..92b6b7ea4e 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py @@ -5,6 +5,7 @@ from snuba.migrations.operations import OperationTarget storage_set = StorageSetKey.EVENTS_ANALYTICS_PLATFORM +ro_storage_set = StorageSetKey.EVENTS_ANALYTICS_PLATFORM_RO table_name_prefix = "eap_items_1" new_columns = [ Column( @@ -62,6 +63,27 @@ def forwards_ops(self) -> list[operations.SqlOperation]: ] ) + # The read-only distributed tables (created via `CREATE TABLE ... AS`) + # do not inherit schema changes from their source tables, so the column + # must be added explicitly for queries on the read path to see it. + ro_table_names = [f"{table_name_prefix}_dist_ro"] + [ + f"eap_items_1_downsample_{sampling_weight}_dist_ro" + for sampling_weight in sampling_weights + ] + ops.extend( + [ + operations.AddColumn( + storage_set=ro_storage_set, + table_name=ro_table_name, + column=new_column, + after=after, + target=OperationTarget.DISTRIBUTED, + ) + for ro_table_name in ro_table_names + for new_column in new_columns + ] + ) + return ops def backwards_ops(self) -> list[operations.SqlOperation]: @@ -98,4 +120,21 @@ def backwards_ops(self) -> list[operations.SqlOperation]: ] ) + ro_table_names = [f"{table_name_prefix}_dist_ro"] + [ + f"eap_items_1_downsample_{sampling_weight}_dist_ro" + for sampling_weight in sampling_weights + ] + ops.extend( + [ + operations.DropColumn( + storage_set=ro_storage_set, + table_name=ro_table_name, + column_name=new_column.name, + target=OperationTarget.DISTRIBUTED, + ) + for ro_table_name in ro_table_names + for new_column in new_columns + ] + ) + return ops From f16106068ec62c1be1b41835e99152821b5fed81 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 12:06:31 -0700 Subject: [PATCH 3/9] feat(eap-items): rename column to name and add bloom_filter index Rename the new column from `trace_metric_name` to `name` and add a `bf_name` bloom_filter index on `eap_items_1_local`, matching the existing bloom_filter index convention (local table only). Co-Authored-By: Claude Opus 4.8 (1M context) Agent transcript: https://claudescope.sentry.dev/share/qdL1I1HW5uIk6IHkW6Dx9PXWP25TprCOs-FEFCgoxQg --- ...n.py => 0057_add_name_column_and_index.py} | 48 +++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) rename snuba/snuba_migrations/events_analytics_platform/{0057_add_trace_metric_name_column.py => 0057_add_name_column_and_index.py} (78%) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py similarity index 78% rename from snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py rename to snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index 92b6b7ea4e..069af2667b 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_trace_metric_name_column.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -9,7 +9,7 @@ table_name_prefix = "eap_items_1" new_columns = [ Column( - "trace_metric_name", + "name", String( Modifiers( low_cardinality=True, @@ -22,6 +22,11 @@ ] after = "attributes_array" sampling_weights = [8, 8**2, 8**3] +local_table_name = f"{table_name_prefix}_local" +index_name = "bf_name" +index_expression = "name" +index_type = "bloom_filter" +index_granularity = 1 class Migration(migration.ClickhouseNodeMigration): @@ -84,23 +89,46 @@ def forwards_ops(self) -> list[operations.SqlOperation]: ] ) + ops.append( + operations.AddIndex( + storage_set=storage_set, + table_name=local_table_name, + index_name=index_name, + index_expression=index_expression, + index_type=index_type, + granularity=index_granularity, + target=OperationTarget.LOCAL, + ) + ) + return ops def backwards_ops(self) -> list[operations.SqlOperation]: ops: list[operations.SqlOperation] = [ - operations.DropColumn( + operations.DropIndex( storage_set=storage_set, - table_name=f"{table_name_prefix}_{suffix}", - column_name=new_column.name, - target=target, + table_name=local_table_name, + index_name=index_name, + target=OperationTarget.LOCAL, ) - for suffix, target in [ - ("dist", OperationTarget.DISTRIBUTED), - ("local", OperationTarget.LOCAL), - ] - for new_column in new_columns ] + ops.extend( + [ + operations.DropColumn( + storage_set=storage_set, + table_name=f"{table_name_prefix}_{suffix}", + column_name=new_column.name, + target=target, + ) + for suffix, target in [ + ("dist", OperationTarget.DISTRIBUTED), + ("local", OperationTarget.LOCAL), + ] + for new_column in new_columns + ] + ) + for sampling_weight in sampling_weights: downsampled_table_prefix = f"eap_items_1_downsample_{sampling_weight}" From 56e033a44f9cb965f6e1ea787d113e2dbee9755d Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 12:09:02 -0700 Subject: [PATCH 4/9] feat(eap-items): extend bf_name index to downsampled local tables Add the bf_name bloom_filter index to the downsampled local tables (eap_items_1_downsample_{8,64,512}_local) in addition to the main eap_items_1_local table, so name lookups are accelerated on the downsampled read paths as well. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0057_add_name_column_and_index.py | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index 069af2667b..80f36b7bb8 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -89,28 +89,40 @@ def forwards_ops(self) -> list[operations.SqlOperation]: ] ) - ops.append( - operations.AddIndex( - storage_set=storage_set, - table_name=local_table_name, - index_name=index_name, - index_expression=index_expression, - index_type=index_type, - granularity=index_granularity, - target=OperationTarget.LOCAL, - ) + local_table_names = [local_table_name] + [ + f"eap_items_1_downsample_{sampling_weight}_local" + for sampling_weight in sampling_weights + ] + ops.extend( + [ + operations.AddIndex( + storage_set=storage_set, + table_name=table_name, + index_name=index_name, + index_expression=index_expression, + index_type=index_type, + granularity=index_granularity, + target=OperationTarget.LOCAL, + ) + for table_name in local_table_names + ] ) return ops def backwards_ops(self) -> list[operations.SqlOperation]: + local_table_names = [local_table_name] + [ + f"eap_items_1_downsample_{sampling_weight}_local" + for sampling_weight in sampling_weights + ] ops: list[operations.SqlOperation] = [ operations.DropIndex( storage_set=storage_set, - table_name=local_table_name, + table_name=table_name, index_name=index_name, target=OperationTarget.LOCAL, ) + for table_name in local_table_names ] ops.extend( From 8adad085137aa0e0911034543b067ea9f5939513 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 12:23:52 -0700 Subject: [PATCH 5/9] ref(eap-items): position name column before attribute buckets Place the name column after attributes_int (before attributes_string_0) so it sits ahead of all the string/float attribute bucket columns instead of after attributes_array. Co-Authored-By: Claude Opus 4.8 (1M context) Agent transcript: https://claudescope.sentry.dev/share/D5xH-5PKdtHd7RX0JgQzXhCz8wJnAXR_d5sDw_M9dcw --- .../events_analytics_platform/0057_add_name_column_and_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index 80f36b7bb8..396cc3e9ce 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -20,7 +20,7 @@ ), ), ] -after = "attributes_array" +after = "attributes_int" sampling_weights = [8, 8**2, 8**3] local_table_name = f"{table_name_prefix}_local" index_name = "bf_name" From ea9a3e792185f2334d85d2abd618be625969dd3f Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Wed, 10 Jun 2026 12:25:26 -0700 Subject: [PATCH 6/9] ref(eap-items): position name column before all attribute columns Anchor the name column after retention_days so it precedes the entire attributes block (attributes_bool, attributes_int, and the string/float buckets). retention_days is used as the anchor because it exists on every target table, including the downsampled and _dist_ro variants. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../events_analytics_platform/0057_add_name_column_and_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index 396cc3e9ce..e8e4ad4e6f 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -20,7 +20,7 @@ ), ), ] -after = "attributes_int" +after = "retention_days" sampling_weights = [8, 8**2, 8**3] local_table_name = f"{table_name_prefix}_local" index_name = "bf_name" From 269ce7f611fc10cbb3a4f56db6f79cebdc6b12d0 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 12 Jun 2026 15:14:36 -0700 Subject: [PATCH 7/9] ref: Remove low cardinality modifier from name column Co-Authored-By: Claude Opus 4.8 (1M context) --- .../events_analytics_platform/0057_add_name_column_and_index.py | 1 - 1 file changed, 1 deletion(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index e8e4ad4e6f..ddba8a9496 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -12,7 +12,6 @@ "name", String( Modifiers( - low_cardinality=True, codecs=[ "ZSTD(1)", ], From 10bf35b67c40bc061481f5f8963ab6956a548993 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 12 Jun 2026 15:16:54 -0700 Subject: [PATCH 8/9] ref: Rename name column to indexed_name Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0057_add_name_column_and_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index ddba8a9496..11270a7af3 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -9,7 +9,7 @@ table_name_prefix = "eap_items_1" new_columns = [ Column( - "name", + "indexed_name", String( Modifiers( codecs=[ @@ -22,8 +22,8 @@ after = "retention_days" sampling_weights = [8, 8**2, 8**3] local_table_name = f"{table_name_prefix}_local" -index_name = "bf_name" -index_expression = "name" +index_name = "bf_indexed_name" +index_expression = "indexed_name" index_type = "bloom_filter" index_granularity = 1 From 039ba8b2ad73b267a31858b0bfe43a9cfd65fc8f Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Fri, 12 Jun 2026 15:34:35 -0700 Subject: [PATCH 9/9] feat: Recreate downsample MVs to populate indexed_name The downsample materialized views select an explicit column list from eap_items_1_local, so adding the column to the downsampled tables alone left indexed_name unpopulated (and the bloom_filter index empty) on the downsampled read paths. Recreate the three downsample MVs as version 5 with indexed_name in the SELECT; the backward op restores the v4 views. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../0057_add_name_column_and_index.py | 176 +++++++++++++++++- 1 file changed, 166 insertions(+), 10 deletions(-) diff --git a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py index 11270a7af3..789a784f9c 100644 --- a/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py +++ b/snuba/snuba_migrations/events_analytics_platform/0057_add_name_column_and_index.py @@ -3,13 +3,15 @@ from snuba.migrations import migration, operations from snuba.migrations.columns import MigrationModifiers as Modifiers from snuba.migrations.operations import OperationTarget +from snuba.utils.schemas import UUID, Bool, DateTime, Float, Int, Map, UInt storage_set = StorageSetKey.EVENTS_ANALYTICS_PLATFORM ro_storage_set = StorageSetKey.EVENTS_ANALYTICS_PLATFORM_RO table_name_prefix = "eap_items_1" +column_name = "indexed_name" new_columns = [ Column( - "indexed_name", + column_name, String( Modifiers( codecs=[ @@ -27,6 +29,136 @@ index_type = "bloom_filter" index_granularity = 1 +# Materialized views that feed the downsampled tables select an explicit column +# list from eap_items_1_local, so the new column has to be added to the views +# (not just the tables) for it to be populated on the downsampled read paths. +# These columns mirror the current mv_4 definition (migration 0049) with the new +# `indexed_name` column inserted after `retention_days`. +num_attr_buckets = 40 +mv_old_version = 4 +mv_new_version = mv_old_version + 1 + +mv_columns: list[Column[Modifiers]] = [ + Column("organization_id", UInt(64)), + Column("project_id", UInt(64)), + Column("item_type", UInt(8)), + Column("timestamp", DateTime(Modifiers(codecs=["DoubleDelta", "ZSTD(1)"]))), + Column("trace_id", UUID()), + Column("item_id", UInt(128)), + Column("sampling_weight", UInt(64, modifiers=Modifiers(codecs=["ZSTD(1)"]))), + Column("sampling_factor", Float(64, modifiers=Modifiers(codecs=["ZSTD(1)"]))), + Column( + "retention_days", + UInt(16, modifiers=Modifiers(codecs=["T64", "ZSTD(1)"])), + ), + new_columns[0], + Column( + "attributes_bool", + Map( + String(), + Bool(), + ), + ), + Column( + "attributes_int", + Map( + String(), + Int(64), + ), + ), +] +mv_columns.extend( + [ + Column( + f"attributes_string_{i}", + Map( + String(), + String(), + modifiers=Modifiers( + codecs=["ZSTD(1)"], + ), + ), + ) + for i in range(num_attr_buckets) + ] +) +mv_columns.extend( + [ + Column( + f"attributes_float_{i}", + Map( + String(), + Float(64), + modifiers=Modifiers( + codecs=["ZSTD(1)"], + ), + ), + ) + for i in range(num_attr_buckets) + ] +) + + +def _should_passthrough_column(name: str) -> bool: + # Columns computed in the SELECT rather than copied straight through. + return name not in { + "sampling_weight", + "sampling_factor", + "retention_days", + "client_sample_rate", + "server_sample_rate", + } + + +def _materialized_view_query(sampling_weight: int, include_indexed_name: bool) -> str: + passthrough = [ + c.name + for c in mv_columns + if _should_passthrough_column(c.name) and (include_indexed_name or c.name != column_name) + ] + return " ".join( + [ + "SELECT", + f"{', '.join(passthrough)},", + "downsampled_retention_days AS retention_days,", + f"sampling_weight * {sampling_weight} AS sampling_weight,", + f"sampling_factor / {sampling_weight} AS sampling_factor,", + f"client_sample_rate / {sampling_weight} AS client_sample_rate,", + f"server_sample_rate / {sampling_weight} AS server_sample_rate", + "FROM eap_items_1_local", + f"WHERE (cityHash64(item_id + {sampling_weight}) % {sampling_weight}) = 0", + ] + ) + + +def _recreate_materialized_views_ops( + create_version: int, + drop_version: int, + columns: list[Column[Modifiers]], + include_indexed_name: bool, +) -> list[operations.SqlOperation]: + ops: list[operations.SqlOperation] = [] + for sampling_weight in sampling_weights: + downsampled_local_table = f"eap_items_1_downsample_{sampling_weight}_local" + ops.extend( + [ + operations.CreateMaterializedView( + storage_set=storage_set, + view_name=f"eap_items_1_downsample_{sampling_weight}_mv_{create_version}", + columns=columns, + destination_table_name=downsampled_local_table, + target=OperationTarget.LOCAL, + query=_materialized_view_query(sampling_weight, include_indexed_name), + ), + operations.DropTable( + storage_set=storage_set, + table_name=f"eap_items_1_downsample_{sampling_weight}_mv_{drop_version}", + target=OperationTarget.LOCAL, + ), + ] + ) + return ops + class Migration(migration.ClickhouseNodeMigration): blocking = False @@ -107,22 +239,46 @@ def forwards_ops(self) -> list[operations.SqlOperation]: ] ) + # Recreate the downsample materialized views so they select the new + # column and populate it on the downsampled tables. The column must + # already exist on both eap_items_1_local and the downsampled tables + # (added above) before the views are recreated. + ops.extend( + _recreate_materialized_views_ops( + create_version=mv_new_version, + drop_version=mv_old_version, + columns=mv_columns, + include_indexed_name=True, + ) + ) + return ops def backwards_ops(self) -> list[operations.SqlOperation]: + # Restore the previous materialized views (which do not reference the + # new column) before dropping the column the new views read from. + ops: list[operations.SqlOperation] = _recreate_materialized_views_ops( + create_version=mv_old_version, + drop_version=mv_new_version, + columns=[c for c in mv_columns if c.name != column_name], + include_indexed_name=False, + ) + local_table_names = [local_table_name] + [ f"eap_items_1_downsample_{sampling_weight}_local" for sampling_weight in sampling_weights ] - ops: list[operations.SqlOperation] = [ - operations.DropIndex( - storage_set=storage_set, - table_name=table_name, - index_name=index_name, - target=OperationTarget.LOCAL, - ) - for table_name in local_table_names - ] + ops.extend( + [ + operations.DropIndex( + storage_set=storage_set, + table_name=table_name, + index_name=index_name, + target=OperationTarget.LOCAL, + ) + for table_name in local_table_names + ] + ) ops.extend( [