From fa95207d1af0b954cf68f2ca29e98086379446db Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 10 Mar 2026 07:02:03 +0000 Subject: [PATCH 01/63] Oracle: Add CHECK_TIMEOUT --- packages/sync-service/test/support/oracle_harness.ex | 2 +- .../sync-service/test/support/oracle_harness/shape_checker.ex | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/sync-service/test/support/oracle_harness.ex b/packages/sync-service/test/support/oracle_harness.ex index 7e3d3660fe..fb382577c0 100644 --- a/packages/sync-service/test/support/oracle_harness.ex +++ b/packages/sync-service/test/support/oracle_harness.ex @@ -77,7 +77,7 @@ defmodule Support.OracleHarness do @spec test_against_oracle(map(), [shape()], [batch()], map()) :: :ok def test_against_oracle(ctx, shapes, batches, opts \\ %{}) do opts = Map.merge(default_opts_from_env(), opts) - timeout_ms = opts[:timeout_ms] || @default_timeout_ms + timeout_ms = opts[:timeout_ms] || env_int("CHECK_TIMEOUT") || @default_timeout_ms log_test_config(shapes, batches) diff --git a/packages/sync-service/test/support/oracle_harness/shape_checker.ex b/packages/sync-service/test/support/oracle_harness/shape_checker.ex index 425c3b123f..82161fc2cb 100644 --- a/packages/sync-service/test/support/oracle_harness/shape_checker.ex +++ b/packages/sync-service/test/support/oracle_harness/shape_checker.ex @@ -29,8 +29,6 @@ defmodule Support.OracleHarness.ShapeChecker do alias Electric.Client.ShapeDefinition alias Electric.Client.ShapeState - @default_timeout_ms 10_000 - defstruct [ :name, :table, @@ -90,7 +88,7 @@ defmodule Support.OracleHarness.ShapeChecker do Enum.each(shape.pk, &validate_identifier!(&1, "pk column")) shape_def = ShapeDefinition.new!(shape.table, where: shape.where) - timeout_ms = opts[:timeout_ms] || @default_timeout_ms + timeout_ms = Keyword.fetch!(opts, :timeout_ms) state = %__MODULE__{ name: shape.name, From 169114c0176f48dd04b6c070bee56271ae9505de Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 12 Mar 2026 09:13:32 +0000 Subject: [PATCH 02/63] Oracle: Remove where clauses with two identical subqueries --- .../oracle_harness/where_clause_generator.ex | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index 0b8fd06025..a6cd1d0a36 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -50,7 +50,11 @@ defmodule Support.OracleHarness.WhereClauseGenerator do Designed to be consumed directly by `check all` for deterministic seeding. """ def shapes_gen(count) do - list_of(where_clause_gen(), length: count) + clause_gen = + where_clause_gen() + |> filter(fn {where, _optimized} -> not has_duplicate_subqueries?(where) end) + + list_of(clause_gen, length: count) |> map(fn clauses -> clauses |> Enum.with_index(1) @@ -360,4 +364,36 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # Helper to detect if expression contains a subquery defp contains_subquery?(expr), do: String.contains?(expr, "SELECT") + + # Detect if the same (SELECT ...) subquery expression appears more than once. + # This filters out cases like (A OR B) AND B where B is a subquery — + # we have a known bug with duplicate subqueries at the same level (see oracle_dnf_repro_test.exs). + defp has_duplicate_subqueries?(where) do + subqueries = extract_balanced_subqueries(where, []) + length(subqueries) != length(Enum.uniq(subqueries)) + end + + defp extract_balanced_subqueries(str, acc) do + case String.split(str, "(SELECT ", parts: 2) do + [_] -> Enum.reverse(acc) + [_prefix, rest] -> + {inner, remaining} = consume_balanced(rest, 1, []) + subquery = "(SELECT " <> IO.iodata_to_binary(inner) + extract_balanced_subqueries(remaining, [subquery | acc]) + end + end + + defp consume_balanced("", _depth, acc), do: {IO.iodata_to_binary(Enum.reverse(acc)), ""} + + defp consume_balanced(<<"(", rest::binary>>, depth, acc), + do: consume_balanced(rest, depth + 1, ["(" | acc]) + + defp consume_balanced(<<")", rest::binary>>, 1, acc), + do: {IO.iodata_to_binary(Enum.reverse([")" | acc])), rest} + + defp consume_balanced(<<")", rest::binary>>, depth, acc), + do: consume_balanced(rest, depth - 1, [")" | acc]) + + defp consume_balanced(<>, depth, acc), + do: consume_balanced(rest, depth, [<> | acc]) end From 1d7d6b8cd14b611f86780198f0a67b8c140ae174 Mon Sep 17 00:00:00 2001 From: rob Date: Sat, 14 Mar 2026 13:53:12 +0000 Subject: [PATCH 03/63] Oracle: make all queries optimised --- .../oracle_harness/where_clause_generator.ex | 67 ++++++++----------- 1 file changed, 27 insertions(+), 40 deletions(-) diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index a6cd1d0a36..e9965c0da7 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -109,24 +109,24 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp equality_gen do one_of([ # level_3_id = 'l3-X' - level_3_id_gen() |> map(&{"level_3_id = '#{&1}'", false}), + level_3_id_gen() |> map(&{"level_3_id = '#{&1}'", true}), # id = 'l4-X' - level_4_id_gen() |> map(&{"id = '#{&1}'", false}), + level_4_id_gen() |> map(&{"id = '#{&1}'", true}), # value = 'vX' - value_literal_gen() |> map(&{"value = '#{&1}'", false}) + value_literal_gen() |> map(&{"value = '#{&1}'", true}) ]) end # col > 'val', col <> 'val', etc. defp comparison_gen do bind({member_of(["<", ">", "<=", ">=", "<>"]), value_literal_gen()}, fn {op, val} -> - constant({"value #{op} '#{val}'", false}) + constant({"value #{op} '#{val}'", true}) end) end # col LIKE 'pattern' / col NOT LIKE 'pattern' defp like_gen do - bind({member_of([{"LIKE", false}, {"NOT LIKE", false}]), like_pattern_gen()}, fn + bind({member_of([{"LIKE", true}, {"NOT LIKE", true}]), like_pattern_gen()}, fn {{op, optimized}, pattern} -> constant({"value #{op} '#{pattern}'", optimized}) end) @@ -135,7 +135,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # col BETWEEN 'a' AND 'b' / col NOT BETWEEN 'a' AND 'b' defp between_gen do bind( - {member_of([{"BETWEEN", false}, {"NOT BETWEEN", false}]), value_literal_gen(), + {member_of([{"BETWEEN", true}, {"NOT BETWEEN", true}]), value_literal_gen(), value_literal_gen()}, fn {{op, optimized}, v1, v2} -> # Ensure v1 <= v2 for valid BETWEEN @@ -151,12 +151,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # level_3_id IN ('l3-1', 'l3-2', ...) bind(list_of(level_3_id_gen(), min_length: 2, max_length: 4), fn ids -> values = ids |> Enum.uniq() |> Enum.map(&"'#{&1}'") |> Enum.join(", ") - constant({"level_3_id IN (#{values})", false}) + constant({"level_3_id IN (#{values})", true}) end), # id IN ('l4-1', 'l4-2', ...) bind(list_of(level_4_id_gen(), min_length: 2, max_length: 4), fn ids -> values = ids |> Enum.uniq() |> Enum.map(&"'#{&1}'") |> Enum.join(", ") - constant({"id IN (#{values})", false}) + constant({"id IN (#{values})", true}) end) ]) end @@ -180,12 +180,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # Filter by active flag bool_gen() |> map(fn active -> - {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active})", false} + {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active})", true} end), # Filter by level_2_id level_2_id_gen() |> map(fn l2_id -> - {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", false} + {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", true} end) ]) end @@ -197,14 +197,14 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bind({bool_gen(), bool_gen()}, fn {active_l3, active_l2} -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active_l3} AND level_2_id IN (SELECT id FROM level_2 WHERE active = #{active_l2}))", - false} + true} ) end), # Through specific level_1_id level_1_id_gen() |> map(fn l1_id -> {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id = '#{l1_id}'))", - false} + true} end) ]) end @@ -214,7 +214,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bool_gen() |> map(fn active_l1 -> {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id IN (SELECT id FROM level_1 WHERE active = #{active_l1})))", - false} + true} end) end @@ -224,19 +224,19 @@ defmodule Support.OracleHarness.WhereClauseGenerator do case level do 1 -> constant( - {"level_3_id IN (SELECT level_3_id FROM level_3_tags WHERE tag = '#{tag}')", false} + {"level_3_id IN (SELECT level_3_id FROM level_3_tags WHERE tag = '#{tag}')", true} ) 2 -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT level_2_id FROM level_2_tags WHERE tag = '#{tag}'))", - false} + true} ) 3 -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id IN (SELECT level_1_id FROM level_1_tags WHERE tag = '#{tag}')))", - false} + true} ) end end) @@ -251,12 +251,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # NOT IN with active flag bool_gen() |> map(fn active -> - {"level_3_id NOT IN (SELECT id FROM level_3 WHERE active = #{active})", false} + {"level_3_id NOT IN (SELECT id FROM level_3 WHERE active = #{active})", true} end), # NOT IN with level_2_id level_2_id_gen() |> map(fn l2_id -> - {"level_3_id NOT IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", false} + {"level_3_id NOT IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", true} end) ]) end @@ -270,13 +270,8 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp and_composition(depth) do bind({base_expr_gen(depth - 1), base_expr_gen(depth - 1)}, fn - {{left, left_opt}, {right, right_opt}} -> - has_left_subquery = contains_subquery?(left) - has_right_subquery = contains_subquery?(right) - - # AND with multiple subqueries at same level is NOT optimized - optimized = left_opt and right_opt and not (has_left_subquery and has_right_subquery) - constant({"(#{left}) AND (#{right})", optimized}) + {{left, _left_opt}, {right, _right_opt}} -> + constant({"(#{left}) AND (#{right})", true}) end) end @@ -285,13 +280,8 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp or_composition(depth) do bind({base_expr_gen(depth - 1), base_expr_gen(depth - 1)}, fn - {{left, left_opt}, {right, right_opt}} -> - # OR with subqueries is typically not optimized - optimized = - left_opt and right_opt and not contains_subquery?(left) and - not contains_subquery?(right) - - constant({"(#{left}) OR (#{right})", optimized}) + {{left, _left_opt}, {right, _right_opt}} -> + constant({"(#{left}) OR (#{right})", true}) end) end @@ -313,16 +303,15 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp and_or_composition(depth) do bind({or_composition(depth - 1), base_expr_gen(depth - 1)}, fn - {{or_expr, or_opt}, {simple, simple_opt}} -> - optimized = or_opt and simple_opt - constant({"(#{or_expr}) AND (#{simple})", optimized}) + {{or_expr, _or_opt}, {simple, _simple_opt}} -> + constant({"(#{or_expr}) AND (#{simple})", true}) end) end defp not_composition(_depth) do one_of([ # NOT (simple condition) - atomic_with_meta() |> map(fn {expr, _} -> {"NOT (#{expr})", false} end), + atomic_with_meta() |> map(fn {expr, _} -> {"NOT (#{expr})", true} end), # NOT IN subquery not_in_subquery_gen() ]) @@ -332,16 +321,14 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bind(list_of(base_expr_gen(depth - 1), min_length: 2, max_length: 3), fn exprs -> clauses = Enum.map(exprs, fn {expr, _} -> "(#{expr})" end) combined = Enum.join(clauses, " OR ") - # Multi-OR is generally not optimized - constant({combined, false}) + constant({combined, true}) end) end defp subquery_or_simple(_depth) do bind({subquery_1_level_gen(), atomic_with_meta()}, fn {{subq, _}, {simple, _}} -> - # OR with subquery is not optimized - constant({"(#{subq}) OR (#{simple})", false}) + constant({"(#{subq}) OR (#{simple})", true}) end) end From ba9ea2ab358392d1bf2b861d06422a4fd9fe85be Mon Sep 17 00:00:00 2001 From: rob Date: Sat, 14 Mar 2026 14:13:43 +0000 Subject: [PATCH 04/63] Oracle: Add more variation --- .../oracle_harness/where_clause_generator.ex | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index e9965c0da7..c184139a53 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -297,7 +297,9 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # a OR b OR c (multiple ORs) {1, multi_or_composition(depth)}, # Subquery OR simple condition - {2, subquery_or_simple(depth)} + {2, subquery_or_simple(depth)}, + # (expr OR expr) AND (expr OR expr) + {2, or_branches_and_composition()} ]) end @@ -325,6 +327,25 @@ defmodule Support.OracleHarness.WhereClauseGenerator do end) end + # (expr OR expr) AND (expr OR expr) — each expr is a subquery or atomic + defp or_branches_and_composition do + bind( + {subquery_or_atomic(), subquery_or_atomic(), + subquery_or_atomic(), subquery_or_atomic()}, + fn {{s1, _}, {s2, _}, {s3, _}, {s4, _}} -> + constant({"(#{s1} OR #{s2}) AND (#{s3} OR #{s4})", true}) + end + ) + end + + defp subquery_or_atomic do + frequency([ + {2, subquery_1_level_gen()}, + {1, tag_subquery_gen()}, + {2, atomic_with_meta()} + ]) + end + defp subquery_or_simple(_depth) do bind({subquery_1_level_gen(), atomic_with_meta()}, fn {{subq, _}, {simple, _}} -> @@ -349,9 +370,6 @@ defmodule Support.OracleHarness.WhereClauseGenerator do ]) end - # Helper to detect if expression contains a subquery - defp contains_subquery?(expr), do: String.contains?(expr, "SELECT") - # Detect if the same (SELECT ...) subquery expression appears more than once. # This filters out cases like (A OR B) AND B where B is a subquery — # we have a known bug with duplicate subqueries at the same level (see oracle_dnf_repro_test.exs). From fd4d58abe0e67a190d0eec77d6cb695a1c5d241b Mon Sep 17 00:00:00 2001 From: rob Date: Sat, 14 Mar 2026 22:38:59 +0000 Subject: [PATCH 05/63] Oracle: Allow idential subqueries --- .../oracle_harness/where_clause_generator.ex | 37 +------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index c184139a53..8a60563a57 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -50,11 +50,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do Designed to be consumed directly by `check all` for deterministic seeding. """ def shapes_gen(count) do - clause_gen = - where_clause_gen() - |> filter(fn {where, _optimized} -> not has_duplicate_subqueries?(where) end) - - list_of(clause_gen, length: count) + list_of(where_clause_gen(), length: count) |> map(fn clauses -> clauses |> Enum.with_index(1) @@ -370,35 +366,4 @@ defmodule Support.OracleHarness.WhereClauseGenerator do ]) end - # Detect if the same (SELECT ...) subquery expression appears more than once. - # This filters out cases like (A OR B) AND B where B is a subquery — - # we have a known bug with duplicate subqueries at the same level (see oracle_dnf_repro_test.exs). - defp has_duplicate_subqueries?(where) do - subqueries = extract_balanced_subqueries(where, []) - length(subqueries) != length(Enum.uniq(subqueries)) - end - - defp extract_balanced_subqueries(str, acc) do - case String.split(str, "(SELECT ", parts: 2) do - [_] -> Enum.reverse(acc) - [_prefix, rest] -> - {inner, remaining} = consume_balanced(rest, 1, []) - subquery = "(SELECT " <> IO.iodata_to_binary(inner) - extract_balanced_subqueries(remaining, [subquery | acc]) - end - end - - defp consume_balanced("", _depth, acc), do: {IO.iodata_to_binary(Enum.reverse(acc)), ""} - - defp consume_balanced(<<"(", rest::binary>>, depth, acc), - do: consume_balanced(rest, depth + 1, ["(" | acc]) - - defp consume_balanced(<<")", rest::binary>>, 1, acc), - do: {IO.iodata_to_binary(Enum.reverse([")" | acc])), rest} - - defp consume_balanced(<<")", rest::binary>>, depth, acc), - do: consume_balanced(rest, depth - 1, [")" | acc]) - - defp consume_balanced(<>, depth, acc), - do: consume_balanced(rest, depth, [<> | acc]) end From b57de5492d5cbdcc7009a48c6ea2d01dae735445 Mon Sep 17 00:00:00 2001 From: Rob A'Court Date: Thu, 29 Jan 2026 17:34:03 +0000 Subject: [PATCH 06/63] Fix bug caused by typo --- packages/sync-service/lib/electric/replication/eval/parser.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sync-service/lib/electric/replication/eval/parser.ex b/packages/sync-service/lib/electric/replication/eval/parser.ex index c7e3d27585..ca8b7c84f0 100644 --- a/packages/sync-service/lib/electric/replication/eval/parser.ex +++ b/packages/sync-service/lib/electric/replication/eval/parser.ex @@ -1009,7 +1009,7 @@ defmodule Electric.Replication.Eval.Parser do comparisons = [left_comparison, right_comparison], {:ok, reduced} <- build_bool_chain( - %{name: "or", impl: &pg_and/2, strict?: false}, + %{name: "and", impl: &pg_and/2, strict?: false}, comparisons, expr.location ) do From 7c3022249437907d733e1d89dcbdaf2d7d070810 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 9 Mar 2026 12:38:07 +0000 Subject: [PATCH 07/63] Revert "Fix head-of-line blocking in SLC for subquery shapes via ETS link-values cache and inverted index (#3937)" This reverts commit 8fe0a375299680884b883d31d1f622519c60da37. --- .../lib/electric/shapes/consumer.ex | 7 - .../electric/shapes/consumer/materializer.ex | 104 ++----- .../lib/electric/shapes/consumer_registry.ex | 1 - .../lib/electric/shapes/filter.ex | 254 +----------------- .../electric/shapes/filter/where_condition.ex | 1 - .../shapes/consumer/materializer_test.exs | 14 - .../test/electric/shapes/filter_test.exs | 4 +- 7 files changed, 23 insertions(+), 362 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index abe734a678..79fa09c5b8 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -376,8 +376,6 @@ defmodule Electric.Shapes.Consumer do "Materializer down for a dependency: #{handle} (#{inspect(pid)}) (#{inspect(reason)})" ) - Materializer.delete_link_values(state.stack_id, handle) - handle_materializer_down(reason, state) end @@ -437,11 +435,6 @@ defmodule Electric.Shapes.Consumer do end end) - # Clean up this shape's link-values ETS entry. This consumer may itself be - # a dep shape; removing the entry prevents stale cached values from persisting - # after shutdown. - Materializer.delete_link_values(state.stack_id, state.shape_handle) - # always need to terminate writer to remove the writer ets (which belongs # to this process). leads to unecessary writes in the case of a deleted # shape but the alternative is leaking ets tables. diff --git a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex index 9a613107b9..69164b3ad6 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex @@ -1,4 +1,13 @@ defmodule Electric.Shapes.Consumer.Materializer do + # TODOS: + # - [x] Keep lockstep with the consumer + # - [ ] Think about initial materialization needing to finish before we can continue + # - [ ] + # - [ ] Use the `get_link_values` + + # NOTES: + # - Consumer does txn buffering until pg snapshot is known + # The lifecycle of a materializer is linked to its source consumer. If the consumer # goes down for any reason other than a clean supervisor/stack shutdown then we # need to invalidate all dependent outer shapes. @@ -48,46 +57,11 @@ defmodule Electric.Shapes.Consumer.Materializer do GenServer.call(name(state), :wait_until_ready, :infinity) end - @doc """ - Creates the per-stack ETS table that caches link values for all materializers - in a stack. Called by `ConsumerRegistry` during stack initialization. Idempotent — - safe to call when the table already exists. - """ - @spec init_link_values_table(stack_id :: term()) :: :ets.table() | :undefined - def init_link_values_table(stack_id) do - :ets.new(link_values_table_name(stack_id), [ - :named_table, - :public, - :set, - read_concurrency: true, - write_concurrency: true - ]) - rescue - ArgumentError -> :ets.whereis(link_values_table_name(stack_id)) - end - - @doc """ - Returns the current set of materialized link values for a shape. - Checks the shared ETS cache first (written after each committed transaction); - falls back to a synchronous GenServer call if the cache has no entry yet. - """ - def get_link_values(%{stack_id: stack_id, shape_handle: shape_handle} = opts) do - table = link_values_table_name(stack_id) - - case :ets.lookup(table, shape_handle) do - [{^shape_handle, values}] -> values - _ -> genserver_get_link_values(opts) - end - rescue - ArgumentError -> genserver_get_link_values(opts) - end - - defp genserver_get_link_values(opts) do + def get_link_values(opts) do GenServer.call(name(opts), :get_link_values) catch - :exit, reason -> - raise "Materializer for stack #{inspect(opts.stack_id)} and handle " <> - "#{inspect(opts.shape_handle)} is not available: #{inspect(reason)}" + :exit, _reason -> + raise ~s|Materializer for stack "#{opts.stack_id}" and handle "#{opts.shape_handle}" is not available| end def get_all_as_refs(shape, stack_id) when are_deps_filled(shape) do @@ -175,8 +149,6 @@ defmodule Electric.Shapes.Consumer.Materializer do |> decode_json_stream() |> apply_changes(state) - write_link_values(state) - {:noreply, %{state | offset: offset}} end @@ -198,7 +170,9 @@ defmodule Electric.Shapes.Consumer.Materializer do end def handle_call(:get_link_values, _from, %{value_counts: value_counts} = state) do - {:reply, link_values_from_counts(value_counts), state} + values = MapSet.new(Map.keys(value_counts)) + + {:reply, values, state} end def handle_call(:wait_until_ready, _from, state) do @@ -258,52 +232,6 @@ defmodule Electric.Shapes.Consumer.Materializer do {:noreply, %{state | subscribers: MapSet.delete(state.subscribers, pid)}} end - @spec link_values_table_name(Electric.stack_id()) :: atom() - def link_values_table_name(stack_id) do - :"Electric.Materializer.LinkValues:#{stack_id}" - end - - @doc """ - Removes the cached link values for `shape_handle` from the shared ETS table. - Safe to call even if the table does not exist (e.g. after a stack shutdown). - """ - @spec delete_link_values(Electric.stack_id(), Electric.shape_handle()) :: :ok - def delete_link_values(stack_id, shape_handle) do - :ets.delete(link_values_table_name(stack_id), shape_handle) - :ok - rescue - ArgumentError -> - Logger.debug(fn -> - "delete_link_values: link-values table for stack #{inspect(stack_id)} " <> - "not found when deleting handle #{inspect(shape_handle)}" - end) - - :ok - end - - defp link_values_from_counts(value_counts) do - MapSet.new(Map.keys(value_counts)) - end - - defp write_link_values(%{ - stack_id: stack_id, - shape_handle: shape_handle, - value_counts: value_counts - }) do - :ets.insert( - link_values_table_name(stack_id), - {shape_handle, link_values_from_counts(value_counts)} - ) - rescue - ArgumentError -> - Logger.warning( - "write_link_values: link-values ETS table missing for stack #{inspect(stack_id)} " <> - "— cache will fall back to GenServer calls for handle #{inspect(shape_handle)}" - ) - - :ok - end - defp decode_json_stream(stream) do stream |> Stream.map(&Jason.decode!/1) @@ -390,8 +318,6 @@ defmodule Electric.Shapes.Consumer.Materializer do end end - write_link_values(state) - %{state | pending_events: %{}} end diff --git a/packages/sync-service/lib/electric/shapes/consumer_registry.ex b/packages/sync-service/lib/electric/shapes/consumer_registry.ex index 207c07f849..d787517498 100644 --- a/packages/sync-service/lib/electric/shapes/consumer_registry.ex +++ b/packages/sync-service/lib/electric/shapes/consumer_registry.ex @@ -294,7 +294,6 @@ defmodule Electric.Shapes.ConsumerRegistry do def new(stack_id, opts \\ []) when is_binary(stack_id) do table = registry_table(stack_id) - Electric.Shapes.Consumer.Materializer.init_link_values_table(stack_id) state = struct(__MODULE__, Keyword.merge(opts, stack_id: stack_id, table: table)) diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index 35ebfac456..d218942c2b 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -18,15 +18,9 @@ defmodule Electric.Shapes.Filter do alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes.TruncatedRelation alias Electric.Replication.Changes.UpdatedRecord - alias Electric.Replication.Eval - alias Electric.Replication.Eval.Parser.Func - alias Electric.Replication.Eval.Parser.Ref - alias Electric.Replication.Eval.Walker - alias Electric.Shapes.Consumer.Materializer alias Electric.Shapes.Filter alias Electric.Shapes.Filter.WhereCondition alias Electric.Shapes.Shape - alias Electric.Shapes.WhereClause alias Electric.Telemetry.OpenTelemetry require Logger @@ -37,15 +31,7 @@ defmodule Electric.Shapes.Filter do :where_cond_table, :eq_index_table, :incl_index_table, - :refs_fun, - :stack_id, - # {relation, field_name} -> [{dep_handle, field_type}] - :sublink_field_table, - # dep_handle -> MapSet(outer_shape_ids) - :sublink_dep_table, - # MapSet of shape_ids registered in the inverted index — enables O(1) membership - # check in the hot path without loading the shape or touching dep ETS tables. - :sublink_shapes_set + :refs_fun ] @type t :: %Filter{} @@ -59,11 +45,7 @@ defmodule Electric.Shapes.Filter do where_cond_table: :ets.new(:filter_where, [:set, :private]), eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), - refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end), - stack_id: Keyword.get(opts, :stack_id), - sublink_field_table: :ets.new(:filter_sublink_field, [:set, :private]), - sublink_dep_table: :ets.new(:filter_sublink_dep, [:set, :private]), - sublink_shapes_set: MapSet.new() + refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end) } end @@ -93,14 +75,7 @@ defmodule Electric.Shapes.Filter do WhereCondition.add_shape(filter, where_cond_id, shape_id, shape.where) - # Only register in the inverted index when the WHERE is non-optimisable - # (landed in other_shapes). Indexed dep shapes use the equality/inclusion path. - if shape.shape_dependencies_handles != [] and - in_other_shapes?(filter, where_cond_id, shape_id) do - register_sublink_shape(filter, shape_id, shape) - else - filter - end + filter end defp get_or_create_table_condition(filter, table_name) do @@ -131,13 +106,6 @@ defmodule Electric.Shapes.Filter do :ok -> :ok end - filter = - if registered_in_inverted_index?(filter, shape_id) do - unregister_sublink_shape(filter, shape_id, shape) - else - filter - end - :ets.delete(filter.shapes_table, shape_id) filter @@ -204,90 +172,12 @@ defmodule Electric.Shapes.Filter do end defp shapes_affected_by_record(filter, table_name, record) do - where_cond_results = - case :ets.lookup(filter.tables_table, table_name) do - [] -> MapSet.new() - [{_, where_cond_id}] -> WhereCondition.affected_shapes(filter, where_cond_id, record) - end - - MapSet.union(where_cond_results, sublink_affected_shapes(filter, table_name, record)) - end - - # Inverted-index lookup for dep shapes that live in other_shapes. - # Returns affected outer shapes in O(fields × dep_handles_per_field) instead - # of the O(N×D) loop that WhereCondition.other_shapes_affected would do. - defp sublink_affected_shapes(%Filter{stack_id: nil}, _table_name, _record), do: MapSet.new() - - defp sublink_affected_shapes(filter, table_name, record) do - link_values_table = Materializer.link_values_table_name(filter.stack_id) - - candidates = - Enum.reduce(record, MapSet.new(), fn {field_name, string_value}, acc -> - case :ets.lookup(filter.sublink_field_table, {table_name, field_name}) do - [] -> - acc - - [{_, dep_infos}] -> - Enum.reduce(dep_infos, acc, fn {dep_handle, field_type}, inner_acc -> - if record_matches_dep?( - link_values_table, - dep_handle, - field_type, - string_value - ) do - union_shapes_for_dep(filter, dep_handle, inner_acc) - else - inner_acc - end - end) - end - end) - - OpenTelemetry.add_span_attributes("filter.sublink_candidates_count": MapSet.size(candidates)) - - # Re-evaluate full WHERE for candidates to handle any non-sublink conditions - OpenTelemetry.timed_fun("filter.sublink_reeval.duration_µs", fn -> - for shape_id <- candidates, - shape = get_shape(filter, shape_id), - not is_nil(shape), - WhereClause.includes_record?(shape.where, record, filter.refs_fun.(shape)), - into: MapSet.new() do - shape_id - end - end) - rescue - # The named ETS table may not exist during a ConsumerRegistry restart window. - # Return empty rather than propagating to the broad "return all shapes" fallback. - ArgumentError -> MapSet.new() - end - - # Returns true if the record's field value is present in the dep handle's - # cached link values, or if no cached values exist yet (optimistic inclusion). - defp record_matches_dep?(link_values_table, dep_handle, _field_type, nil = _string_value) do - # Null field values never match link values, but we still include - # candidates when no cache exists (materializer not started). - :ets.lookup(link_values_table, dep_handle) == [] - end - - defp record_matches_dep?(link_values_table, dep_handle, field_type, string_value) do - case :ets.lookup(link_values_table, dep_handle) do + case :ets.lookup(filter.tables_table, table_name) do [] -> - # No cached values yet (materializer not started) -- include as candidate - # so the re-eval via refs_fun handles it correctly. - true - - [{_, linked_values}] -> - case Eval.Env.parse_const(Eval.Env.new(), string_value, field_type) do - {:ok, parsed_value} -> MapSet.member?(linked_values, parsed_value) - _ -> false - end - end - end + MapSet.new() - defp union_shapes_for_dep(filter, dep_handle, acc) do - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shape_ids}] -> MapSet.union(acc, shape_ids) - [] -> acc + [{_, where_cond_id}] -> + WhereCondition.affected_shapes(filter, where_cond_id, record) end end @@ -317,134 +207,4 @@ defmodule Electric.Shapes.Filter do [] -> nil end end - - @doc """ - Returns true if a dep shape is registered in the sublink inverted index. - - Only dep shapes in top-level other_shapes (non-optimisable WHERE) are registered. - Dep shapes that go through an equality index end up in nested other_shapes and - must be evaluated normally by `other_shapes_affected`. - """ - @spec registered_in_inverted_index?(t(), shape_id()) :: boolean() - def registered_in_inverted_index?(%Filter{sublink_shapes_set: set}, shape_id), - do: MapSet.member?(set, shape_id) - - defp in_other_shapes?(filter, where_cond_id, shape_id) do - case :ets.lookup(filter.where_cond_table, where_cond_id) do - [{_, {_index_keys, other_shapes}}] -> Map.has_key?(other_shapes, shape_id) - [] -> false - end - end - - # Walks the WHERE expression tree and returns a map of - # %{sublink_index => {field_name, field_type}} for each - # sublink_membership_check node with a simple field reference on the left. - # Returns an empty map for nil or complex (RowExpr) left-hand sides. - defp extract_sublink_fields(nil), do: %{} - - defp extract_sublink_fields(%{eval: eval}) do - Walker.reduce!( - eval, - fn - %Func{ - name: "sublink_membership_check", - args: [ - %Ref{path: [field_name], type: field_type}, - %Ref{path: ["$sublink", n_str]} - ] - }, - acc, - _ -> - {:ok, Map.put(acc, String.to_integer(n_str), {field_name, field_type})} - - _, acc, _ -> - {:ok, acc} - end, - %{} - ) - end - - defp register_sublink_shape(filter, shape_id, shape) do - sublink_fields = extract_sublink_fields(shape.where) - - for {sublink_index, {field_name, field_type}} <- sublink_fields do - dep_handle = Enum.at(shape.shape_dependencies_handles, sublink_index) - - field_key = {shape.root_table, field_name} - - existing_entries = - case :ets.lookup(filter.sublink_field_table, field_key) do - [{_, entries}] -> entries - [] -> [] - end - - unless Enum.any?(existing_entries, fn {h, _} -> h == dep_handle end) do - :ets.insert( - filter.sublink_field_table, - {field_key, [{dep_handle, field_type} | existing_entries]} - ) - end - - existing_shapes = - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shapes}] -> shapes - [] -> MapSet.new() - end - - :ets.insert(filter.sublink_dep_table, {dep_handle, MapSet.put(existing_shapes, shape_id)}) - end - - # RowExpr subqueries (e.g. `(a, b) IN (SELECT ...)`) produce no indexable fields; - # those shapes stay in other_shapes and must not be marked as indexed. - if map_size(sublink_fields) > 0 do - %{filter | sublink_shapes_set: MapSet.put(filter.sublink_shapes_set, shape_id)} - else - filter - end - end - - defp unregister_sublink_shape(filter, shape_id, shape) do - sublink_fields = extract_sublink_fields(shape.where) - - for {sublink_index, {field_name, _field_type}} <- sublink_fields do - dep_handle = Enum.at(shape.shape_dependencies_handles, sublink_index) - - dep_now_empty? = - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shapes}] -> - new_shapes = MapSet.delete(shapes, shape_id) - - if MapSet.size(new_shapes) == 0 do - :ets.delete(filter.sublink_dep_table, dep_handle) - true - else - :ets.insert(filter.sublink_dep_table, {dep_handle, new_shapes}) - false - end - - [] -> - true - end - - if dep_now_empty? do - field_key = {shape.root_table, field_name} - - case :ets.lookup(filter.sublink_field_table, field_key) do - [{_, entries}] -> - new_entries = Enum.reject(entries, fn {h, _} -> h == dep_handle end) - - if new_entries == [] do - :ets.delete(filter.sublink_field_table, field_key) - else - :ets.insert(filter.sublink_field_table, {field_key, new_entries}) - end - - [] -> - :ok - end - end - end - - %{filter | sublink_shapes_set: MapSet.delete(filter.sublink_shapes_set, shape_id)} - end end diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index e91dc75498..ea75a05b53 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -275,7 +275,6 @@ defmodule Electric.Shapes.Filter.WhereCondition do fn -> for {shape_id, where} <- other_shapes, shape = Filter.get_shape(filter, shape_id), - not is_nil(shape), WhereClause.includes_record?(where, record, refs_fun.(shape)), into: MapSet.new() do shape_id diff --git a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs index ad2d2fc50a..480c75515d 100644 --- a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs @@ -545,20 +545,6 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do assert_receive {:materializer_changes, _, %{move_out: [{2, "2"}], move_in: [{3, "3"}]}} end - - @tag snapshot_data: [%Changes.NewRecord{record: %{"id" => "1", "value" => "10"}}] - test "get_link_values reads from ETS cache and does not require the GenServer to be alive", - ctx do - ctx = with_materializer(ctx) - - assert Materializer.get_link_values(ctx) == MapSet.new([10]) - - # Stop the materializer GenServer — a pure GenServer.call path would now raise - GenServer.stop(Materializer.whereis(ctx)) - - # ETS-cached values should be returned without touching the (now-dead) GenServer - assert Materializer.get_link_values(ctx) == MapSet.new([10]) - end end describe "same-batch move event cancellation" do diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index d2885e7862..158f0eb8f5 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -520,9 +520,7 @@ defmodule Electric.Shapes.FilterTest do tables: :ets.tab2list(filter.tables_table) |> Enum.sort(), where_cond: :ets.tab2list(filter.where_cond_table) |> Enum.sort(), eq_index: :ets.tab2list(filter.eq_index_table) |> Enum.sort(), - incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort(), - sublink_field: :ets.tab2list(filter.sublink_field_table) |> Enum.sort(), - sublink_dep: :ets.tab2list(filter.sublink_dep_table) |> Enum.sort() + incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort() } end From 3e830e05d42cc825a08df2d42d7a83d599d4e387 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 9 Mar 2026 15:11:10 +0000 Subject: [PATCH 08/63] Remove: sync-service: always route subquery shapes in filter --- .../lib/electric/shapes/filter.ex | 76 +++++++++++++++---- .../test/electric/shapes/filter_test.exs | 63 ++++++--------- 2 files changed, 84 insertions(+), 55 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index d218942c2b..639954f195 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -31,6 +31,7 @@ defmodule Electric.Shapes.Filter do :where_cond_table, :eq_index_table, :incl_index_table, + :subquery_shapes_table, :refs_fun ] @@ -45,6 +46,7 @@ defmodule Electric.Shapes.Filter do where_cond_table: :ets.new(:filter_where, [:set, :private]), eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), + subquery_shapes_table: :ets.new(:filter_subquery, [:set, :private]), refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end) } end @@ -74,10 +76,21 @@ defmodule Electric.Shapes.Filter do where_cond_id = get_or_create_table_condition(filter, shape.root_table) WhereCondition.add_shape(filter, where_cond_id, shape_id, shape.where) + maybe_track_subquery_shape(filter, shape_id, shape) filter end + defp maybe_track_subquery_shape( + %Filter{subquery_shapes_table: table}, + shape_id, + %Shape{shape_dependencies: [_ | _], root_table: root_table} + ) do + :ets.insert(table, {{root_table, shape_id}, true}) + end + + defp maybe_track_subquery_shape(_filter, _shape_id, _shape), do: :ok + defp get_or_create_table_condition(filter, table_name) do case :ets.lookup(filter.tables_table, table_name) do [] -> @@ -106,11 +119,22 @@ defmodule Electric.Shapes.Filter do :ok -> :ok end + maybe_untrack_subquery_shape(filter, shape_id, shape) :ets.delete(filter.shapes_table, shape_id) filter end + defp maybe_untrack_subquery_shape( + %Filter{subquery_shapes_table: table}, + shape_id, + %Shape{shape_dependencies: [_ | _], root_table: root_table} + ) do + :ets.delete(table, {root_table, shape_id}) + end + + defp maybe_untrack_subquery_shape(_filter, _shape_id, _shape), do: :ok + @doc """ Returns the shape IDs for all shapes that have been added to the filter that are affected by the given change. @@ -172,30 +196,50 @@ defmodule Electric.Shapes.Filter do end defp shapes_affected_by_record(filter, table_name, record) do - case :ets.lookup(filter.tables_table, table_name) do - [] -> - MapSet.new() + from_where_condition = + case :ets.lookup(filter.tables_table, table_name) do + [] -> + MapSet.new() - [{_, where_cond_id}] -> - WhereCondition.affected_shapes(filter, where_cond_id, record) - end + [{_, where_cond_id}] -> + WhereCondition.affected_shapes(filter, where_cond_id, record) + end + + MapSet.union(from_where_condition, subquery_shape_ids_for_table(filter, table_name)) end defp all_shape_ids(%Filter{} = filter) do - :ets.foldl( - fn {_table_name, where_cond_id}, acc -> - MapSet.union(acc, WhereCondition.all_shape_ids(filter, where_cond_id)) - end, - MapSet.new(), - filter.tables_table + from_where_conditions = + :ets.foldl( + fn {_table_name, where_cond_id}, acc -> + MapSet.union(acc, WhereCondition.all_shape_ids(filter, where_cond_id)) + end, + MapSet.new(), + filter.tables_table + ) + + MapSet.union( + from_where_conditions, + filter.subquery_shapes_table + |> :ets.select([{{{:_, :"$1"}, :_}, [], [:"$1"]}]) + |> MapSet.new() ) end defp shape_ids_for_table(%Filter{} = filter, table_name) do - case :ets.lookup(filter.tables_table, table_name) do - [] -> MapSet.new() - [{_, where_cond_id}] -> WhereCondition.all_shape_ids(filter, where_cond_id) - end + from_where_condition = + case :ets.lookup(filter.tables_table, table_name) do + [] -> MapSet.new() + [{_, where_cond_id}] -> WhereCondition.all_shape_ids(filter, where_cond_id) + end + + MapSet.union(from_where_condition, subquery_shape_ids_for_table(filter, table_name)) + end + + defp subquery_shape_ids_for_table(%Filter{subquery_shapes_table: table}, table_name) do + table + |> :ets.select([{{{table_name, :"$1"}, :_}, [], [:"$1"]}]) + |> MapSet.new() end @doc """ diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index 158f0eb8f5..e1bdc3969a 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -520,7 +520,8 @@ defmodule Electric.Shapes.FilterTest do tables: :ets.tab2list(filter.tables_table) |> Enum.sort(), where_cond: :ets.tab2list(filter.where_cond_table) |> Enum.sort(), eq_index: :ets.tab2list(filter.eq_index_table) |> Enum.sort(), - incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort() + incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort(), + subquery_shapes: :ets.tab2list(filter.subquery_shapes_table) |> Enum.sort() } end @@ -776,7 +777,7 @@ defmodule Electric.Shapes.FilterTest do } end - describe "refs_fun threading through indexes" do + describe "subquery shapes are always routed in filter" do import Support.DbSetup import Support.DbStructureSetup import Support.ComponentSetup @@ -793,20 +794,14 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "refs_fun is threaded through equality index for compound WHERE clause with subquery", + test "subquery shape is always routed for root table changes", %{inspector: inspector} do - # Create a shape with an equality-indexed condition AND a subquery - # The where clause "par_id = 7 AND id IN (SELECT id FROM parent)" will: - # 1. Use equality index for par_id = 7 - # 2. Use refs_fun for the subquery evaluation in the AND clause {:ok, shape} = Shape.new("child", inspector: inspector, where: "par_id = 7 AND id IN (SELECT id FROM parent)" ) - # Create refs_fun that returns sublink values based on the shape - # When id is in the sublink MapSet, the subquery condition passes refs_fun = fn _shape -> %{["$sublink", "0"] => MapSet.new([1, 2, 3])} end @@ -815,7 +810,6 @@ defmodule Electric.Shapes.FilterTest do Filter.new(refs_fun: refs_fun) |> Filter.add_shape("shape1", shape) - # Record with par_id = 7 AND id in sublink results -> affected insert_matching = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "7"} @@ -823,33 +817,34 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record with par_id = 7 but id NOT in sublink results -> not affected insert_not_in_subquery = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "99", "par_id" => "7"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) - # Record with par_id != 7 -> not affected (equality index filters it out) insert_wrong_par_id = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "8"} } - assert Filter.affected_shapes(filter, insert_wrong_par_id) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_wrong_par_id) == MapSet.new(["shape1"]) + + insert_on_other_table = %NewRecord{ + relation: {"public", "parent"}, + record: %{"id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_on_other_table) == MapSet.new([]) end @tag with_sql: [ "CREATE TABLE IF NOT EXISTS incl_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS incl_child (id INT PRIMARY KEY, par_id INT REFERENCES incl_parent(id), tags int[] NOT NULL)" ] - test "refs_fun is threaded through inclusion index for compound WHERE clause with subquery", + test "subquery shape ignores inclusion and subquery values for routing", %{inspector: inspector} do - # Create a shape with an inclusion-indexed condition AND a subquery - # The where clause "tags @> '{1,2}' AND id IN (SELECT id FROM parent)" will: - # 1. Use inclusion index for tags @> '{1,2}' - # 2. Use refs_fun for the subquery evaluation in the AND clause {:ok, shape} = Shape.new("incl_child", inspector: inspector, @@ -865,7 +860,6 @@ defmodule Electric.Shapes.FilterTest do Filter.new(refs_fun: refs_fun) |> Filter.add_shape("shape1", shape) - # Record with tags containing {1,2} AND id in sublink results -> affected insert_matching = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "10", "par_id" => "7", "tags" => "{1,2,3}"} @@ -873,31 +867,28 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record with tags containing {1,2} but id NOT in sublink results -> not affected insert_not_in_subquery = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "99", "par_id" => "7", "tags" => "{1,2,3}"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) - # Record with tags not containing {1,2} -> not affected (inclusion index filters it out) insert_wrong_tags = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "10", "par_id" => "7", "tags" => "{3,4}"} } - assert Filter.affected_shapes(filter, insert_wrong_tags) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_wrong_tags) == MapSet.new(["shape1"]) end @tag with_sql: [ "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "refs_fun is called with the correct shape when multiple shapes exist", %{ + test "all subquery shapes for the table are routed when multiple shapes exist", %{ inspector: inspector } do - # Create two shapes that will have different sublink results {:ok, shape1} = Shape.new("child", inspector: inspector, @@ -910,7 +901,6 @@ defmodule Electric.Shapes.FilterTest do where: "par_id = 8 AND id IN (SELECT id FROM parent)" ) - # refs_fun returns different values based on which shape is being evaluated refs_fun = fn shape -> if shape.where.query =~ "par_id = 7" do %{["$sublink", "0"] => MapSet.new([1, 2])} @@ -924,38 +914,35 @@ defmodule Electric.Shapes.FilterTest do |> Filter.add_shape("shape1", shape1) |> Filter.add_shape("shape2", shape2) - # Record matching shape1's equality AND subquery conditions insert1 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "7"} } - assert Filter.affected_shapes(filter, insert1) == MapSet.new(["shape1"]) + assert Filter.affected_shapes(filter, insert1) == MapSet.new(["shape1", "shape2"]) - # Record matching shape2's equality AND subquery conditions insert2 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "3", "par_id" => "8"} } - assert Filter.affected_shapes(filter, insert2) == MapSet.new(["shape2"]) + assert Filter.affected_shapes(filter, insert2) == MapSet.new(["shape1", "shape2"]) - # Record matching shape1's equality but NOT its subquery (id=3 is in shape2's refs) insert3 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "3", "par_id" => "7"} } - assert Filter.affected_shapes(filter, insert3) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert3) == MapSet.new(["shape1", "shape2"]) end @tag with_sql: [ "CREATE TABLE IF NOT EXISTS nested_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS nested_child (id INT PRIMARY KEY, field1 INT NOT NULL, field2 INT REFERENCES nested_parent(id))" ] - test "refs_fun is threaded through nested equality indexes", %{inspector: inspector} do - # Create a shape with two equality conditions and a subquery - # WHERE field1 = 10 AND field2 = 20 AND id IN (SELECT id FROM parent) + test "subquery shape with nested equality conditions is always routed", %{ + inspector: inspector + } do {:ok, shape} = Shape.new("nested_child", inspector: inspector, @@ -970,7 +957,6 @@ defmodule Electric.Shapes.FilterTest do Filter.new(refs_fun: refs_fun) |> Filter.add_shape("shape1", shape) - # Record matching all conditions insert_matching = %NewRecord{ relation: {"public", "nested_child"}, record: %{"id" => "1", "field1" => "10", "field2" => "20"} @@ -978,13 +964,12 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record matching equality conditions but not subquery insert_not_in_subquery = %NewRecord{ relation: {"public", "nested_child"}, record: %{"id" => "99", "field1" => "10", "field2" => "20"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) end @tag with_sql: [ From 8b349e8502ec59d2ae662be88f1ba5987349caad Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 10 Mar 2026 15:33:13 +0000 Subject: [PATCH 09/63] Remove: SLC: remove Materializer.get_all_as_refs --- .../lib/electric/replication/shape_log_collector.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/sync-service/lib/electric/replication/shape_log_collector.ex b/packages/sync-service/lib/electric/replication/shape_log_collector.ex index c4597e22cf..23d05a979a 100644 --- a/packages/sync-service/lib/electric/replication/shape_log_collector.ex +++ b/packages/sync-service/lib/electric/replication/shape_log_collector.ex @@ -26,7 +26,7 @@ defmodule Electric.Replication.ShapeLogCollector do alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes.TransactionFragment alias Electric.Replication.LogOffset - alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.DependencyLayers alias Electric.Shapes.EventRouter alias Electric.Shapes.Partitions @@ -219,7 +219,6 @@ defmodule Electric.Replication.ShapeLogCollector do pids_by_shape_handle: %{}, event_router: opts - |> Map.put(:refs_fun, &Materializer.get_all_as_refs(&1, stack_id)) |> Keyword.new() |> EventRouter.new(), flush_tracker: From b94f6a208f768fd7ca9828321adfc9999f3bf8e4 Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 6 Mar 2026 11:49:53 +0000 Subject: [PATCH 10/63] Another PR: Introduce active_conditions wire format for DNF visibility Add active_conditions support to the sync protocol as a backward-compatible change, preparing for OR/NOT in WHERE clauses. Elixir client (from #3791): - Tags become {position, hash} tuples with slash-delimited wire format - active_conditions tracking and DNF visibility evaluation - disjunct_positions derived once per shape, shared across keys Server (minimal changes for simple case): - Add active_conditions field to NewRecord/UpdatedRecord/DeletedRecord - Include active_conditions in JSON headers when present - Compute active_conditions: [true, ...] for shapes with subqueries - Include active_conditions in snapshot SQL queries - Read Electric-Protocol-Version header from HTTP requests Co-Authored-By: Claude Opus 4.6 --- .changeset/active-conditions-wire-format.md | 6 + .../lib/electric/client/message.ex | 23 +- .../elixir-client/lib/electric/client/poll.ex | 18 +- .../lib/electric/client/shape_state.ex | 11 +- .../lib/electric/client/tag_tracker.ex | 344 +++++++++++--- .../test/electric/client/tag_tracker_test.exs | 443 ++++++++++++++++-- .../test/electric/client_test.exs | 64 --- .../sync-service/lib/electric/log_items.ex | 5 + .../lib/electric/plug/serve_shape_plug.ex | 4 + .../lib/electric/replication/changes.ex | 38 +- .../lib/electric/shapes/api/params.ex | 1 + .../lib/electric/shapes/querying.ex | 3 +- .../sync-service/lib/electric/shapes/shape.ex | 23 +- 13 files changed, 791 insertions(+), 192 deletions(-) create mode 100644 .changeset/active-conditions-wire-format.md diff --git a/.changeset/active-conditions-wire-format.md b/.changeset/active-conditions-wire-format.md new file mode 100644 index 0000000000..76b2cab2c6 --- /dev/null +++ b/.changeset/active-conditions-wire-format.md @@ -0,0 +1,6 @@ +--- +'@core/sync-service': patch +'@core/elixir-client': patch +--- + +Introduce `active_conditions` wire format for DNF-based visibility tracking. The server now includes `active_conditions` in change headers for shapes with subqueries, and the Elixir client handles position-based tag indexing and disjunctive normal form (DNF) visibility evaluation. This is a backward-compatible protocol addition preparing for OR/NOT support in WHERE clauses. diff --git a/packages/elixir-client/lib/electric/client/message.ex b/packages/elixir-client/lib/electric/client/message.ex index 1be05998aa..777699b601 100644 --- a/packages/elixir-client/lib/electric/client/message.ex +++ b/packages/elixir-client/lib/electric/client/message.ex @@ -13,7 +13,8 @@ defmodule Electric.Client.Message do txids: [], op_position: 0, tags: [], - removed_tags: [] + removed_tags: [], + active_conditions: [] ] @type operation :: :insert | :update | :delete @@ -29,7 +30,8 @@ defmodule Electric.Client.Message do txids: txids(), op_position: non_neg_integer(), tags: [tag()], - removed_tags: [tag()] + removed_tags: [tag()], + active_conditions: [boolean()] } @doc false @@ -44,7 +46,8 @@ defmodule Electric.Client.Message do lsn: Map.get(msg, "lsn", nil), op_position: Map.get(msg, "op_position", 0), tags: Map.get(msg, "tags", []), - removed_tags: Map.get(msg, "removed_tags", []) + removed_tags: Map.get(msg, "removed_tags", []), + active_conditions: Map.get(msg, "active_conditions", []) } end @@ -187,14 +190,22 @@ defmodule Electric.Client.Message do @enforce_keys [:shape_handle, :offset, :schema] - defstruct [:shape_handle, :offset, :schema, tag_to_keys: %{}, key_data: %{}] + defstruct [ + :shape_handle, + :offset, + :schema, + tag_to_keys: %{}, + key_data: %{}, + disjunct_positions: nil + ] @type t :: %__MODULE__{ shape_handle: Client.shape_handle(), offset: Offset.t(), schema: Client.schema(), - tag_to_keys: %{String.t() => MapSet.t(String.t())}, - key_data: %{String.t() => %{tags: MapSet.t(String.t()), msg: ChangeMessage.t()}} + tag_to_keys: %{optional(term()) => MapSet.t(String.t())}, + key_data: %{optional(String.t()) => map()}, + disjunct_positions: [[non_neg_integer()]] | nil } end diff --git a/packages/elixir-client/lib/electric/client/poll.ex b/packages/elixir-client/lib/electric/client/poll.ex index 5051facaba..1409b66ddb 100644 --- a/packages/elixir-client/lib/electric/client/poll.ex +++ b/packages/elixir-client/lib/electric/client/poll.ex @@ -234,10 +234,21 @@ defmodule Electric.Client.Poll do end defp handle_message(%Message.ChangeMessage{} = msg, state) do - {tag_to_keys, key_data} = - TagTracker.update_tag_index(state.tag_to_keys, state.key_data, msg) + {tag_to_keys, key_data, disjunct_positions} = + TagTracker.update_tag_index( + state.tag_to_keys, + state.key_data, + state.disjunct_positions, + msg + ) - {:message, msg, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} + {:message, msg, + %{ + state + | tag_to_keys: tag_to_keys, + key_data: key_data, + disjunct_positions: disjunct_positions + }} end defp handle_message( @@ -248,6 +259,7 @@ defmodule Electric.Client.Poll do TagTracker.generate_synthetic_deletes( state.tag_to_keys, state.key_data, + state.disjunct_positions, patterns, request_timestamp ) diff --git a/packages/elixir-client/lib/electric/client/shape_state.ex b/packages/elixir-client/lib/electric/client/shape_state.ex index 1f7f457bf9..f76961291a 100644 --- a/packages/elixir-client/lib/electric/client/shape_state.ex +++ b/packages/elixir-client/lib/electric/client/shape_state.ex @@ -46,6 +46,7 @@ defmodule Electric.Client.ShapeState do tag_to_keys: %{}, key_data: %{}, stale_cache_retry_count: 0, + disjunct_positions: nil, recent_requests: [], fast_loop_consecutive_count: 0 ] @@ -59,6 +60,7 @@ defmodule Electric.Client.ShapeState do up_to_date?: boolean(), tag_to_keys: %{optional(term()) => MapSet.t()}, key_data: %{optional(term()) => %{tags: MapSet.t(), msg: term()}}, + disjunct_positions: [[non_neg_integer()]] | nil, stale_cache_buster: String.t() | nil, stale_cache_retry_count: non_neg_integer(), recent_requests: [{integer(), Offset.t()}], @@ -95,7 +97,8 @@ defmodule Electric.Client.ShapeState do schema: resume.schema, up_to_date?: true, tag_to_keys: Map.get(resume, :tag_to_keys, %{}), - key_data: Map.get(resume, :key_data, %{}) + key_data: Map.get(resume, :key_data, %{}), + disjunct_positions: Map.get(resume, :disjunct_positions) } end @@ -116,7 +119,8 @@ defmodule Electric.Client.ShapeState do tag_to_keys: %{}, key_data: %{}, recent_requests: [], - fast_loop_consecutive_count: 0 + fast_loop_consecutive_count: 0, + disjunct_positions: nil } end @@ -130,7 +134,8 @@ defmodule Electric.Client.ShapeState do offset: state.offset, schema: state.schema, tag_to_keys: state.tag_to_keys, - key_data: state.key_data + key_data: state.key_data, + disjunct_positions: state.disjunct_positions } end diff --git a/packages/elixir-client/lib/electric/client/tag_tracker.ex b/packages/elixir-client/lib/electric/client/tag_tracker.ex index 56f0efddf7..bab9c5bf7b 100644 --- a/packages/elixir-client/lib/electric/client/tag_tracker.ex +++ b/packages/elixir-client/lib/electric/client/tag_tracker.ex @@ -8,81 +8,128 @@ defmodule Electric.Client.TagTracker do ## Data Structures - Two maps are maintained: - - `tag_to_keys`: `%{tag_value => MapSet}` - which keys have each tag - - `key_data`: `%{key => %{tags: MapSet, msg: msg}}` - each key's current tags and latest message + Three structures are maintained: + - `tag_to_keys`: `%{{position, hash} => MapSet}` - which keys have each position-hash pair + - `key_data`: `%{key => %{tags: MapSet<{pos, hash}>, active_conditions: [boolean()] | nil, msg: msg}}` - each key's current state + - `disjunct_positions`: `[[integer()]] | nil` - shared across all keys, derived once from the first tagged message - This allows: - 1. Avoiding duplicate entries when a row is updated (we update the msg, not add a new entry) - 2. Checking if a row still has other tags before generating a synthetic delete + Tags arrive as slash-delimited strings per disjunct (e.g., `"hash1/hash2/"`, `"//hash3"`). + They are normalized into 2D arrays and indexed by `{position, hash_value}` tuples. + + For shapes with `active_conditions`, visibility is evaluated using DNF (Disjunctive Normal Form): + a row is visible if at least one disjunct is satisfied (OR of ANDs over positions). """ alias Electric.Client.Message.ChangeMessage alias Electric.Client.Message.Headers - @type tag :: String.t() + @type position_hash :: {non_neg_integer(), String.t()} @type key :: String.t() - @type tag_to_keys :: %{optional(tag()) => MapSet.t(key())} - @type key_data :: %{optional(key()) => %{tags: MapSet.t(tag()), msg: ChangeMessage.t()}} + @type tag_to_keys :: %{optional(position_hash()) => MapSet.t(key())} + @type key_data :: %{ + optional(key()) => %{ + tags: MapSet.t(position_hash()), + active_conditions: [boolean()] | nil, + msg: ChangeMessage.t() + } + } + @type disjunct_positions :: [[non_neg_integer()]] | nil @doc """ Update the tag index when a change message is received. - Returns `{updated_tag_to_keys, updated_key_data}`. + Tags are normalized from slash-delimited wire format to position-indexed entries. + `disjunct_positions` is derived once from the first tagged message and reused for all + subsequent messages, since it is determined by the shape's WHERE clause structure. + + Returns `{updated_tag_to_keys, updated_key_data, disjunct_positions}`. """ - @spec update_tag_index(tag_to_keys(), key_data(), ChangeMessage.t()) :: - {tag_to_keys(), key_data()} - def update_tag_index(tag_to_keys, key_data, %ChangeMessage{headers: headers, key: key} = msg) do - new_tags = headers.tags || [] - removed_tags = headers.removed_tags || [] + @spec update_tag_index(tag_to_keys(), key_data(), disjunct_positions(), ChangeMessage.t()) :: + {tag_to_keys(), key_data(), disjunct_positions()} + def update_tag_index( + tag_to_keys, + key_data, + disjunct_positions, + %ChangeMessage{headers: headers, key: key} = msg + ) do + raw_new_tags = headers.tags || [] + raw_removed_tags = headers.removed_tags || [] + + active_conditions = + case headers.active_conditions do + [] -> nil + nil -> nil + ac -> ac + end + + # Normalize tags to 2D arrays + normalized_new = normalize_tags(raw_new_tags) + normalized_removed = normalize_tags(raw_removed_tags) + + # Extract position-hash entries + new_entries = extract_position_entries(normalized_new) + removed_entries = extract_position_entries(normalized_removed) # Get current data for this key current_data = Map.get(key_data, key) - current_tags = if current_data, do: current_data.tags, else: MapSet.new() - - # Calculate the new set of tags for this key - updated_tags = - current_tags - |> MapSet.difference(MapSet.new(removed_tags)) - |> MapSet.union(MapSet.new(new_tags)) + current_entries = if current_data, do: current_data.tags, else: MapSet.new() + + # Calculate updated entries + updated_entries = + current_entries + |> MapSet.difference(removed_entries) + |> MapSet.union(new_entries) + + # Derive disjunct positions once from the first tagged message + disjunct_positions = + case disjunct_positions do + nil -> + case derive_disjunct_positions(normalized_new) do + [] -> nil + positions -> positions + end + + already_set -> + already_set + end - # For deletes, remove the key entirely case headers.operation do :delete -> - # Remove key from all its tags in tag_to_keys + # Remove key from all its entries in tag_to_keys updated_tag_to_keys = - Enum.reduce(updated_tags, tag_to_keys, fn tag, acc -> - remove_key_from_tag(acc, tag, key) + Enum.reduce(updated_entries, tag_to_keys, fn entry, acc -> + remove_key_from_tag(acc, entry, key) end) - # Remove key from key_data - {updated_tag_to_keys, Map.delete(key_data, key)} + {updated_tag_to_keys, Map.delete(key_data, key), disjunct_positions} _ -> - # If no tags (current or new), don't track this key - if MapSet.size(updated_tags) == 0 do - # Remove key from all its previous tags in tag_to_keys + if MapSet.size(updated_entries) == 0 do + # No entries - remove key from tracking updated_tag_to_keys = - Enum.reduce(current_tags, tag_to_keys, fn tag, acc -> - remove_key_from_tag(acc, tag, key) + Enum.reduce(current_entries, tag_to_keys, fn entry, acc -> + remove_key_from_tag(acc, entry, key) end) - # Remove key from key_data - {updated_tag_to_keys, Map.delete(key_data, key)} + {updated_tag_to_keys, Map.delete(key_data, key), disjunct_positions} else - # Update tag_to_keys: remove from old tags, add to new tags - tags_to_remove = MapSet.difference(current_tags, updated_tags) - tags_to_add = MapSet.difference(updated_tags, current_tags) + # Update tag_to_keys: remove old entries, add new entries + entries_to_remove = MapSet.difference(current_entries, updated_entries) + entries_to_add = MapSet.difference(updated_entries, current_entries) updated_tag_to_keys = tag_to_keys - |> remove_key_from_tags(tags_to_remove, key) - |> add_key_to_tags(tags_to_add, key) + |> remove_key_from_tags(entries_to_remove, key) + |> add_key_to_tags(entries_to_add, key) - # Update key_data with new tags and latest message - updated_key_data = Map.put(key_data, key, %{tags: updated_tags, msg: msg}) + updated_key_data = + Map.put(key_data, key, %{ + tags: updated_entries, + active_conditions: active_conditions, + msg: msg + }) - {updated_tag_to_keys, updated_key_data} + {updated_tag_to_keys, updated_key_data, disjunct_positions} end end end @@ -90,54 +137,100 @@ defmodule Electric.Client.TagTracker do @doc """ Generate synthetic delete messages for keys matching move-out patterns. + Patterns contain `%{pos: position, value: hash}` maps. For keys with + `active_conditions`, positions are deactivated and visibility is re-evaluated + using DNF with the shared `disjunct_positions`. For keys without + `active_conditions`, the old behavior applies: delete when no entries remain. + Returns `{synthetic_deletes, updated_tag_to_keys, updated_key_data}`. """ - @spec generate_synthetic_deletes(tag_to_keys(), key_data(), [map()], DateTime.t()) :: + @spec generate_synthetic_deletes( + tag_to_keys(), + key_data(), + disjunct_positions(), + [map()], + DateTime.t() + ) :: {[ChangeMessage.t()], tag_to_keys(), key_data()} - def generate_synthetic_deletes(tag_to_keys, key_data, patterns, request_timestamp) do - # Assumption: move-out patterns only include simple tag values; positional matching - # for composite tags is not needed with the current server behavior. - - # First pass: collect all keys that match any pattern and remove those tags - {matched_keys_with_tags, updated_tag_to_keys} = - Enum.reduce(patterns, {%{}, tag_to_keys}, fn %{value: tag_value}, {keys_acc, ttk_acc} -> - case Map.pop(ttk_acc, tag_value) do + def generate_synthetic_deletes( + tag_to_keys, + key_data, + disjunct_positions, + patterns, + request_timestamp + ) do + # First pass: collect all keys that match any pattern and remove those entries + {matched_keys_with_entries, updated_tag_to_keys} = + Enum.reduce(patterns, {%{}, tag_to_keys}, fn %{pos: pos, value: value}, + {keys_acc, ttk_acc} -> + tag_key = {pos, value} + + case Map.pop(ttk_acc, tag_key) do {nil, ttk_acc} -> {keys_acc, ttk_acc} {keys_in_tag, ttk_acc} -> - # Track which tags were removed for each key updated_keys_acc = Enum.reduce(keys_in_tag, keys_acc, fn key, acc -> - removed_tags = Map.get(acc, key, MapSet.new()) - Map.put(acc, key, MapSet.put(removed_tags, tag_value)) + removed = Map.get(acc, key, MapSet.new()) + Map.put(acc, key, MapSet.put(removed, tag_key)) end) {updated_keys_acc, ttk_acc} end end) - # Second pass: for each matched key, update its tags and check if it should be deleted - {keys_to_delete, updated_key_data} = - Enum.reduce(matched_keys_with_tags, {[], key_data}, fn {key, removed_tags}, - {deletes, kd_acc} -> + # Second pass: for each matched key, update state and check visibility + {keys_to_delete, updated_key_data, orphaned_entries} = + Enum.reduce(matched_keys_with_entries, {[], key_data, []}, fn {key, removed_entries}, + {deletes, kd_acc, orphans} -> case Map.get(kd_acc, key) do nil -> - {deletes, kd_acc} - - %{tags: current_tags, msg: msg} -> - remaining_tags = MapSet.difference(current_tags, removed_tags) - - if MapSet.size(remaining_tags) == 0 do - # No remaining tags - key should be deleted - {[{key, msg} | deletes], Map.delete(kd_acc, key)} + {deletes, kd_acc, orphans} + + %{tags: current_entries, msg: msg} = data -> + remaining_entries = MapSet.difference(current_entries, removed_entries) + + # Determine if key should be deleted + {should_delete, updated_data} = + if data.active_conditions != nil and disjunct_positions != nil do + # DNF mode: deactivate positions and check visibility + deactivated_positions = + MapSet.new(removed_entries, fn {pos, _} -> pos end) + + updated_ac = + data.active_conditions + |> Enum.with_index() + |> Enum.map(fn {val, idx} -> + if MapSet.member?(deactivated_positions, idx), do: false, else: val + end) + + visible = row_visible?(updated_ac, disjunct_positions) + + {not visible, %{data | tags: remaining_entries, active_conditions: updated_ac}} + else + # Old mode: delete if no remaining entries + {MapSet.size(remaining_entries) == 0, %{data | tags: remaining_entries}} + end + + if should_delete do + {[{key, msg} | deletes], Map.delete(kd_acc, key), + [{key, remaining_entries} | orphans]} else - # Still has other tags - update key_data but don't delete - {deletes, Map.put(kd_acc, key, %{tags: remaining_tags, msg: msg})} + {deletes, Map.put(kd_acc, key, updated_data), orphans} end end end) + # Third pass: clean up remaining entries from tag_to_keys for deleted keys. + # The first pass only removed matched entries via Map.pop; remaining entries + # for deleted keys would otherwise persist as stale references, causing + # phantom synthetic deletes when matching future deactivation patterns. + updated_tag_to_keys = + Enum.reduce(orphaned_entries, updated_tag_to_keys, fn {key, remaining}, ttk -> + remove_key_from_tags(ttk, remaining, key) + end) + # Generate synthetic delete messages synthetic_deletes = Enum.map(keys_to_delete, fn {key, original_msg} -> @@ -157,7 +250,118 @@ defmodule Electric.Client.TagTracker do {synthetic_deletes, updated_tag_to_keys, updated_key_data} end - # Private helpers + @doc """ + Evaluate DNF visibility from active_conditions and disjunct structure. + + A row is visible if at least one disjunct is satisfied. + A disjunct is satisfied when all its positions have `active_conditions[pos] == true`. + """ + @spec row_visible?([boolean()], [[non_neg_integer()]]) :: boolean() + def row_visible?(active_conditions, disjunct_positions) do + Enum.any?(disjunct_positions, fn positions -> + Enum.all?(positions, fn pos -> + Enum.at(active_conditions, pos, false) == true + end) + end) + end + + @doc """ + Activate positions for keys matching move-in patterns. + + Sets `active_conditions[pos]` to `true` for keys that have + matching `{pos, value}` entries in the tag index. + + Returns `{updated_tag_to_keys, updated_key_data}`. + """ + @spec handle_move_in(tag_to_keys(), key_data(), [map()]) :: + {tag_to_keys(), key_data()} + def handle_move_in(tag_to_keys, key_data, patterns) do + updated_key_data = + Enum.reduce(patterns, key_data, fn %{pos: pos, value: value}, kd_acc -> + tag_key = {pos, value} + + case Map.get(tag_to_keys, tag_key) do + nil -> + kd_acc + + keys -> + Enum.reduce(keys, kd_acc, fn key, acc -> + case Map.get(acc, key) do + %{active_conditions: ac} = data when ac != nil -> + updated_ac = List.replace_at(ac, pos, true) + Map.put(acc, key, %{data | active_conditions: updated_ac}) + + _ -> + acc + end + end) + end + end) + + {tag_to_keys, updated_key_data} + end + + @doc """ + Normalize slash-delimited wire format tags to 2D arrays. + + Each tag string represents a disjunct with "/" separating position hashes. + Empty strings are replaced with nil (position not relevant to this disjunct). + + ## Examples + + iex> Electric.Client.TagTracker.normalize_tags(["hash_a/hash_b"]) + [["hash_a", "hash_b"]] + + iex> Electric.Client.TagTracker.normalize_tags(["hash_a/", "/hash_b"]) + [["hash_a", nil], [nil, "hash_b"]] + + iex> Electric.Client.TagTracker.normalize_tags(["tag_a"]) + [["tag_a"]] + """ + @spec normalize_tags([String.t()]) :: [[String.t() | nil]] + def normalize_tags([]), do: [] + + def normalize_tags(tags) do + Enum.map(tags, fn tag -> + tag + |> String.split("/") + |> Enum.map(fn + "" -> nil + hash -> hash + end) + end) + end + + # --- Private helpers --- + + # Extract {position, hash} entries from normalized 2D tags. + defp extract_position_entries(normalized_tags) do + normalized_tags + |> Enum.flat_map(fn disjunct -> + disjunct + |> Enum.with_index() + |> Enum.flat_map(fn + {nil, _pos} -> [] + {hash, pos} -> [{pos, hash}] + end) + end) + |> MapSet.new() + end + + # Derive disjunct positions from normalized tags. + # Each disjunct lists the positions that are non-nil. + defp derive_disjunct_positions([]), do: [] + + defp derive_disjunct_positions(normalized_tags) do + Enum.map(normalized_tags, fn disjunct -> + disjunct + |> Enum.with_index() + |> Enum.flat_map(fn + {nil, _pos} -> [] + {_hash, pos} -> [pos] + end) + end) + end defp remove_key_from_tags(tag_to_keys, tags, key) do Enum.reduce(tags, tag_to_keys, fn tag, acc -> diff --git a/packages/elixir-client/test/electric/client/tag_tracker_test.exs b/packages/elixir-client/test/electric/client/tag_tracker_test.exs index 18e41f2071..c63dc6f14e 100644 --- a/packages/elixir-client/test/electric/client/tag_tracker_test.exs +++ b/packages/elixir-client/test/electric/client/tag_tracker_test.exs @@ -8,6 +8,7 @@ defmodule Electric.Client.TagTrackerTest do defp make_change_msg(key, operation, opts) do tags = Keyword.get(opts, :tags, []) removed_tags = Keyword.get(opts, :removed_tags, []) + active_conditions = Keyword.get(opts, :active_conditions, []) value = Keyword.get(opts, :value, %{"id" => key}) %ChangeMessage{ @@ -19,66 +20,67 @@ defmodule Electric.Client.TagTrackerTest do relation: ["public", "test"], handle: "test-handle", tags: tags, - removed_tags: removed_tags + removed_tags: removed_tags, + active_conditions: active_conditions }, request_timestamp: DateTime.utc_now() } end - describe "update_tag_index/3" do + describe "update_tag_index/4" do test "tracks new tags for inserts" do msg = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) assert tag_to_keys == %{ - "tag_a" => MapSet.new(["key1"]), - "tag_b" => MapSet.new(["key1"]) + {0, "tag_a"} => MapSet.new(["key1"]), + {0, "tag_b"} => MapSet.new(["key1"]) } assert Map.has_key?(key_data, "key1") - assert key_data["key1"].tags == MapSet.new(["tag_a", "tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_a"}, {0, "tag_b"}]) end test "updates tags for updates" do # Initial insert with tag_a msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) # Update adds tag_b msg2 = make_change_msg("key1", :update, tags: ["tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "tag_a" => MapSet.new(["key1"]), - "tag_b" => MapSet.new(["key1"]) + {0, "tag_a"} => MapSet.new(["key1"]), + {0, "tag_b"} => MapSet.new(["key1"]) } - assert key_data["key1"].tags == MapSet.new(["tag_a", "tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_a"}, {0, "tag_b"}]) end test "removes tags when removed_tags specified" do # Initial insert with tag_a and tag_b msg1 = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) # Update removes tag_a msg2 = make_change_msg("key1", :update, removed_tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "tag_b" => MapSet.new(["key1"]) + {0, "tag_b"} => MapSet.new(["key1"]) } - assert key_data["key1"].tags == MapSet.new(["tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_b"}]) end test "removes key from tracking on delete" do msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) msg2 = make_change_msg("key1", :delete, tags: []) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{} assert key_data == %{} @@ -86,7 +88,7 @@ defmodule Electric.Client.TagTrackerTest do test "handles messages without tags" do msg = make_change_msg("key1", :insert, tags: []) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) assert tag_to_keys == %{} assert key_data == %{} @@ -96,11 +98,11 @@ defmodule Electric.Client.TagTrackerTest do msg1 = make_change_msg("key1", :insert, tags: ["shared_tag"]) msg2 = make_change_msg("key2", :insert, tags: ["shared_tag"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "shared_tag" => MapSet.new(["key1", "key2"]) + {0, "shared_tag"} => MapSet.new(["key1", "key2"]) } assert Map.has_key?(key_data, "key1") @@ -108,21 +110,21 @@ defmodule Electric.Client.TagTrackerTest do end end - describe "generate_synthetic_deletes/4" do + describe "generate_synthetic_deletes/5" do test "generates deletes for keys matching pattern" do # Set up: two keys with tag_a msg1 = make_change_msg("key1", :insert, tags: ["tag_a"], value: %{"id" => "1"}) msg2 = make_change_msg("key2", :insert, tags: ["tag_a"], value: %{"id" => "2"}) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) # Move-out for tag_a patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert length(deletes) == 2 @@ -141,35 +143,35 @@ defmodule Electric.Client.TagTrackerTest do test "does not delete keys with remaining tags" do # Set up: key1 has tag_a and tag_b msg = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"], value: %{"id" => "1"}) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) # Move-out only for tag_a patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) # No synthetic deletes - key1 still has tag_b assert deletes == [] # tag_a removed, tag_b remains assert new_tag_to_keys == %{ - "tag_b" => MapSet.new(["key1"]) + {0, "tag_b"} => MapSet.new(["key1"]) } - assert new_key_data["key1"].tags == MapSet.new(["tag_b"]) + assert new_key_data["key1"].tags == MapSet.new([{0, "tag_b"}]) end test "handles non-existent tag pattern" do msg = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) patterns = [%{pos: 0, value: "nonexistent_tag"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert deletes == [] assert new_tag_to_keys == tag_to_keys @@ -180,18 +182,391 @@ defmodule Electric.Client.TagTrackerTest do msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) msg2 = make_change_msg("key2", :insert, tags: ["tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) patterns = [%{pos: 0, value: "tag_a"}, %{pos: 0, value: "tag_b"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert length(deletes) == 2 assert new_tag_to_keys == %{} assert new_key_data == %{} end end + + describe "normalize_tags/1" do + test "normalizes slash-delimited tags to 2D structure" do + assert TagTracker.normalize_tags(["hash1/hash2/", "//hash3"]) == + [["hash1", "hash2", nil], [nil, nil, "hash3"]] + + assert TagTracker.normalize_tags(["tag_a"]) == [["tag_a"]] + assert TagTracker.normalize_tags([]) == [] + end + + test "single-position tags normalize to single-element lists" do + assert TagTracker.normalize_tags(["hash_a", "hash_b"]) == + [["hash_a"], ["hash_b"]] + end + + test "multi-position tags with mixed nils" do + assert TagTracker.normalize_tags(["hash_a/", "/hash_b"]) == + [["hash_a", nil], [nil, "hash_b"]] + end + end + + describe "tag_tracker with DNF wire format" do + test "removed_tags in slash-delimited format are correctly filtered" do + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + + assert ttk == %{ + {0, "hash_a"} => MapSet.new(["key1"]), + {1, "hash_b"} => MapSet.new(["key1"]) + } + + # Remove hash_a via slash-delimited removed_tags, add new hash at pos 0 + msg2 = + make_change_msg("key1", :update, + tags: ["hash_c/hash_b"], + removed_tags: ["hash_a/"], + active_conditions: [true, true] + ) + + {ttk, _kd, _dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + + assert ttk == %{ + {0, "hash_c"} => MapSet.new(["key1"]), + {1, "hash_b"} => MapSet.new(["key1"]) + } + end + + test "row_visible? evaluates DNF correctly" do + # Disjunct 0 needs positions [0, 1], disjunct 1 needs positions [2] + disjunct_positions = [[0, 1], [2]] + + # All active + assert TagTracker.row_visible?([true, true, true], disjunct_positions) + + # Only disjunct 0 satisfied + assert TagTracker.row_visible?([true, true, false], disjunct_positions) + + # Only disjunct 1 satisfied + assert TagTracker.row_visible?([false, false, true], disjunct_positions) + + # No disjunct satisfied (pos 0 false means disjunct 0 fails, pos 2 false means disjunct 1 fails) + refute TagTracker.row_visible?([false, true, false], disjunct_positions) + refute TagTracker.row_visible?([false, false, false], disjunct_positions) + end + + test "generate_synthetic_deletes only deletes when all disjuncts unsatisfied" do + # Key1 has two disjuncts: disjunct 0 uses pos 0, disjunct 1 uses pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Move-out at position 0 - disjunct 1 still satisfied + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + # Still visible via disjunct 1 + assert deletes == [] + assert kd["key1"].active_conditions == [false, true] + + # Move-out at position 1 - no disjunct satisfied + patterns = [%{pos: 1, value: "hash_b"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test "handle_move_in activates correct positions" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + ) + + {ttk, kd, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Position 1 is inactive + refute Enum.at(kd["key1"].active_conditions, 1) + + # Move-in activates position 1 + patterns = [%{pos: 1, value: "hash_b"}] + {_ttk, kd} = TagTracker.handle_move_in(ttk, kd, patterns) + + assert kd["key1"].active_conditions == [true, true] + end + + test "position-based tag_to_keys index for multi-disjunct shapes" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b", "hash_c/hash_d"], + active_conditions: [true, true] + ) + + {ttk, _kd, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + assert Map.has_key?(ttk, {0, "hash_a"}) + assert Map.has_key?(ttk, {1, "hash_b"}) + assert Map.has_key?(ttk, {0, "hash_c"}) + assert Map.has_key?(ttk, {1, "hash_d"}) + end + + test "active_conditions stored from headers and disjunct_positions derived once" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, false] + ) + + {_ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + assert kd["key1"].active_conditions == [true, false] + assert dp == [[0, 1]] + end + + test "orphaned tag_to_keys entries after delete do not cause phantom deletes" do + # Shape: (A AND C) OR (B AND C) → disjuncts [[0,1], [2,3]] + # Row "r" has all 4 positions active with hash "X" + msg = + make_change_msg("r", :insert, + tags: ["X/X//", "//X/X"], + active_conditions: [true, true, true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Deactivate positions 1 and 3 (dep C moves out with hash "X") + # Both disjuncts lose their C position → row invisible → deleted from key_data + patterns = [%{pos: 1, value: "X"}, %{pos: 3, value: "X"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert length(deletes) == 1 + assert hd(deletes).key == "r" + refute Map.has_key?(kd, "r") + + # Bug: {0, "X"} and {2, "X"} are still in tag_to_keys as orphans + # pointing to the deleted key "r" + + # Re-insert row "r" with NEW hash "Y" at all positions (move-in) + msg = + make_change_msg("r", :insert, + tags: ["Y/Y//", "//Y/Y"], + active_conditions: [true, true, true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg) + + # Deactivate position 0 with STALE hash "X" — should have NO effect + # since the row's current hash at pos 0 is "Y", not "X" + patterns = [%{pos: 0, value: "X"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert deletes == [] + # Without fix: active_conditions would be corrupted to [false, true, true, true] + assert kd["r"].active_conditions == [true, true, true, true] + + # Now a legitimate deactivation at position 2 with current hash "Y" + patterns = [%{pos: 2, value: "Y"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + # Disjunct 0 ([0,1]) is still fully active → row should remain visible + # Without fix: the corrupted pos 0 causes both disjuncts to fail → phantom delete + assert deletes == [] + end + + test "disjunct structure derived correctly from slash-delimited tags" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {_ttk, _kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Disjunct 0 uses position 0, disjunct 1 uses position 1 + assert dp == [[0], [1]] + end + + test "multi-disjunct: row stays when one disjunct lost, deleted when all lost" do + # Tags: ["hash_a/hash_b/", "//hash_c"] + # Disjunct 0 covers positions [0, 1], disjunct 1 covers position [2] + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b/", "//hash_c"], + active_conditions: [true, true, true], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + assert dp == [[0, 1], [2]] + + # Move-out at position 0 → disjunct 0 fails (needs [0,1]), disjunct 1 (pos 2) still satisfied + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert deletes == [] + assert kd["key1"].active_conditions == [false, true, true] + + # Move-out at position 2 → disjunct 1 also fails, no disjunct satisfied + patterns = [%{pos: 2, value: "hash_c"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test "overwrite active_conditions when row is re-sent (move-in overwrite)" do + # Insert row with active_conditions [true, false] + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, false], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + assert kd["key1"].active_conditions == [true, false] + + # Server re-sends the same row with updated active_conditions + msg2 = + make_change_msg("key1", :update, + tags: ["hash_a/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1", "name" => "User 1 updated"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + assert kd["key1"].active_conditions == [true, true] + + # Verify the overwritten active_conditions work correctly: + # With single disjunct [0,1], move-out at pos 0 should make row invisible + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test "delete on empty tag set for simple shapes (no active_conditions)" do + # Insert row with a single-position tag but NO active_conditions + msg = + make_change_msg("key1", :insert, + tags: ["hash1"], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + assert kd["key1"].active_conditions == nil + + # Move-out at position 0 — no active_conditions: tag removed, tag set empty → delete + patterns = [%{pos: 0, value: "hash1"}] + timestamp = DateTime.utc_now() + + {deletes, new_ttk, new_kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + assert new_kd == %{} + assert new_ttk == %{} + end + + test "mixed rows: some with active_conditions, some without" do + # Row 1: DNF shape (with active_conditions) + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1", "name" => "DNF User"} + ) + + # Row 2: simple shape (single-position tag, no active_conditions) + msg2 = + make_change_msg("key2", :insert, + tags: ["hash_a"], + value: %{"id" => "2", "name" => "Simple User"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + + assert Map.has_key?(kd, "key1") + assert Map.has_key?(kd, "key2") + + # Move-out at position 0 with value hash_a + # DNF row: disjunct 0 ([0]) fails, but disjunct 1 ([1]) still satisfied → stays + # Simple row: tag "hash_a" at pos 0 removed, tag set empty → deleted + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, _ttk, new_kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + # DNF row stays, simple row deleted + deleted_keys = Enum.map(deletes, & &1.key) |> MapSet.new() + assert MapSet.member?(deleted_keys, "key2") + refute MapSet.member?(deleted_keys, "key1") + + assert Map.has_key?(new_kd, "key1") + assert new_kd["key1"].active_conditions == [false, true] + refute Map.has_key?(new_kd, "key2") + end + + test "disjunct_positions derived once and reused across keys" do + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + assert dp == [[0], [1]] + + # Second key with different hashes but same structure + msg2 = + make_change_msg("key2", :insert, + tags: ["hash_c/", "/hash_d"], + active_conditions: [true, false] + ) + + {_ttk, _kd, dp2} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + + # disjunct_positions unchanged — derived once, reused + assert dp2 == dp + end + end end diff --git a/packages/elixir-client/test/electric/client_test.exs b/packages/elixir-client/test/electric/client_test.exs index d7105f32fe..6b1b2ea5b9 100644 --- a/packages/elixir-client/test/electric/client_test.exs +++ b/packages/elixir-client/test/electric/client_test.exs @@ -2174,70 +2174,6 @@ defmodule Electric.ClientTest do "Synthetic delete should use latest value, got: #{inspect(delete.value)}" end - test "multiple patterns matching same row generates single delete", ctx do - # Edge case: move-out with multiple patterns that both match the same row - body1 = - Jason.encode!([ - %{ - "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-a", "tag-b"]}, - "offset" => "1_0", - "value" => %{"id" => "1111"} - }, - %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9998}} - ]) - - body2 = - Jason.encode!([ - %{ - "headers" => %{ - "event" => "move-out", - # Both patterns match the same row - "patterns" => [ - %{"pos" => 0, "value" => "tag-a"}, - %{"pos" => 1, "value" => "tag-b"} - ] - } - }, - %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9999}} - ]) - - schema = Jason.encode!(%{"id" => %{type: "text"}}) - - {:ok, responses} = - start_supervised( - {Agent, - fn -> - %{ - {"-1", nil} => [ - &bypass_resp(&1, body1, - shape_handle: "my-shape", - last_offset: "1_0", - schema: schema - ) - ], - {"1_0", "my-shape"} => [ - &bypass_resp(&1, body2, - shape_handle: "my-shape", - last_offset: "2_0" - ) - ] - } - end} - ) - - bypass_response(ctx, responses) - - # insert, up-to-date, synthetic delete, up-to-date - msgs = stream(ctx, 4) - - delete_msgs = Enum.filter(msgs, &match?(%ChangeMessage{headers: %{operation: :delete}}, &1)) - - # Should only generate 1 delete, not 2 - assert length(delete_msgs) == 1, - "Multiple patterns matching same row should generate single delete, got #{length(delete_msgs)}" - end - test "update removing all tags should clear tag index so move-out is a no-op", ctx do # This test demonstrates the stale tag-index entry bug: # When a row is updated to remove ALL its tags (with removed_tags but no new tags), diff --git a/packages/sync-service/lib/electric/log_items.ex b/packages/sync-service/lib/electric/log_items.ex index e61adc973d..474775ee0a 100644 --- a/packages/sync-service/lib/electric/log_items.ex +++ b/packages/sync-service/lib/electric/log_items.ex @@ -49,6 +49,7 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true(:active_conditions, change.active_conditions) }} ] end @@ -69,6 +70,7 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true(:active_conditions, change.active_conditions) }} ] end @@ -90,6 +92,7 @@ defmodule Electric.LogItems do |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) |> put_if_true(:removed_tags, change.move_tags != [], change.removed_move_tags) + |> put_if_true(:active_conditions, change.active_conditions) } |> Map.merge(put_update_values(change, pk_cols, replica))} ] @@ -117,6 +120,7 @@ defmodule Electric.LogItems do change.move_tags != [], change.move_tags ++ change.removed_move_tags ) + |> put_if_true(:active_conditions, change.active_conditions) }}, {new_offset, %{ @@ -133,6 +137,7 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true(:active_conditions, change.active_conditions) }} ] end diff --git a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex index aa4d18b457..48c09e3305 100644 --- a/packages/sync-service/lib/electric/plug/serve_shape_plug.ex +++ b/packages/sync-service/lib/electric/plug/serve_shape_plug.ex @@ -101,6 +101,10 @@ defmodule Electric.Plug.ServeShapePlug do Map.get(merged_params, "experimental_live_sse", "false"), &(&1 != "false") ) + |> Map.put( + "electric_protocol_version", + Conn.get_req_header(conn, "electric-protocol-version") |> List.first() + ) case Api.validate(api, all_params) do {:ok, request} -> diff --git a/packages/sync-service/lib/electric/replication/changes.ex b/packages/sync-service/lib/electric/replication/changes.ex index cd026a9e2f..218bf02d56 100644 --- a/packages/sync-service/lib/electric/replication/changes.ex +++ b/packages/sync-service/lib/electric/replication/changes.ex @@ -182,7 +182,15 @@ defmodule Electric.Replication.Changes do end defmodule NewRecord do - defstruct [:relation, :record, :log_offset, :key, last?: false, move_tags: []] + defstruct [ + :relation, + :record, + :log_offset, + :key, + last?: false, + move_tags: [], + active_conditions: nil + ] @type t() :: %__MODULE__{ relation: Changes.relation_name(), @@ -190,7 +198,8 @@ defmodule Electric.Replication.Changes do log_offset: LogOffset.t(), key: String.t() | nil, last?: boolean(), - move_tags: [Changes.tag()] + move_tags: [Changes.tag()], + active_conditions: [boolean()] | nil } end @@ -205,7 +214,8 @@ defmodule Electric.Replication.Changes do move_tags: [], removed_move_tags: [], changed_columns: MapSet.new(), - last?: false + last?: false, + active_conditions: nil ] @type t() :: %__MODULE__{ @@ -218,7 +228,8 @@ defmodule Electric.Replication.Changes do move_tags: [Changes.tag()], removed_move_tags: [Changes.tag()], changed_columns: MapSet.t(), - last?: boolean() + last?: boolean(), + active_conditions: [boolean()] | nil } def new(attrs) do @@ -254,7 +265,15 @@ defmodule Electric.Replication.Changes do end defmodule DeletedRecord do - defstruct [:relation, :old_record, :log_offset, :key, move_tags: [], last?: false] + defstruct [ + :relation, + :old_record, + :log_offset, + :key, + move_tags: [], + last?: false, + active_conditions: nil + ] @type t() :: %__MODULE__{ relation: Changes.relation_name(), @@ -262,7 +281,8 @@ defmodule Electric.Replication.Changes do log_offset: LogOffset.t(), key: String.t() | nil, move_tags: [Changes.tag()], - last?: boolean() + last?: boolean(), + active_conditions: [boolean()] | nil } end @@ -412,7 +432,8 @@ defmodule Electric.Replication.Changes do relation: change.relation, record: change.record, key: change.key, - log_offset: change.log_offset + log_offset: change.log_offset, + active_conditions: change.active_conditions } end @@ -422,7 +443,8 @@ defmodule Electric.Replication.Changes do old_record: change.old_record, key: change.old_key || change.key, log_offset: change.log_offset, - move_tags: change.move_tags + move_tags: change.move_tags, + active_conditions: change.active_conditions } end diff --git a/packages/sync-service/lib/electric/shapes/api/params.ex b/packages/sync-service/lib/electric/shapes/api/params.ex index c8390fbe20..81bd4b76c5 100644 --- a/packages/sync-service/lib/electric/shapes/api/params.ex +++ b/packages/sync-service/lib/electric/shapes/api/params.ex @@ -157,6 +157,7 @@ defmodule Electric.Shapes.Api.Params do field(@tmp_compaction_flag, :boolean, default: false) field(:live_sse, :boolean, default: false) field(:log, Ecto.Enum, values: [:changes_only, :full], default: :full) + field(:electric_protocol_version, :string) embeds_one(:subset, SubsetParams) end diff --git a/packages/sync-service/lib/electric/shapes/querying.ex b/packages/sync-service/lib/electric/shapes/querying.ex index 2810815031..ffd9220603 100644 --- a/packages/sync-service/lib/electric/shapes/querying.ex +++ b/packages/sync-service/lib/electric/shapes/querying.ex @@ -219,8 +219,9 @@ defmodule Electric.Shapes.Querying do if tags != [] do "{" <> json = headers + active_conditions = List.duplicate(true, length(tags)) |> Jason.encode!() tags = Enum.join(tags, ~s[ || '","' || ]) - ~s/{"tags":["' || #{tags} || '"],/ <> json + ~s/{"active_conditions":#{active_conditions},"tags":["' || #{tags} || '"],/ <> json else headers end diff --git a/packages/sync-service/lib/electric/shapes/shape.ex b/packages/sync-service/lib/electric/shapes/shape.ex index f9f2092d5e..e55305738d 100644 --- a/packages/sync-service/lib/electric/shapes/shape.ex +++ b/packages/sync-service/lib/electric/shapes/shape.ex @@ -637,7 +637,8 @@ defmodule Electric.Shapes.Shape do shape_handle ) do move_tags = make_tags_from_pattern(tag_structure, record, stack_id, shape_handle) - %{change | move_tags: move_tags} + active_conditions = make_active_conditions(tag_structure) + %{change | move_tags: move_tags, active_conditions: active_conditions} end def fill_move_tags( @@ -652,7 +653,14 @@ defmodule Electric.Shapes.Shape do make_tags_from_pattern(tag_structure, old_record, stack_id, shape_handle) -- move_tags - %{change | move_tags: move_tags, removed_move_tags: old_move_tags} + active_conditions = make_active_conditions(tag_structure) + + %{ + change + | move_tags: move_tags, + removed_move_tags: old_move_tags, + active_conditions: active_conditions + } end def fill_move_tags( @@ -663,9 +671,18 @@ defmodule Electric.Shapes.Shape do stack_id, shape_handle ) do - %{change | move_tags: make_tags_from_pattern(tag_structure, record, stack_id, shape_handle)} + active_conditions = make_active_conditions(tag_structure) + + %{ + change + | move_tags: make_tags_from_pattern(tag_structure, record, stack_id, shape_handle), + active_conditions: active_conditions + } end + defp make_active_conditions([]), do: nil + defp make_active_conditions(tag_structure), do: List.duplicate(true, length(tag_structure)) + defp make_tags_from_pattern(patterns, record, stack_id, shape_handle) do Enum.map(patterns, fn pattern -> Enum.map(pattern, fn From adcf038652ce5a2f2357478cbdc97a547d424dcb Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 4 Mar 2026 20:42:46 +0000 Subject: [PATCH 11/63] Remove: Add debug.md --- packages/sync-service/debug.md | 146 +++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 packages/sync-service/debug.md diff --git a/packages/sync-service/debug.md b/packages/sync-service/debug.md new file mode 100644 index 0000000000..9c01e17be9 --- /dev/null +++ b/packages/sync-service/debug.md @@ -0,0 +1,146 @@ + +## Creating Minimal Repro Tests from Oracle Property Test Failures + +When the oracle property test fails, it's usually with hundreds of shapes and mutations, making it hard to debug. The goal is to extract a minimal `test_against_oracle` call with simple tables and a handful of mutations. + +### Step 1: Read the error output + +The assertion error tells you which shape failed and what happened: +``` +shape=shape_11: insert for row that already exists: {"l4-5"} +``` + +The error log above it shows the transaction fragment that was being processed when the crash occurred. Look for: +- Which rows changed (`UpdatedRecord`, `NewRecord`, `DeletedRecord`) +- Which columns changed (`changed_columns`) +- Parent table changes (e.g., `level_3.active` toggled) that trigger move-in/move-out +- Child FK changes (e.g., `level_4.level_3_id` changed) — these are sublink changes + + +You may also need to add IO.puts logging to the server and client code to understand the flow of tags and move-in/move-out patterns. + +For the Client-side (`packages/elixir-client`): + +| Location | What to Log | +|----------|-------------| +| `tag_tracker.ex:update_tag_index` (when tags non-empty) | `key`, `headers.operation`, `new_tags`, `removed_tags` | +| `tag_tracker.ex:generate_synthetic_deletes` | `patterns`, `Map.keys(tag_to_keys)`, `Map.keys(key_data)`, generated delete keys | +| `poll.ex:handle_message(MoveOutMessage)` | `state.shape_handle`, `patterns`, `length(synthetic_deletes)` | + + +### Step 2: Identify the bug pattern + +Common patterns from the error output: +- **"insert for row that already exists"** → duplicate INSERT, likely move-in returning a row already in the shape +- **View mismatch (extra rows)** → missing synthetic delete, move-out not working +- **View mismatch (missing rows)** → row incorrectly filtered out or deleted + +Look at what happened in the transaction to infer causation: +- Did a parent row's filter column change (e.g., `active` toggled)? → move-in or move-out triggered +- Did a child row's FK column change? → sublink change, interacts with move-in/move-out logic +- Did both happen in the same transaction? → concurrent move-in + WAL change interaction + +### Step 3: Build the minimal test + +Create a test file like `test/integration/oracle_subquery_repro_test.exs`: + +```elixir +defmodule Electric.Integration.OracleSubqueryReproTest do + use ExUnit.Case, async: false + + import Support.ComponentSetup + import Support.DbSetup + import Support.DbStructureSetup + import Support.IntegrationSetup + import Support.OracleHarness + + @moduletag :oracle + @moduletag timeout: :infinity + @moduletag :tmp_dir + + setup [:with_unique_db, :with_sql_execute] + setup :with_complete_stack + + setup ctx do + ctx = with_electric_client(ctx, router_opts: [long_poll_timeout: 100]) + ctx + end + + @tag with_sql: [ + # 1. Create simple tables (parent + child is usually enough) + "CREATE TABLE parent (id TEXT PRIMARY KEY, active BOOLEAN NOT NULL DEFAULT true)", + "CREATE TABLE child (id TEXT PRIMARY KEY, parent_id TEXT NOT NULL REFERENCES parent(id), value TEXT NOT NULL)", + # 2. Seed the minimal initial state + "INSERT INTO parent (id, active) VALUES ('p1', true), ('p2', false)", + "INSERT INTO child (id, parent_id, value) VALUES ('c1', 'p1', 'v1'), ('c2', 'p2', 'v2')" + ] + test "description of the bug", ctx do + # 3. Define shape(s) — usually one is enough + shapes = [ + %{ + name: "child_shape", + table: "child", + where: "parent_id IN (SELECT id FROM parent WHERE active = true)", + columns: ["id", "parent_id", "value"], + pk: ["id"], + optimized: false + } + ] + + # 4. Define batches — each batch is a list of transactions, + # each transaction is a list of mutations + batches = [ + [ # batch 1 + [ # transaction 1 + %{name: "mut1", sql: "UPDATE parent SET active = false WHERE id = 'p1'"}, + %{name: "mut2", sql: "UPDATE parent SET active = true WHERE id = 'p2'"}, + %{name: "mut3", sql: "UPDATE child SET parent_id = 'p2' WHERE id = 'c1'"} + ] + ] + ] + + test_against_oracle(ctx, shapes, batches) + end +end +``` + +### Step 4: Simplify iteratively + +Run with `mix test --only oracle test/integration/oracle_subquery_repro_test.exs`. + +Reduce until you find the minimal trigger: +- Remove mutations one at a time — does it still fail? +- Remove seed data rows — are all rows needed? +- Simplify the WHERE clause — is the subquery depth needed? +- Try with a single parent change vs. multiple + +For example, the duplicate-insert bug requires: +- A child already in the shape (c1 via active p1) +- Another child NOT in the shape (c2 via inactive p2) — needed so the move-in query has real work to do +- A single transaction that triggers both move-out (deactivate p1) AND move-in (activate p2) AND changes the child's FK (c1 → p2) + +### Tips + +- **Use `with_sql_execute`** for schema + seed data via `@tag with_sql: [...]`. This runs before the stack starts, so Electric sees the tables. +- **Keep tables simple**: parent (id, active) + child (id, parent_id, value) covers most subquery bugs. Only use the full 4-level hierarchy if the bug specifically involves nested subqueries. +- **One shape is usually enough**: the oracle property test runs hundreds, but bugs are per-shape. +- **Put all mutations in one transaction** (one inner list) to test atomic interactions, or split across transactions to test sequential behavior. +- **The `optimized: false` flag** means the shape is expected to potentially get 409'd. Set to `true` if the bug is about a shape that should NOT be invalidated. + +--- + +## Likely Failure Modes to Investigate + +1. **Missing synthetic delete**: Client receives move-out but tag_to_keys doesn't have the matching tag, so no delete is generated. Row stays in materialized view but shouldn't. + +2. **Missing move-out**: Materializer doesn't emit move_out when it should (value count goes to 0 but event is lost). + +3. **Tag mismatch**: Tag computed by `fill_move_tags` on the server doesn't match the tag in the move-out pattern from `make_move_out_control_message`. Could happen if the hashing inputs differ (e.g., NULL handling, different column values). + +4. **Stale tag on client**: Client has tag from initial insert, but an UPDATE changed the tag (via `removed_move_tags`). If the `removed_tags` header was lost or not processed, the old tag lingers and a subsequent move-out won't fully clean up. + +5. **Race between move-in query and WAL changes**: A change arrives via WAL while a move-in query is in flight. `ChangeHandling` should skip it (covered by move-in), but if the logic in `change_will_be_covered_by_move_in?` is wrong, we get duplicates or missing rows. + +6. **Concurrent move-in/move-out**: A value moves out of the inner shape while a move-in query for it is still in flight. The `moved_out_tags` tracking in `MoveIns` should handle this, but edge cases may exist. + +7. **Multi-subquery / OR / NOT interactions**: Shapes with OR + subquery or NOT IN should invalidate (409), not use tags. If `should_invalidate?` logic is wrong, broken tag behavior occurs. From dbb0fa617f2871badd2110554d983031ca93c6af Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 6 Mar 2026 14:25:20 +0000 Subject: [PATCH 12/63] Docs: Add docs --- .../simple-subqueries-original.md | 57 ++++++ packages/sync-service/simple-subqueries.md | 173 ++++++++++++++++++ packages/sync-service/subqueries.md | 167 +++++++++++++++++ 3 files changed, 397 insertions(+) create mode 100644 packages/sync-service/simple-subqueries-original.md create mode 100644 packages/sync-service/simple-subqueries.md create mode 100644 packages/sync-service/subqueries.md diff --git a/packages/sync-service/simple-subqueries-original.md b/packages/sync-service/simple-subqueries-original.md new file mode 100644 index 0000000000..09769c1b82 --- /dev/null +++ b/packages/sync-service/simple-subqueries-original.md @@ -0,0 +1,57 @@ +# Pretend the shape does not have subqueries! + +the simplifying concept is we pretend that the shape doesn't have subqueries and instead the move in query uses parameters instead of subqueries where the parameters are populated by in-memory materialized views of the subqueries. so for a shape like `x IN subquery` we would do the move in query as `x IN $moved_in_values AND NOT x in $current_view` where current view does not include the moved in values. the view does not advance until we get the move in results. + +## Definitions + +virtual view - an imaginary materialized view of a shape's log +subquery view - an in-memory materialization of a subquery at a point in time, a shape will have a subquery view for each of it's direct subqueries. move-ins/outs don't imediately advance the subquery-view (rules below) +shape's log - a change stream a client of the shape consumes + +## Invariants + +### virtual view consistency (vvc) +the virtual view must match a snapshot at that lsn given the shape's where clause substituting in subquery views for the subqueries + +why? +- a missing row in the virtual view may mean you append an update into the log that should have been an insert + +how is the invariant maintained? +- move-ins queries use parameters instead of subqueries using appropriate subquery views: + - the inclusion part of the where clause should use just a moved-in values + - the exclusion part of the where clause should use the current view (which does not include the moved-in values) + - the tags use the subquery view once the move is has been applied +- only one move at a time (they're queued) (you can't move out AND then get move-in results back because the exclusions on the move-in query will be wrong and you could have missing rows) + - move-ins only advance the subquery view when the move-in response is spliced into the log + - move-outs can go into the log imediately and advance the subquery view imediately, providing there isn't a move-in in flight +- tags for rows must be calulated using the subquery views for that time + - logs can calculate them using the subquery view + - move-in queries can get the database to calculate them using the subquery view for the view as it will be once the move-in is applied passed in a a parameter +- for a move-in: + 1) the subquery views (extra_refs) for replication stream filter (lib/electric/shapes/filter.ex) are updated to include the moved in values (for the prototype we'll just have the filter allow all values) + 2) the replication stream is streamed to a file unchanged rather than converted and sent to the shape's log + 3) the move-in query is sent with the appropriate parameters + 4) when the move-in response is received, the point in the buffered replication stream is found (using xmin, xmax, xip_list of the move in) + 5) The shape's log is appended to with: + 1) the rows from the buffered replication stream up to the move-in point, converted (with convert_change) using the subquery view from before the move-in + 2) the move-in response rows + 3) the rows from the buffered replication stream after the move-in point, converted (with convert_change) using the subquery view with the move-in values included + 6) the replication stream can now go back to being immediately converted and sent to the shape's log, however now it's converted using the new subquery view that includes the moved in values + +### operation consistency +- there should be no inserts in the shape's log for rows already in the virtual view +- there should be no updates in the shape's log for rows not in the virtual view +- there should be no deletes in the shape's log for rows not in the virtual view + +how is the invariant maintained? +- vvc +- Shape.convert_change/3 (lib/electric/shapes/shape.ex) using the subquery views for the time of the change + + +## Notes + +this is a very complicated feature! we want well named modules, probably with their own data structures, that have easy to describe responsibilities, ideally just one responsibility, and that can have nice readable tests. Normal good coding practices but I really want to concentrate on getting these right. Ask me any questions you need to. Lets come up with a plan! + +- the buffered log must contain enough for AFTER the move in + - superset of before and after - we don't know this because a move-in might be late! - but we could know if there's a potential move-in and fall back to following + - `condition AND x in subquery` → `condition AND true` diff --git a/packages/sync-service/simple-subqueries.md b/packages/sync-service/simple-subqueries.md new file mode 100644 index 0000000000..d5cc20e010 --- /dev/null +++ b/packages/sync-service/simple-subqueries.md @@ -0,0 +1,173 @@ +# Simplified Subquery Algorithm (Review Draft) + +## Decision Summary + +This proposal is intentionally conservative: + +- Keep current row/tag semantics where possible. +- Replace subquery re-evaluation during streaming with in-memory subquery views. +- Serialize moves per shape with a single queue. +- Prioritize correctness and debuggability over throughput in v1. + +## Why Change + +The current model (see subqueries.md) has hard edge cases around subquery timing, ordering, and log correctness. + +The new model reduces global coordination by making each shape manage its own subquery-view timeline, with explicit transition points. + +## Core Model + +Treat each subquery as a runtime input, not as something continuously re-executed in SQL during replication processing. + +Conceptually: + +```sql +WHERE x IN (SELECT id FROM projects WHERE active = true) +``` + +becomes: + +```sql +WHERE x = ANY($subquery_view) +``` + +where `$subquery_view` is an in-memory materialized set maintained by the shape runtime. + +## Terms + +- **Subquery view**: In-memory materialization of one direct subquery result set. +- **Virtual view**: The row set clients should have after applying shape-log entries up to a point. +- **Move-in**: Values added to a subquery view. +- **Move-out**: Values removed from a subquery view. +- **Shape log**: Ordered stream of row changes and control messages. + +## Correctness Invariants + +### 1) Virtual View Consistency (VVC) + +At any log position, the virtual view must equal a database snapshot at the same logical point, using subquery views substituted into the shape predicate. + +### 2) Operation Consistency + +- No `insert` for rows already in virtual view. +- No `update` for rows not in virtual view. +- No `delete` for rows not in virtual view. + +Enforcement point: `Shape.convert_change/3` with the subquery view that is correct for the change time. + +## Normative Rules + +- A shape must process at most one active move operation at a time. +- A shape must use one shared move queue (not per-subquery queues). +- Each changed value must enqueue its own move operation (no batching). +- Move-out operations must be prioritized ahead of move-in operations when queued. +- While move-in is in flight, raw replication changes must be buffered and not immediately converted. +- Subquery view must not advance for move-in until splice commit. +- Move-out must be deferred until buffered pre-splice data is flushed. +- On buffer/resource breach during move-in, the shape is dropped (lose shape) in v1. + +## Algorithm + +## A) Steady state (no move in flight) + +1. Replication change arrives. +2. Convert via `Shape.convert_change/3` using current subquery views (`extra_refs`). +3. Append converted output to shape log. + +## B) Move-in + +Inputs: + +- `moved_in_values`: newly added values +- `current_view`: subquery view before move-in + +Steps: + +1. **Prepare superset filtering for buffering** + - Ensure buffering does not drop potentially relevant rows. + - v1 may use broad filtering for safety. +2. **Start raw buffering** + - Buffer raw replication changes with metadata sufficient to locate a splice boundary. +3. **Run move-in query** + - Include moved-in values, exclude rows already represented by `current_view`. + - Conceptual predicate: + +```sql +WHERE + AND x = ANY($moved_in_values) + AND NOT x = ANY($current_view) +``` + +4. **Locate splice boundary** + - Use returned snapshot metadata (`xmin`, `xmax`, `xip_list`) to split buffered stream. +5. **Emit in order** + - Pre-boundary buffered changes converted with old view (`current_view`). + - Move-in query rows (same log format as current implementation). + - Post-boundary buffered changes converted with new view (`current_view ∪ moved_in_values`). +6. **Commit view transition** + - Advance subquery view to include `moved_in_values`. + - Resume immediate conversion/log append. + +## C) Move-out + +If no move-in is in flight: + +1. Emit move-out control message (same tag-hash scheme as current implementation). +2. Remove moved-out values from subquery view. + +If move-in is in flight: + +1. Queue move-out. +2. Process after move-in splice completes and buffered pre-splice data is flushed. + +## Compatibility Decisions + +- Move-in splice row format: same as current implementation. +- Composite-key behavior and predicate semantics: same as current implementation. +- Move-out tag hashing: same as current implementation. + +## Nested Subqueries + +Apply recursively through dependency chain: + +- Each shape reasons only about direct subquery views. +- Upstream shape output materializes downstream subquery views. +- Each shape independently follows this queue + splice model. + +## Scope + +### In scope (v1) + +- Correctness-first single-shape queue model. +- Buffered move-in splice flow. +- Explicit subquery-view transitions. + +### Out of scope (v1) + +- Bounded buffering/spill strategy. +- Parallel moves within a shape. +- Aggressive filter minimization during move-in. +- Resumable recovery after buffer breach. + +## Suggested Implementation Boundaries + +- `SubqueryView`: view state and transitions. +- `MoveQueue`: serialization and prioritization. +- `MoveInBuffer`: raw buffering + boundary split. +- `MoveInPlanner`: predicate/parameter construction. +- `MoveSplicer`: ordered emission of pre/move-in/post segments. +- `TagContext`: view-timed tag inputs. + +(Names are placeholders; boundaries are the important part.) + +## Observability (minimum) + +- Move queue depth per shape. +- Move-in latency. +- Buffered change count/bytes. +- Pre/post splice segment sizes. +- Move failures and shape drops. + +## Remaining Open Question + +1. None currently. The algorithm decisions above are intentional for v1. diff --git a/packages/sync-service/subqueries.md b/packages/sync-service/subqueries.md new file mode 100644 index 0000000000..426389f3e5 --- /dev/null +++ b/packages/sync-service/subqueries.md @@ -0,0 +1,167 @@ +# How Subqueries Work + +## Overview + +Subqueries allow shape WHERE clauses to reference rows from other tables. Currently, only +`value IN (SELECT ...)` is supported. Each subquery becomes a **shape dependency** -- a +separate Shape that is tracked independently and whose result set feeds into the parent +shape's WHERE evaluation. + +```sql +-- Single-column subquery +WHERE project_id IN (SELECT id FROM projects WHERE active = true) + +-- Composite-key subquery +WHERE (org_id, team_id) IN (SELECT org_id, team_id FROM memberships WHERE user_id = '1') + +-- Nested subqueries (each level becomes its own dependency) +WHERE project_id IN ( + SELECT id FROM projects WHERE team_id IN ( + SELECT id FROM teams WHERE org_id = '42' + ) +) +``` + +Requires the `allow_subqueries` feature flag. Not allowed in subset WHERE clauses. + +## Parsing + +Entry point: `Shape.validate_where_clause/3` in `lib/electric/shapes/shape.ex`. + +``` +1. Parser.parse_query(where) -- PgQuery parses SQL into AST +2. Parser.extract_subqueries(where) -- Walker finds all PgQuery.SubLink nodes, + returns their inner SELECT statements +3. build_shape_dependencies(...) -- Each subquery SELECT becomes a full Shape + (recursively, so nested subqueries work) +4. build_dependency_refs(...) -- Builds type refs like + ["$sublink", "0"] => {:array, :int8} +5. Parser.validate_where_ast(...) -- Validates and compiles the WHERE expression, + with sublink_queries map for query reconstruction +``` + +### AST representation + +`Parser.node_to_ast/4` (`parser.ex:775-821`) matches `PgQuery.SubLink` nodes and converts +them to a `Func` AST node: + +```elixir +%Func{ + name: "sublink_membership_check", + implementation: &PgInterop.Sublink.member?/2, + type: :bool, + args: [ + testexpr, # Ref or RowExpr of Refs + %Ref{path: ["$sublink", "0"], type: {:array, :int8}} # placeholder for results + ] +} +``` + +### Validation rules + +- **SubLink type**: Only `:ANY_SUBLINK` (i.e. `IN (SELECT ...)`) +- **Operator**: Must be empty (plain `IN`, no `= ANY` etc.) +- **Left side**: Must be a column ref or a row of column refs -- no expressions +- **Type match**: Left-side type must match the subquery return type +- **Inner SELECT**: Must be a simple `SELECT cols FROM table [WHERE ...]` -- no DISTINCT, + GROUP BY, HAVING, WINDOW, ORDER BY, LIMIT, WITH, or locking clauses + +Anything else gets an error like `"only 'value IN (SELECT ...)' sublinks are supported right now"`. + +## Shape struct fields + +```elixir +defstruct [ + ... + shape_dependencies: [], # [Shape.t()] -- one per subquery + shape_dependencies_handles: [], # [String.t()] -- shape handles for each dep + tag_structure: [], # for generating row tags (move-out tracking) + subquery_comparison_expressions: %{}, # sublink path => Expr for comparing values +] +``` + +`shape_dependencies_handles` is populated later (not at parse time) when the dependency +shapes are registered with the shape cache. + +## Runtime evaluation + +### Record filtering (WhereClause) + +`WhereClause.includes_record?/3` (`lib/electric/shapes/where_clause.ex`) evaluates whether +a row matches the shape's WHERE clause: + +```elixir +def includes_record?(where_clause, record, extra_refs) do + with {:ok, refs} <- Runner.record_to_ref_values(where_clause.used_refs, record), + {:ok, evaluated} <- Runner.execute(where_clause, Map.merge(refs, extra_refs)) +``` + +The `extra_refs` map provides subquery results: +- `%{["$sublink", "0"] => [value1, value2, ...]}` for single-column +- `%{["$sublink", "0"] => [{v1, v2}, ...]}` for composite keys + +The `sublink_membership_check` function (`PgInterop.Sublink.member?/2`) does a simple +`Enum.member?/2` or `MapSet.member?/2` against the provided list/set. + +## Dependency layers + +`DependencyLayers` (`lib/electric/shapes/dependency_layers.ex`) ensures shapes are +processed in correct dependency order. Shapes are organized into layers: + +- Layer 0: shapes with no dependencies +- Layer N: shapes whose dependencies are all in layers < N + +When changes arrive, `ShapeLogCollector` publishes events layer-by-layer +(`shape_log_collector.ex:551`), so parent shapes always see updates before their dependents. + +## Move-in / move-out + +When the result set of a dependency shape changes, rows may need to enter or leave the +parent shape. This is handled by `Consumer.MoveHandling` and `Shape.SubqueryMoves`. + +### Move-in + +When new values appear in a dependency (`move_handling.ex:16-68`): + +1. `SubqueryMoves.move_in_where_clause/3` transforms the original WHERE clause by replacing + the subquery with the new values: + - Single column: `IN (SELECT id FROM ...)` becomes `= ANY ($1::text[]::int8[])` + - Composite key: becomes `IN (SELECT * FROM unnest($1::text[]::type1[], $2::text[]::type2[]))` +2. An async query runs against Postgres with this modified WHERE +3. Results are written as a "move-in snapshot" to storage +4. The snapshot is spliced into the main log, filtered against already-seen keys + +### Move-out + +When values disappear from a dependency (`move_handling.ex:74-96`): + +1. `SubqueryMoves.make_move_out_control_message/4` generates a control message with + `event: "move-out"` and a list of tag patterns +2. Each pattern contains a hash: `md5(stack_id <> shape_handle <> namespaced_value)` +3. The control message is appended to the shape log for clients to process + +### Tags + +Tags track *why* a row is in a shape (which dependency value matched). They are computed +both in Postgres (via `make_tags` in `querying.ex:153`) and in Elixir +(`SubqueryMoves.make_value_hash/3`), using the same hashing scheme: + +- Values are namespaced: `"v:" <> value` for non-null, `"NULL"` for null +- Composite keys concatenate `column_name:namespaced_value` parts +- The hash is `md5(stack_id <> shape_handle <> namespaced_parts)` encoded as lowercase hex + +The `tag_structure` field on Shape describes the column layout for tag generation, built by +`SubqueryMoves.move_in_tag_structure/1` which walks the WHERE AST looking for +`sublink_membership_check` nodes. + +## Not supported + +- `EXISTS (SELECT ...)` +- `NOT IN (SELECT ...)` +- Scalar subqueries +- `ANY`/`ALL` with comparison operators +- Subqueries in SELECT list or FROM clause +- Expressions on the left side of `IN` (only plain column refs) +- Subqueries in subset WHERE clauses +- Multiple independent subqueries have partial support (tag structure TODOs note DNF form + is needed, and move-out has a stub guard for single dependencies) From a1c2b4b9d31eb0646d95f68d9c34a490bdd8f7a9 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 9 Mar 2026 12:46:38 +0000 Subject: [PATCH 13/63] Docs: Document LSN-based move-in splice triggers --- packages/sync-service/simple-subqueries.md | 45 ++++++++++++++++++---- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/packages/sync-service/simple-subqueries.md b/packages/sync-service/simple-subqueries.md index d5cc20e010..8a2e3f5375 100644 --- a/packages/sync-service/simple-subqueries.md +++ b/packages/sync-service/simple-subqueries.md @@ -7,6 +7,7 @@ This proposal is intentionally conservative: - Keep current row/tag semantics where possible. - Replace subquery re-evaluation during streaming with in-memory subquery views. - Serialize moves per shape with a single queue. +- Use two splice-boundary signals for move-in (`visible_in_snapshot?` and LSN progress). - Prioritize correctness and debuggability over throughput in v1. ## Why Change @@ -39,6 +40,10 @@ where `$subquery_view` is an in-memory materialized set maintained by the shape - **Virtual view**: The row set clients should have after applying shape-log entries up to a point. - **Move-in**: Values added to a subquery view. - **Move-out**: Values removed from a subquery view. +- **Move-in snapshot**: Snapshot metadata (`xmin`, `xmax`, `xip_list`) returned by the move-in query. +- **Move-in LSN**: `current_lsn` captured by the move-in query. +- **LSN update event**: `%LsnUpdate{lsn}` emitted from replication keepalives. +- **Splice boundary**: The log point where move-in query rows are inserted. - **Shape log**: Ordered stream of row changes and control messages. ## Correctness Invariants @@ -64,6 +69,15 @@ Enforcement point: `Shape.convert_change/3` with the subquery view that is corre - While move-in is in flight, raw replication changes must be buffered and not immediately converted. - Subquery view must not advance for move-in until splice commit. - Move-out must be deferred until buffered pre-splice data is flushed. +- Move-in query must return both move-in snapshot metadata and `current_lsn`. +- Keepalives must publish `%LsnUpdate{lsn}` events alongside `%Relation{}` and `%TransactionFragment{}` events. +- In the prototype, `%LsnUpdate{}` events are broadcast to all consumers. +- A move-in splice boundary must be found by the first satisfied condition: + - first transaction where `Transaction.visible_in_snapshot?(txn, move_in_snapshot) == false` (splice before that transaction), + - or first `%LsnUpdate{lsn}` where `lsn >= move_in_lsn` (splice at current buffered tail). +- Splice commit requires both: boundary found and move-in query results available. +- If boundary is found first, query-result arrival must trigger splice. +- Shape-log writes for move-in splicing must be serialized in the consumer process, even when triggered by non-replication-stream events. - On buffer/resource breach during move-in, the shape is dropped (lose shape) in v1. ## Algorithm @@ -90,6 +104,7 @@ Steps: - Buffer raw replication changes with metadata sufficient to locate a splice boundary. 3. **Run move-in query** - Include moved-in values, exclude rows already represented by `current_view`. + - Capture both move-in snapshot metadata and `current_lsn` in the same query execution. - Conceptual predicate: ```sql @@ -98,13 +113,25 @@ WHERE AND NOT x = ANY($current_view) ``` -4. **Locate splice boundary** - - Use returned snapshot metadata (`xmin`, `xmax`, `xip_list`) to split buffered stream. -5. **Emit in order** - - Pre-boundary buffered changes converted with old view (`current_view`). - - Move-in query rows (same log format as current implementation). - - Post-boundary buffered changes converted with new view (`current_view ∪ moved_in_values`). -6. **Commit view transition** +4. **Watch for splice-boundary signals** + - Keep buffering raw replication changes. + - Track transaction trigger: first transaction where `Transaction.visible_in_snapshot?(txn, move_in_snapshot) == false`. + - Track LSN trigger: first `%LsnUpdate{lsn}` where `lsn >= move_in_lsn`. + - Whichever trigger is satisfied first determines the splice boundary. +5. **Wait for both prerequisites** + - Splice can execute only when both are true: + - splice boundary is known (from transaction trigger or LSN trigger), + - move-in query rows are available. + - This creates 3 valid splice triggers: + 1. A transaction where `Transaction.visible_in_snapshot?(txn, move_in_snapshot) == false`. + 2. An `%LsnUpdate{}` where `lsn >= move_in_lsn`. + 3. Query results arriving after (1) or (2) has already established the boundary. +6. **Emit in order** + - Convert and append pre-boundary buffered changes with old view (`current_view`). + - Append move-in query rows (same log format as current implementation). + - Convert and append post-boundary buffered changes with new view (`current_view ∪ moved_in_values`). + - For an LSN-trigger boundary, pre-boundary is all buffered data at trigger time. +7. **Commit view transition** - Advance subquery view to include `moved_in_values`. - Resume immediate conversion/log append. @@ -140,6 +167,7 @@ Apply recursively through dependency chain: - Correctness-first single-shape queue model. - Buffered move-in splice flow. +- LSN-aware splice boundaries using keepalive-driven `%LsnUpdate{}` events. - Explicit subquery-view transitions. ### Out of scope (v1) @@ -148,6 +176,7 @@ Apply recursively through dependency chain: - Parallel moves within a shape. - Aggressive filter minimization during move-in. - Resumable recovery after buffer breach. +- Shape-targeted routing for `%LsnUpdate{}` (prototype uses broadcast). ## Suggested Implementation Boundaries @@ -155,6 +184,7 @@ Apply recursively through dependency chain: - `MoveQueue`: serialization and prioritization. - `MoveInBuffer`: raw buffering + boundary split. - `MoveInPlanner`: predicate/parameter construction. +- `LsnBoundaryTracker`: tracks `move_in_lsn` vs `%LsnUpdate{}` progress. - `MoveSplicer`: ordered emission of pre/move-in/post segments. - `TagContext`: view-timed tag inputs. @@ -166,6 +196,7 @@ Apply recursively through dependency chain: - Move-in latency. - Buffered change count/bytes. - Pre/post splice segment sizes. +- Splice trigger type counts (transaction vs LSN vs results-arrival). - Move failures and shape drops. ## Remaining Open Question From d065f8992435b06fe65cc9e0ce51a78ee1e318d3 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 9 Mar 2026 13:03:07 +0000 Subject: [PATCH 14/63] Docs: Add prototype-issues.md --- packages/sync-service/prototype-issues.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 packages/sync-service/prototype-issues.md diff --git a/packages/sync-service/prototype-issues.md b/packages/sync-service/prototype-issues.md new file mode 100644 index 0000000000..b365381739 --- /dev/null +++ b/packages/sync-service/prototype-issues.md @@ -0,0 +1,6 @@ +## Excessive message passing and waking of consumers +- %LsnUpdate{} is sent to all consumers +- Filter treats `IN subquery` as `TRUE` + +## Excessive memory usage +- Subquery materialized views are held in consumer memory in addition to be held in memory in the Materializer From b0d0469a4dc19f4ca06f14957296b6d8603ce900 Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 10 Mar 2026 11:17:34 +0000 Subject: [PATCH 15/63] Docs: Add filter algorythm --- .../simple-subqueries-filter-original.md | 46 ++++++++ .../sync-service/simple-subqueries-filter.md | 105 ++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 packages/sync-service/simple-subqueries-filter-original.md create mode 100644 packages/sync-service/simple-subqueries-filter.md diff --git a/packages/sync-service/simple-subqueries-filter-original.md b/packages/sync-service/simple-subqueries-filter-original.md new file mode 100644 index 0000000000..8d3bf43366 --- /dev/null +++ b/packages/sync-service/simple-subqueries-filter-original.md @@ -0,0 +1,46 @@ +## Shapes.Filter + +For subqueries, the Shapes.Filter should support subqueries using a reverse index per subquery. We'd use an ETS table, but conceptually the reverse index can be thought of as a map of values to shape handles: + +%{ + "1" => MapSet.new([handle1, handle2]), + "2" => MapSet.new([handle2, handle3]), + "3" => MapSet.new([handle1]) +} + +Then when a value comes in, we can look up the value in the reverse index and get the set of shape handles that match that value. + +Shapes that have not been indexed (for example if their where clause also has `LIKE` in it) end up in `other_shapes` and are iterated through using `WhereClause.includes_record?/3`.I propose we use the reverse index in this situation too (for simplicity and to avoid holding more in memory). Whereas currently for `x IN subquery` `includes_record?/3` gets all the values in the materialized view of the subquery which we keep in `refs` to see if the the value of `x` from `record` is in that set, we can instead look up the value of `x` in the reverse index and see if our shape handle is in the list. This will mean changing how `WhereClause.includes_record?/3` works, perhaps giving a function to work out subquery inclusion instead of a function for refs, and perhaps the shape handle can be passed in the closure of that function. + +The reverse-index will just provide a set of possible shape handles, the Filter will still need to filter this set for the shape handles relevant for the current WhereCondition since the WhereCondition may be on a branch where the shape could never reach. + + +## Managing the reverse index + +### Consumer independence + +Each consumer has subquery views at differnt times, so the consumer should manage the reverse index for it's subqueries. + +The consumer can add and remove values from the index independently of other shapes: + +```elixir +index = %{ "1" => MapSet.new(["handle1"]) } + +ReverseIndex.add_value(index, "handle2", _value = "1") + +# %{ "1" => MapSet.new([handle1, handle2]) } +``` + +### Move-ins + +While a move-in query is in flight we need to buffer the changes. These changes need to include: +- Relavant changes for the shape BEFORE the move-in +- Relavant changes for the shape AFTER the move-in + +For shapes without negation (e.g. `NOT IN subquery`) the reverse index should be the union of the before and after subquery views, so the consumer should add the moved-in value to the reverse index. For negation you need the intersection of the before and after subquery views, so you remove the move-in value. + +Consistency is maintained even if the consumer gets more changes than it needs since it will filter out ones it doesn't need with Shape.convert_change. The importnant thing is that is doesn't miss changes. + +### Move-outs + +In some scenarios we process move-outs the moment the the move-out message is received by the the consumer, so this could be mid-transaction and the Filter will have already filtered the changes for the rest of the transaction. This is not an issue because the shapes that do not have negation as the changes will already include enough for before and after the move-out, and for shapes with negation the move-out becomes a move-in and can follow move-in semantics. diff --git a/packages/sync-service/simple-subqueries-filter.md b/packages/sync-service/simple-subqueries-filter.md new file mode 100644 index 0000000000..54dcee7019 --- /dev/null +++ b/packages/sync-service/simple-subqueries-filter.md @@ -0,0 +1,105 @@ +## Shapes.Filter — Subquery Support via Reverse Index + +For subqueries, the Shapes.Filter should support subqueries using a reverse +index per subquery. We'd use an ETS table, but conceptually the reverse index +can be thought of as a map of values to shape handles: + +```elixir +%{ + "1" => MapSet.new([handle1, handle2]), + "2" => MapSet.new([handle2, handle3]), + "3" => MapSet.new([handle1]) +} +``` + +When a change arrives with a value for the subquery column, we look up that +value in the reverse index and get the set of shape handles whose subquery +view contains that value. + +### Integration with `WhereClause.includes_record?/3` + +Shapes that cannot be indexed (for example because their `WHERE` clause also +has `LIKE` in it) currently end up in `other_shapes` and are iterated through +using `WhereClause.includes_record?/3`. We should use the reverse index for +subquery evaluation in this path too, for simplicity and to avoid holding more +in memory. + +Currently, for `x IN subquery`, `includes_record?/3` gets all the values from +the materialized view of the subquery (kept in `refs`) and checks whether the +value of `x` from `record` is in that set. Instead, we look up the value of +`x` in the reverse index and check whether our shape handle is in the result +set. + +This means changing the interface of `includes_record?/3`: instead of +passing a `refs` map containing the full subquery value set, we pass a +function that determines subquery inclusion. The shape handle can be captured +in the closure of that function. + +### Candidate filtering + +The reverse index provides a set of _candidate_ shape handles. The Filter +must still verify each candidate against the full `WhereCondition` for that +table, because the `WhereCondition` tree may include non-subquery branches +that rule the shape out. + +## Managing the Reverse Index + +### Consumer independence + +Each consumer has subquery views at different times, so each consumer manages +the reverse index entries for its own shapes independently of other shapes: + +```elixir +index = %{"1" => MapSet.new(["handle1"])} + +ReverseIndex.add_value(index, "handle2", _value = "1") + +# => %{"1" => MapSet.new(["handle1", "handle2"])} +``` + +Because the index is an ETS table, updates by one consumer are immediately +visible to the Filter running in the EventRouter process. + +### Move-ins + +While a move-in query is in flight we buffer changes (see +`simple-subqueries.md`, section B). During buffering the reverse index must be +broad enough to capture changes relevant to _both_ the pre-splice and +post-splice views: + +- **Pre-splice changes** are converted with the old subquery view. +- **Post-splice changes** are converted with the new subquery view. + +The safe strategy depends on whether the shape uses negation: + +- **Shapes without negation** (i.e. shapes that do not use `NOT IN subquery`): + The reverse index should be the _union_ of the before and after subquery + views, so the consumer adds the moved-in value to the reverse index at the + start of the move-in. + +- **Shapes with negation** (i.e. shapes that use `NOT IN subquery`): + The reverse index should be the _intersection_ of the before and after + subquery views, so the consumer removes the moved-in value from the reverse + index at the start of the move-in. + +Consistency is maintained even if the Filter passes through more changes than +strictly necessary: `Shape.convert_change/3` will filter out any that do not +belong, using the correct subquery view for the change's position relative to +the splice boundary. The important invariant is that we never _miss_ a +relevant change. + +### Move-outs + +In some scenarios the consumer processes move-outs the moment the move-out +message is received, which can be mid-transaction. By that point the Filter +has already filtered changes for the remainder of the transaction using the +old reverse index state. This is safe: + +- **Shapes without negation**: the old index already included the moved-out + value, so changes for both before and after the move-out are captured. Any + extra changes are filtered out by `Shape.convert_change/3`. + +- **Shapes with negation**: a move-out from the subquery view means rows that + _were_ excluded now become included — effectively a move-in from the shape's + perspective. This case follows move-in semantics (buffering, splice + boundary, etc.). From 2a040435d324c5985bd1aa62a2eae4de87c1d436 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 11 Mar 2026 15:13:12 +0000 Subject: [PATCH 16/63] Docs: Add RFC --- .../simple-subqueries-with-dnf-rfc.md | 496 ++++++++++++++++++ 1 file changed, 496 insertions(+) create mode 100644 packages/sync-service/simple-subqueries-with-dnf-rfc.md diff --git a/packages/sync-service/simple-subqueries-with-dnf-rfc.md b/packages/sync-service/simple-subqueries-with-dnf-rfc.md new file mode 100644 index 0000000000..adc9696ae3 --- /dev/null +++ b/packages/sync-service/simple-subqueries-with-dnf-rfc.md @@ -0,0 +1,496 @@ +# RFC: Positive DNF Subqueries on Top of the Simple Splice Model + +Status: draft + +Owner: sync-service prototype + +Related: +- `./simple-subqueries.md` +- `./simple-subqueries-filter.md` +- `https://raw.githubusercontent.com/electric-sql/electric/refs/heads/rob/arbitrary-boolean-expressions-with-subqueries/docs/rfcs/arbitrary-boolean-expressions-with-subqueries.md` + +## Summary + +Extend the current `simple-subqueries.md` model to support multiple positive +subqueries in the same `WHERE` clause, including cases like: + +```sql +WHERE x IN subquery1 OR y IN subquery2 +``` + +The core idea is: + +- keep the current "subquery views + exact splice point" model for move-ins +- normalize the `WHERE` clause to positive DNF +- plan move-ins per affected DNF disjunct +- populate `active_conditions` properly from DNF positions +- use DNF-shaped row tags plus position-aware move broadcasts +- keep `NOT`-with-subquery on the existing 409-on-move path for now + +This is intentionally prototype-first: + +- `Shapes.Filter` may continue to oversend +- move buffers stay in memory +- all move work is serialized per shape +- we optimize for consistency and understandable code, not throughput + +## Goals + +- Support `OR` across direct subqueries. +- Support arbitrary positive boolean expressions over: + - plain row predicates + - `value IN (SELECT ...)` + - row-value `IN (SELECT ...)` +- Populate `active_conditions` correctly for DNF shapes. +- Preserve the current virtual-view and operation-consistency invariants. +- Reuse the existing splice-at-boundary approach from `simple-subqueries.md`. +- Avoid the old `touch_tracker` design. + +## Non-goals + +- Supporting moves for shapes that combine `NOT` and subqueries +- `EXISTS`, scalar subqueries, `ANY`/`ALL`, or subqueries outside `WHERE` +- Fixing `Shapes.Filter` oversend in this RFC +- Disk-backed buffering, resumability, or recovery +- Parallel move processing within a shape + +## Scope + +This RFC covers shapes whose `WHERE` clause can be expressed as a positive DNF: + +```sql +(term AND term AND ...) +OR +(term AND term AND ...) +OR ... +``` + +where each `term` is either: + +- a normal row predicate, or +- a positive `IN (SELECT ...)` subquery predicate + +Shapes that combine `NOT` and subqueries remain on the current unsupported +path: when a subquery move would affect them, they invalidate and clients see a +409/refresh path as they do today. + +## Why DNF + +The current single-subquery model works because one dependency move can be +turned into one precise "what newly entered?" query: + +```sql +x IN moved_in_values AND NOT x IN current_view +``` + +That stops being sufficient for: + +```sql +WHERE x IN subquery1 OR y IN subquery2 +``` + +because a move in `subquery1` should only fetch rows that are newly included by +the `subquery1` side and were not already present via the `subquery2` side. + +DNF gives us the right planning unit: + +- each disjunct is one independent reason a row can be in the shape +- a move only affects the disjuncts that reference that dependency +- move-in queries can be restricted to those disjuncts +- move-out tags can remove only the reason that actually disappeared + +## Core Model + +### 1. Subquery views remain the source of truth + +Each shape keeps an in-memory materialized view for each direct subquery: + +```elixir +%{ + ["$sublink", "0"] => MapSet.new([...]), + ["$sublink", "1"] => MapSet.new([...]) +} +``` + +Replication-stream changes are always converted using the full subquery-view map +that is correct for that point in the log. + +### 2. The shape keeps one global move queue + +We keep the existing "one move at a time" rule, but now it applies across all +direct dependencies for the shape: + +- one active move operation per shape +- move-ins and move-outs from all dependencies share the same queue +- move-outs are not processed concurrently with an in-flight move-in + +This remains the simplest way to preserve view-timed conversion. + +### 3. Move-ins still splice exact query results into the log + +We keep the current buffering model: + +1. start buffering raw outer-table transactions +2. run a move-in query in a repeatable-read snapshot +3. capture snapshot metadata and `current_lsn` +4. find the splice boundary +5. write: + 1. pre-boundary buffered changes with the old subquery views + 2. move-in query rows + 3. post-boundary buffered changes with the new subquery views + +This removes the need for `touch_tracker`: stream/query ordering is now handled +by the splice boundary itself. + +### 4. DNF shapes carry tags and `active_conditions` + +For single-subquery shapes, the existing tag model is already enough. + +For DNF shapes, rows need: + +- one tag per disjunct +- one `active_conditions` entry per DNF position + +`active_conditions` is already part of the protocol today, but the current +single-disjunct implementation always emits `[true]`. This RFC makes it real: + +- row messages carry the actual truth value for each DNF position +- move broadcasts update subquery-backed positions for rows already on the + client +- clients re-evaluate inclusion from `tags` and `active_conditions` + +Move-ins are still handled server-side by query+splice for newly visible rows. +The broadcasts are needed so rows that were already present for one reason can +learn that another reason became true or false. + +## DNF Compilation + +We can reuse `Electric.Replication.Eval.Decomposer` as the basis, but for this +RFC we only accept decompositions whose literals are all positive. + +For a shape we compile and keep: + +- `disjuncts`: list of conjunctions +- `subexpressions`: metadata for each DNF position +- `position_count` +- `dependency_positions`: direct dependency handle -> positions +- `dependency_disjuncts`: direct dependency handle -> disjunct indexes + +Each position records: + +- the base AST +- whether it is a subquery position +- which direct dependency it belongs to, if any +- how to generate SQL for it +- how to generate the `active_conditions` value for it +- how to generate the tag slot for it + +Example: + +```sql +WHERE (x IN sq1 AND status = 'open') + OR (y IN sq2) +``` + +becomes two disjuncts: + +- `d0 = [x IN sq1, status = 'open']` +- `d1 = [y IN sq2]` + +If `sq1` changes, only `d0` is impacted. + +## Move-in Planning + +For a move-in on dependency `D` with values `V`: + +- `V` is the delta for this move, i.e. the values that were not in `D`'s + current view and will be present after this move is spliced + +1. identify the impacted disjuncts: every disjunct that mentions `D` +2. build the candidate move-in predicate: + - only the impacted disjuncts + - positions that belong to `D` are replaced with membership against `V` + - other subquery positions use the current view map +3. build the exclusion predicate from the disjuncts that could already have + been true before the move: + - unaffected disjuncts are included as-is, with subquery positions using the + current view map + - impacted disjuncts are omitted from the exclusion predicate when they + contain a triggering position replaced by `V`, because `V` is disjoint from + the current view for that position and those old disjunct instances are + therefore impossible +4. query: + +```sql +WHERE () AND NOT () +``` + +This gives the rows that become newly visible because of this move, while +excluding rows already present via some other disjunct. + +`move_in_values` is therefore not extra state beyond the move itself; it is the +thing that makes the query narrow to "rows newly relevant because of this +delta", rather than querying against the whole post-move view of the +dependency. + +Because move-in queries are parameterized only by: + +- `move_in_values` for the triggering dependency, and +- the in-memory subquery views for all direct dependencies at the start of the + move, + +the query does not depend on live subqueries while it is in flight. Subsequent +moves are queued, do not overlap, and do not advance the shape's subquery views +until their own splice point. + +In addition to the move-in query, the shape emits position-aware `move-in` +broadcasts for the triggering dependency values. These broadcasts are how +clients update `active_conditions` for rows that were already in the shape via +another disjunct and therefore are excluded from the move-in query. + +### Example + +```sql +WHERE x IN sq1 OR y IN sq2 +``` + +If `sq1` moves in `a`, the move-in query becomes conceptually: + +```sql +WHERE x = ANY($moved_in_sq1_values) + AND NOT (y = ANY($sq2_current_view)) +``` + +If the shape is: + +```sql +WHERE (x IN sq1 AND status = 'open') OR y IN sq2 +``` + +then a move in `sq1` becomes: + +```sql +WHERE x = ANY($moved_in_sq1_values) + AND status = 'open' + AND NOT (y = ANY($sq2_current_view)) +``` + +The `sq1_current_view` branch drops out because `moved_in_sq1_values` is, by +definition, disjoint from `sq1_current_view`. + +In practice we should generate this from compiled DNF metadata, not by string +replacement on the original SQL. + +## Move-in Runtime + +The move-in state machine stays the same structurally, but its state must now +carry a full view map rather than one subquery view: + +- `views_before_move` +- `views_after_move` +- `trigger_dependency_handle` +- `trigger_positions` +- `move_in_values` +- buffered transactions +- snapshot metadata +- move-in rows +- move-in LSN +- splice boundary + +Steady-state conversion becomes: + +```elixir +Shape.convert_change(shape, change, + stack_id: stack_id, + shape_handle: shape_handle, + extra_refs: {views, views} +) +``` + +During a splice: + +- pre-boundary buffered changes use `views_before_move` +- query rows are already computed for the move +- post-boundary buffered changes use `views_after_move` + +## Tags And `active_conditions` + +Each row message carries: + +- `tags`: one tag per disjunct +- `active_conditions`: one boolean per DNF position + +`active_conditions[position]` is the truth value of that position for the row +at the time the row message is emitted. + +- for normal row predicates, it is computed directly from the row +- for subquery predicates in the initial snapshot, it is computed by SQL using + parameters for the current in-memory subquery views, not live subqueries +- for replication-stream changes, it is computed against the correct in-memory + subquery views for that log point +- for move-in query rows, it is computed against `views_after_move` + +Each tag has one stable slot per DNF position. A participating position gets a +non-empty segment; a non-participating position gets an empty segment. + +For subquery positions, the segment contains the existing hashed value used for +move matching. + +For non-subquery positions, the segment is a fixed non-empty sentinel such as +`"1"`. Only emptiness matters for those positions; they are never targeted by +move broadcasts. + +Example: + +```sql +WHERE (x IN sq1 AND status = 'open') OR (y IN sq2) +``` + +Possible row metadata: + +```json +{ + "tags": ["hash(x)/1/", "//hash(y)"], + "active_conditions": [true, true, false] +} +``` + +Here: + +- position 0 is `x IN sq1` +- position 1 is `status = 'open'` +- position 2 is `y IN sq2` + +If a disjunct has multiple subquery positions, its tag still has one slot per +DNF position, for example: + +```sql +WHERE (x IN sq1 AND z IN sq2) OR (y IN sq3) +``` + +could produce: + +```text +["hash(x)/hash(z)/", "//hash(y)"] +``` + +Clients evaluate inclusion as: + +1. for each tag, take the positions with non-empty segments +2. AND the corresponding `active_conditions` +3. OR the per-tag results + +This is why `active_conditions` must be correct even for rows that were already +present before a move. + +## Move Broadcasts + +Move broadcasts are position-aware and operate on the existing tag hashes. + +### Move-in broadcast + +When values move into dependency `D`, the shape emits `move-in` broadcasts for +the affected DNF positions and values. + +Clients use these to set the corresponding `active_conditions[position] = true` +for already-present rows whose tag has a matching value at that position. + +The move-in query still runs, but only for rows that are newly visible: + +- rows already present via another disjunct are excluded by + `NOT ()` +- those rows still need the `move-in` broadcast so their + `active_conditions` become accurate + +### Move-out broadcast + +When values move out of dependency `D`, the shape emits `move-out` broadcasts +for the affected DNF positions and values. + +Clients use these to set `active_conditions[position] = false` for matching +rows and then re-evaluate inclusion. + +## Move-out Handling + +When values move out of dependency `D`: + +1. identify the DNF positions for `D` +2. emit move-out patterns for those positions and values +3. remove those values from the in-memory subquery view after the operation is + logically applied + +If a move-in is already in flight, the move-out stays queued until that move-in +has been spliced. This is the same serialization rule as in +`simple-subqueries.md`, now applied across all direct dependencies of the +shape. + +Rows that still have another disjunct tag stay in the shape. + +Rows that lose their last tag are removed. + +This is also why move-ins need broadcasts as well as query rows: `x IN sq1 OR y +IN sq2` must not leave stale `active_conditions` on rows that were already +present when `sq1` becomes true. + +## Filter Behaviour + +`Shapes.Filter` is allowed to stay conservative. + +For this prototype: + +- the filter may continue to route all subquery shapes for a table +- reverse-index work is optional optimization, not part of correctness +- while a move-in is buffering, it is acceptable to admit a broad superset of + root-table changes + +Correctness is enforced later by `Shape.convert_change/3` with the appropriate +view map for the change's position relative to the splice boundary, plus +position-aware move broadcasts for client-side `active_conditions`. + +## Nested Subqueries + +Nested subqueries still work recursively: + +- each shape only reasons about its direct subqueries +- upstream shapes materialize the values for downstream shapes +- DNF compilation and move planning happen independently at each level + +## Prototype Simplifications + +These are intentional for now: + +- keep one global move queue per shape +- keep move buffers in memory +- allow filter oversend +- keep `%LsnUpdate{}` broadcast broad if that is easiest +- use the DNF path whenever considering subqueries, including the current + single-subquery case +- continue to 409/invalidate on moves for shapes that combine `NOT` and + subqueries + +## Suggested Implementation Shape + +- `DnfPlan` or similar compiled metadata held alongside the shape, for example + in consumer state or another per-shape runtime structure +- `SubqueryViews` map keyed by subquery ref +- one `MoveQueue` per shape across all dependencies +- `MovePlanner`: + - impacted disjunct lookup + - predicate generation + - tag generation metadata + - position-aware move-in and move-out broadcast metadata +- `Buffering`: + - before/after view maps + - snapshot and LSN tracking + - buffered transactions +- `SpliceRow`: + - replace the assumption that query rows are only inserts if needed later +- `Materializer`: + - multiple tags per row + - position-aware move-out matching +- `Shape.convert_change/3` and snapshot query generation: + - compute real `active_conditions` from DNF positions instead of all-`true` + +## Open Decisions To Confirm + +None currently. From 2bfc7d145c7c591c62904794986378440fd32601 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 11 Mar 2026 17:15:52 +0000 Subject: [PATCH 17/63] Docs: Add to prototype-issues.md --- packages/sync-service/prototype-issues.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/sync-service/prototype-issues.md b/packages/sync-service/prototype-issues.md index b365381739..7863c8f269 100644 --- a/packages/sync-service/prototype-issues.md +++ b/packages/sync-service/prototype-issues.md @@ -4,3 +4,5 @@ ## Excessive memory usage - Subquery materialized views are held in consumer memory in addition to be held in memory in the Materializer +- The log buffer is held in memory while the move-query is in flight but could get very large and so should be on disk +- the before and after views are held in consumer memory From 59a2c82e17b2fc9152b99718df8ef330bd6649f8 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 11 Mar 2026 17:17:06 +0000 Subject: [PATCH 18/63] Docs: Update RFC --- .../simple-subqueries-with-dnf-rfc.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/packages/sync-service/simple-subqueries-with-dnf-rfc.md b/packages/sync-service/simple-subqueries-with-dnf-rfc.md index adc9696ae3..db74f5d774 100644 --- a/packages/sync-service/simple-subqueries-with-dnf-rfc.md +++ b/packages/sync-service/simple-subqueries-with-dnf-rfc.md @@ -97,7 +97,7 @@ DNF gives us the right planning unit: - each disjunct is one independent reason a row can be in the shape - a move only affects the disjuncts that reference that dependency - move-in queries can be restricted to those disjuncts -- move-out tags can remove only the reason that actually disappeared +- move broadcasts can flip only the condition positions that actually changed ## Core Model @@ -324,8 +324,8 @@ Each row message carries: at the time the row message is emitted. - for normal row predicates, it is computed directly from the row -- for subquery predicates in the initial snapshot, it is computed by SQL using - parameters for the current in-memory subquery views, not live subqueries +- for subquery predicates in the initial snapshot, it is computed by SQL on the + existing snapshot query path - for replication-stream changes, it is computed against the correct in-memory subquery views for that log point - for move-in query rows, it is computed against `views_after_move` @@ -424,9 +424,14 @@ has been spliced. This is the same serialization rule as in `simple-subqueries.md`, now applied across all direct dependencies of the shape. -Rows that still have another disjunct tag stay in the shape. +Tags stay on the row; they describe which disjunct positions the row can +participate in. Subquery moves change `active_conditions`, not the tags +themselves. -Rows that lose their last tag are removed. +Rows stay in the shape while at least one tag still evaluates to true against +the current `active_conditions`. + +Rows leave the shape when no tag evaluates to true anymore. This is also why move-ins need broadcasts as well as query rows: `x IN sq1 OR y IN sq2` must not leave stale `active_conditions` on rows that were already From 2c5b00a0b61ce41f8fed06fcd27cb275ea03b963 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 11 Mar 2026 17:33:00 +0000 Subject: [PATCH 19/63] Docs: Add implimentation plan --- .../simple-subqueries-with-dnf-plan.md | 424 ++++++++++++++++++ 1 file changed, 424 insertions(+) create mode 100644 packages/sync-service/simple-subqueries-with-dnf-plan.md diff --git a/packages/sync-service/simple-subqueries-with-dnf-plan.md b/packages/sync-service/simple-subqueries-with-dnf-plan.md new file mode 100644 index 0000000000..c38486a42f --- /dev/null +++ b/packages/sync-service/simple-subqueries-with-dnf-plan.md @@ -0,0 +1,424 @@ +# Implementation Plan: Simple Subqueries With DNF + +Related: +- `./simple-subqueries-with-dnf-rfc.md` +- `./simple-subqueries.md` + +Existing assets we should reuse: +- `Electric.Replication.Eval.Decomposer` +- `Electric.Replication.Eval.SqlGenerator` + +## Goal + +Implement the RFC in staged slices, keeping the current splice-at-boundary +model, using DNF whenever subqueries are involved, and preserving the current +`NOT` + subquery invalidation / 409-on-move behaviour. + +## Ground Rules + +- Do not add DNF metadata to `Shape`. +- Keep `Shapes.Filter` conservative for now. +- Keep buffering in memory. +- Keep one global move queue per shape. +- For move-in queries, do not use live subqueries in the SQL. Use parameters + derived from the current in-memory subquery views. + +## High-Level Shape + +The implementation breaks into five runtime concerns: + +1. compile a DNF sidecar plan from a shape +2. compute row metadata from that plan: + - inclusion + - tags + - `active_conditions` +3. generate parameterized SQL for: + - move-in queries +4. generalize the subquery consumer state machine from one dependency to N +5. teach the materializer to handle: + - multiple tags per row + - real `active_conditions` + - `move-in` and `move-out` broadcasts + +## Stage 1: DNF Sidecar Plan + +Create a sidecar runtime plan, for example `Electric.Shapes.DnfPlan` or +`Electric.Shapes.Consumer.DnfPlan`, built from: + +- `shape.where.eval` +- `shape.shape_dependencies` +- `shape.shape_dependencies_handles` +- `shape.subquery_comparison_expressions` + +The plan should contain at least: + +- `disjuncts` +- `position_count` +- `positions` +- `dependency_positions` +- `dependency_disjuncts` +- per-position SQL AST / SQL text +- per-position tag metadata + +Each position should know: + +- whether it is a subquery position +- which dependency handle it belongs to, if any +- how to evaluate its boolean value for `active_conditions` +- how to produce its tag slot + +Important validation: + +- if the decomposition contains any negated subquery position, mark the plan as + "unsupported on move" and keep the current invalidation semantics +- do not reject the shape at parse time; this is the existing 409-on-move path + +Suggested tests: + +- single-subquery shape +- `x IN sq1 OR y IN sq2` +- `(x IN sq1 AND status = 'open') OR y IN sq2` +- `x IN sq1 AND y IN sq2` +- composite-key subquery positions +- nested subqueries still compile level by level + +## Stage 2: Row Metadata Projection + +Replace the current "all tags true => `active_conditions = [true, ...]`" logic +with a plan-driven projection step. + +Current code path to replace: + +- `Shape.convert_change/3` +- `Shape.fill_move_tags/4` +- `Shape.make_active_conditions/1` + +Implement a helper that, given: + +- a `DnfPlan` +- a row +- a view map for direct subqueries + +computes: + +- `active_conditions` +- whether the row is included +- `tags` for the currently satisfied disjuncts + +For updates, it must compute both old and new row metadata so we can derive: + +- old inclusion vs new inclusion +- `removed_tags` + +Important design point: + +- `tags` are structural row metadata +- subquery moves change `active_conditions`, not the tags themselves +- tags only change when row contents change or the set of satisfied disjuncts + changes because of the row itself + +Implementation direction: + +- keep the no-subquery fast path for shapes without dependencies +- for shapes with dependencies, derive inclusion from the DNF projection rather + than calling `WhereClause.includes_record?/3` separately and then filling tags + afterwards + +Suggested tests: + +- insert/update/delete conversion across multiple disjuncts +- update that changes which disjuncts are satisfied +- correct `removed_tags` +- correct `active_conditions` for row-only predicates and subquery predicates +- single-subquery regression + +## Stage 3: Parameterized SQL Generation + +Generalize `Electric.Shapes.Querying` so snapshot and move-in queries are built +from the DNF sidecar plan instead of the current `shape.where.query` string. + +Current code paths to replace or split: + +- `Querying.stream_initial_data/5` +- `Querying.query_move_in/5` +- `Querying.json_like_select/4` +- `Querying.make_tags/3` + +### 3A. Initial snapshot + +For subquery shapes, initial snapshot queries should: + +- stay on the existing live-subquery path +- compute row inclusion in SQL +- compute `active_conditions` in SQL +- compute `tags` in SQL + +This likely needs a new query builder returning: + +- SQL string +- params list + +only if the existing snapshot JSON builder cannot be extended cleanly. + +### 3B. Move-in query + +Build move-in SQL from the DNF plan: + +- candidate predicate from impacted disjuncts, with triggering positions + replaced by `move_in_values` +- exclusion predicate from only the disjuncts that could already have been true +- `active_conditions` computed against `views_after_move` +- `tags` computed for the inserted rows + +The move-in query should return rows ready to append to the log with correct +headers, not partially interpreted state. + +### 3C. Move broadcasts + +Add control-message builders for: + +- `move-in` +- `move-out` + +using: + +- DNF position indexes +- hashed subquery values + +Suggested tests: + +- generated SQL / params for `x IN sq1 OR y IN sq2` +- generated SQL / params for `(x IN sq1 AND status='open') OR y IN sq2` +- broadcast payloads for single-column and composite-key subquery positions + +## Stage 4: Generalize Subquery Runtime + +Rework the current single-dependency subquery state machine into an N-dependency +runtime. + +Current code paths to generalize: + +- `lib/electric/shapes/consumer/state.ex` +- `lib/electric/shapes/consumer.ex` +- `lib/electric/shapes/consumer/subqueries.ex` +- `lib/electric/shapes/consumer/subqueries/steady.ex` +- `lib/electric/shapes/consumer/subqueries/buffering.ex` + +### 4A. Initialization + +Today `initialize_subquery_runtime/1` only handles: + +- exactly one dependency +- no OR-with-subquery +- no NOT-with-subquery + +Change this to: + +- initialize DNF runtime for any shape with subqueries +- wait for all dependency materializers +- fetch all current link-value views +- keep the existing invalidation path only for `NOT` + subquery shapes + +### 4B. Runtime state + +Replace single-dependency fields like: + +- `dependency_handle` +- `subquery_ref` +- `subquery_view` + +with: + +- `views` +- `dependency_handle_to_ref` +- `dnf_plan` +- trigger-specific move fields in buffering state + +Queue items should become something like: + +- `{:move_in, dependency_handle, values}` +- `{:move_out, dependency_handle, values}` + +### 4C. Splice flow + +Keep the current splice machinery, but operate on full view maps: + +- pre-boundary buffered txns use `views_before_move` +- move-in control messages are appended to the outer shape's log at the splice + point +- move-in query rows follow with `views_after_move` +- post-boundary buffered txns use `views_after_move` + +The exact ordering at the splice point should be: + +1. pre-boundary buffered transactions +2. move-in control messages for already-present rows +3. move-in query rows for newly visible rows +4. post-boundary buffered transactions + +That ordering matches the `active_conditions` semantics: + +- existing rows learn the new true positions at the boundary +- newly visible rows arrive with already-correct metadata + +### 4D. Remove invalidation cases + +Once the DNF runtime is in place, remove the current invalidation for: + +- OR + subquery +- multiple sibling subqueries + +Keep invalidation for: + +- `NOT` + subquery + +Suggested tests: + +- state machine for multiple dependencies +- serialized move queue across dependencies +- OR case no longer invalidates +- `NOT` + subquery still invalidates on move +- single-subquery regression + +## Stage 5: Materializer Upgrade + +This is the most important supporting change, because the current materializer +assumes: + +- one move tag per row +- `move-out` means remove the row +- `pos` is ignored +- `active_conditions` are not parsed or stored + +Current code paths: + +- `lib/electric/shapes/consumer/materializer.ex` + +### 5A. Stored row state + +Change the materializer's row index to retain enough metadata to re-evaluate +inclusion when broadcasts arrive: + +- row value +- tags +- `active_conditions` +- whether the row is currently included + +### 5B. Tag index + +Replace the current naive tag index with a position-aware index, for example: + +- `{position, hash} -> MapSet` + +This allows `move-in` and `move-out` broadcasts to target only the rows whose +tag contains the matching subquery value at that position. + +### 5C. Broadcast handling + +Add `move-in` event support to the decoder and runtime. + +On broadcast: + +- look up matching keys by `(position, hash)` +- flip the relevant `active_conditions[position]` +- re-evaluate row inclusion from `tags` + `active_conditions` +- only emit materializer `move_in` / `move_out` value events when the row's + inclusion actually changes + +This is what makes `x IN sq1 OR y IN sq2` work: + +- move-in on `sq1` can activate an already-present row without reinserting it +- move-out on `sq1` does not remove the row if the `sq2` tag still evaluates + true + +Suggested tests: + +- multiple tags per row +- `move-in` broadcast on already-present row +- `move-out` broadcast that does not remove the row because another disjunct + still holds +- `move-out` broadcast that does remove the row because no disjunct remains true +- composite-key tag indexing + +## Stage 6: Consumer / Log Integration + +Wire the new pieces through the consumer and log-writing path. + +Current code paths: + +- `lib/electric/log_items.ex` +- `lib/electric/shapes/consumer.ex` + +Work items: + +- make sure `active_conditions` are preserved on all row operations +- add `move-in` control message append path +- keep `move-out` control messages, but now interpret them as position flips, + not tag deletion +- make sure new control messages flow through both: + - storage-backed materializer replay + - live `new_changes` notifications + +Suggested tests: + +- log item encoding includes real `active_conditions` +- materializer replay from stored log sees `move-in` and `move-out` +- no protocol regression for existing clients + +## Stage 7: End-to-End Test Matrix + +Add higher-level coverage once the pieces exist. + +### Core scenarios + +- `x IN sq1 OR y IN sq2` +- `(x IN sq1 AND status = 'open') OR y IN sq2` +- `x IN sq1 AND y IN sq2` +- row already present via one disjunct, then another disjunct moves in +- row loses one active position but remains via another disjunct +- row loses its last active reason and leaves the shape + +### Regressions + +- current single-subquery move-in path +- current single-subquery move-out path +- composite-key subqueries +- nested subqueries +- `NOT` + subquery still invalidates / 409s on move + +### Suggested test locations + +- `test/electric/replication/eval/` for DNF plan / SQL generation +- `test/electric/plug/router_test.exs` for a router-level integration test of + `x IN subquery1 OR y IN subquery2` covering: + - initial snapshot + - move-in that adds rows via one side + - move-in on one side for rows already present via the other side + - move-out that removes one reason but keeps the row + - move-out that removes the last remaining reason +- `test/electric/shapes/consumer/subqueries_test.exs` +- `test/electric/shapes/consumer_test.exs` +- `test/electric/shapes/querying_test.exs` +- `test/electric/shapes/shape_test.exs` +- `test/electric/shape_cache_test.exs` + +## Recommended Landing Order + +1. DNF sidecar plan + unit tests +2. row metadata projection (`tags` + real `active_conditions`) +3. parameterized snapshot SQL and move-in SQL generation +4. materializer support for multiple tags and move broadcasts +5. generalized consumer subquery runtime +6. remove OR / multiple-subquery invalidation +7. end-to-end regressions + +This order keeps the early work local and testable, and delays the invasive +consumer-state change until the metadata, SQL, and materializer semantics are +already nailed down. + +## Prototype Notes To Keep + +- broad `Shapes.Filter` routing is acceptable +- broad `%LsnUpdate{}` broadcast is acceptable +- in-memory buffering is acceptable +- performance is secondary to getting the semantics readable and consistent From 5c76383aeab1860e9de362457450a258c4327839 Mon Sep 17 00:00:00 2001 From: rob Date: Sat, 14 Mar 2026 11:18:57 +0000 Subject: [PATCH 20/63] Docs: Add negation plan --- packages/sync-service/negation.md | 300 ++++++++++++++++++++++++++++++ 1 file changed, 300 insertions(+) create mode 100644 packages/sync-service/negation.md diff --git a/packages/sync-service/negation.md b/packages/sync-service/negation.md new file mode 100644 index 0000000000..ef89b2707a --- /dev/null +++ b/packages/sync-service/negation.md @@ -0,0 +1,300 @@ +# Plan: Negated Subquery Moves + +## Goal + +Support negated subquery predicates in the DNF runtime, including: + +- `x NOT IN (SELECT ...)` +- `NOT (x = 7 OR y IN (SELECT ...))` +- mixed positive/negative DNF shapes where the outer shape should no longer + fall back to 409-on-move invalidation + +The key requirement is that negation must work with the existing move +broadcasts, move-in splice flow, and DNF `active_conditions` model. + +## Current State + +Most of the DNF work is already in place: + +- `Electric.Shapes.DnfPlan` already preserves per-position negation metadata. +- `Shape.convert_change/3` already uses `DnfPlan.project_row/6`, which can + evaluate negated positions from a concrete subquery view map. +- the materializer already handles position-aware `move-in` / `move-out` + broadcasts by flipping `active_conditions[pos]`. + +The remaining blocker is that the runtime still treats "dependency move +direction" and "outer shape effect" as the same thing. That is true for +positive subqueries, but false for negated ones: + +- positive subquery: + - dependency move-in -> outer move-in + - dependency move-out -> outer move-out +- negated subquery: + - dependency move-in -> outer move-out + - dependency move-out -> outer move-in + +That mismatch currently shows up in three places: + +- `lib/electric/shapes/consumer.ex` + - `initialize_subquery_runtime/1` still skips runtime setup for + `%DnfPlan{has_negated_subquery: true}` +- `lib/electric/shapes/querying.ex` + - `dnf_plan_for_metadata/2` still suppresses DNF metadata for negated plans +- `lib/electric/shapes/consumer/state.ex` + - `not_with_subquery?` is still computed even though the DNF runtime should + own this now + +## Design Direction + +Keep `views` as the actual dependency view, not the outer shape's +"allowed-values" view. + +That matters because: + +- `DnfPlan.project_row/6` expects actual dependency results in `extra_refs` +- move-in query rows must be returned with `active_conditions` computed against + the actual post-move dependency view +- streamed transactions before/after the splice boundary must keep using the + real before/after dependency views + +So the runtime needs two separate concepts: + +1. dependency delta + - did the dependency view add a value or remove a value? +2. outer effect + - does that delta make outer rows move in or move out? + +For positive subqueries those are aligned. For negated subqueries they invert. + +## Work Plan + +### 1. Remove the Negation Guards + +Delete the remaining "negation means unsupported" branches: + +- remove `State.not_with_subquery?` and `has_not_with_subquery?/1` +- stop treating `%DnfPlan{has_negated_subquery: true}` as a reason to leave + `subquery_state` unset in `lib/electric/shapes/consumer.ex` +- stop rejecting negated DNF plans in `lib/electric/shapes/querying.ex` + +At that point, negated shapes will enter the same runtime as positive ones. + +`Shape.convert_change/3` should not need special negation work beyond this, +because it already goes through `DnfPlan.project_row/6`. + +### 2. Teach `DnfPlan` About Dependency Polarity + +The runtime needs dependency-level polarity metadata, not just a boolean +`has_negated_subquery`. + +Add plan helpers such as: + +- dependency polarity per dependency index +- `effect_for_dependency_delta(plan, dep_index, :view_add | :view_remove)` + returning `:move_in | :move_out` + +Also add an explicit validation or test that all positions for one dependency +share the same negation semantics. That should hold today because one +dependency handle represents one subquery occurrence, but it is worth pinning +down. If it does not hold, the mapping has to become position-based rather +than dependency-based. + +`has_negated_subquery` can then either be removed or kept as informational +metadata only. It should stop driving runtime feature gating. + +### 3. Split Queue Semantics Into View Delta vs Outer Effect + +This is the main consumer change. + +Today `MoveQueue`, `Subqueries.drain_queue/2`, and `Buffering.from_steady/5` +all assume: + +- `move_in` means "add values to the dependency view and run a move-in query" +- `move_out` means "remove values from the dependency view and emit a move-out broadcast" + +That only works for positive subqueries. + +Refactor the queue/runtime state to track actual dependency deltas explicitly, +for example: + +- `:view_add` +- `:view_remove` + +Then derive the outer effect from plan polarity when draining: + +- positive dep: + - `:view_add` -> outer `move-in` + - `:view_remove` -> outer `move-out` +- negated dep: + - `:view_add` -> outer `move-out` + - `:view_remove` -> outer `move-in` + +Important detail: redundancy elimination in `MoveQueue` should keep operating +against the actual dependency view, not the outer effect. Otherwise a negated +dependency move-out would be incorrectly treated as a redundant `move-in` +because the value is still present in the pre-move dependency view. + +### 4. Generalize Buffering State for Negated Move-Ins + +Buffering currently assumes "outer move-in" implies "dependency view add". + +That needs to change. Store the trigger as actual delta metadata, not as +`move_in_values` alone, for example: + +- trigger dependency index +- trigger delta kind (`:view_add` or `:view_remove`) +- trigger delta values +- `views_before_move` +- `views_after_move` + +For negated subqueries: + +- dependency `view_add` should become an immediate outer `move-out` + - update `views` by adding the value + - emit a `move-out` broadcast + - no query +- dependency `view_remove` should become a buffered outer `move-in` + - `views_before_move` contains the value + - `views_after_move` removes the value + - the move-in query and post-splice changes use `views_after_move` + +This is the place where we "convert move-ins to move-outs and vice versa" in +the consumer, but without losing track of the real dependency-view transition. + +### 5. Make Move-In Query Generation Delta-Aware + +`DnfPlan.move_in_where_clause/5` is still positive-subquery-shaped. + +For negated outer move-ins, the trigger comes from a dependency removal, and +the candidate rows are those matching the removed values, not rows satisfying +`NOT membership` over the removed-value set. + +So the move query builder should accept: + +- dependency index +- actual delta kind +- delta values +- `views_before_move` +- `views_after_move` if needed for active-condition SQL + +The trigger-position rule should be: + +- when a dependency delta causes an outer move-in, the trigger position is + replaced by membership against the delta values themselves +- the negated/non-negated meaning is still applied when computing + `active_conditions` for the returned rows against the post-move view + +Concretely: + +- positive subquery + dependency add: + - candidate uses `x IN moved_in_values` +- negated subquery + dependency remove: + - candidate also uses `x IN removed_values` + +The difference is not in candidate matching; it is in which dependency delta +produces an outer move-in, and in which post-move view is used for the +returned metadata. + +### 6. Keep Broadcast Semantics, but Emit the Right One + +The materializer already knows how to interpret: + +- `move-in` as `active_conditions[pos] = true` +- `move-out` as `active_conditions[pos] = false` + +That means negation support should mostly be achieved by making the consumer +emit the correct broadcast for the outer effect: + +- negated dependency add -> emit outer `move-out` +- negated dependency remove -> emit outer `move-in` + +No new negation-specific materializer protocol is needed. + +What does need verification: + +- rows that leave the shape on a negated `move-out` can still be reintroduced + later by a negated `move-in` query +- rows that remain included via another disjunct only get their + `active_conditions` flipped, not spuriously deleted + +### 7. Initial Snapshot Metadata Must Use DNF for Negated Shapes + +Once the query-side guard is removed, initial snapshot queries for negated +subquery shapes should emit: + +- real `tags` +- real `active_conditions` + +using the same DNF metadata path as positive shapes. + +Without that, later move broadcasts cannot update already-present rows +correctly. + +The existing `DnfPlan.active_conditions_sql/1` and `tags_sql/3` look close to +what we need already; the main change is to let negated plans reach that path +and add tests around the emitted metadata. + +## Tests To Add Or Rewrite + +### Unit + +- `test/electric/shapes/dnf_plan_test.exs` + - dependency polarity metadata + - negated plan no longer treated as unsupported + - move-query SQL for `NOT IN` + - move-query SQL for `NOT (x = 7 OR y IN subquery)` + +- `test/electric/shapes/querying_test.exs` + - initial snapshot metadata for negated shapes includes correct `tags` and + `active_conditions` + +- `test/electric/shapes/consumer/subqueries/move_queue_test.exs` + - queue reduction remains based on actual dependency view deltas + - negated dependency remove survives as an outer move-in + - negated dependency add survives as an outer move-out + +- `test/electric/shapes/consumer/subqueries_test.exs` + - negated dependency add updates the view immediately and emits move-out + - negated dependency remove buffers, splices, and emits move-in + - pre/post-boundary txn conversion uses the correct before/after actual views + +- `test/electric/shapes/consumer/materializer_test.exs` + - negated move-out deletes only when the row loses its last active reason + - negated move-in can re-activate an already present row via broadcast + +### Integration + +- replace the current 409 expectation in + `test/electric/plug/router_test.exs` for `NOT IN` with real move semantics +- add router/integration coverage for: + - `parent_id NOT IN (SELECT id FROM parent WHERE excluded = true)` + - `NOT (value = 7 OR parent_id IN (SELECT ...))` + - a mixed DNF case where one disjunct is negated and another positive + +### Oracle / Property + +The oracle harness already generates `NOT IN` cases, but currently marks them +as unoptimized. Once the runtime lands, update +`test/support/oracle_harness/where_clause_generator.ex` so supported negated +subquery shapes participate in optimized-vs-oracle comparisons. + +## Recommended Landing Order + +1. remove `not_with_subquery?` and the runtime/query gating +2. add dependency polarity metadata to `DnfPlan` +3. refactor `MoveQueue` / `Subqueries` / `Buffering` to track actual view deltas +4. generalize move-in query generation to use delta-aware trigger semantics +5. enable initial snapshot DNF metadata for negated shapes +6. replace 409 tests with semantic integration coverage +7. widen oracle/property coverage + +## Non-Goals + +- no new invalidation path +- no filter/event-router redesign +- no change to the client protocol beyond using the existing `move-in` / + `move-out` events correctly + +The existing materializer protocol should be sufficient if the consumer emits +the right event name and keeps the actual dependency views consistent across the +splice. From 527a37cc84c30d4da850f351563077186375f162 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 14:31:22 +0000 Subject: [PATCH 21/63] Update client --- .../lib/electric/client/message.ex | 69 ++++++++ .../elixir-client/lib/electric/client/poll.ex | 14 ++ .../lib/electric/client/tag_tracker.ex | 90 ++++------ .../test/electric/client/tag_tracker_test.exs | 166 +++++++++++------- 4 files changed, 223 insertions(+), 116 deletions(-) diff --git a/packages/elixir-client/lib/electric/client/message.ex b/packages/elixir-client/lib/electric/client/message.ex index 777699b601..3d96a809cf 100644 --- a/packages/elixir-client/lib/electric/client/message.ex +++ b/packages/elixir-client/lib/electric/client/message.ex @@ -262,6 +262,57 @@ defmodule Electric.Client.Message do end end + defmodule MoveInMessage do + @moduledoc """ + Represents a move-in event from the server. + + Move-in events are sent when the server's subquery filter has changed and + rows may now be included in the shape. The `patterns` field contains position + and hash information that the client uses to update `active_conditions` on + tracked rows. + """ + + defstruct [:patterns, :handle, :request_timestamp] + + @type pattern :: %{pos: non_neg_integer(), value: String.t()} + @type t :: %__MODULE__{ + patterns: [pattern()], + handle: Client.shape_handle(), + request_timestamp: DateTime.t() + } + + def from_message( + %{"headers" => %{"event" => "move-in", "patterns" => patterns}}, + handle, + request_timestamp + ) do + %__MODULE__{ + patterns: normalize_patterns(patterns), + handle: handle, + request_timestamp: request_timestamp + } + end + + def from_message( + %{headers: %{event: "move-in", patterns: patterns}}, + handle, + request_timestamp + ) do + %__MODULE__{ + patterns: normalize_patterns(patterns), + handle: handle, + request_timestamp: request_timestamp + } + end + + defp normalize_patterns(patterns) do + Enum.map(patterns, fn + %{"pos" => pos, "value" => value} -> %{pos: pos, value: value} + %{pos: _, value: _} = pattern -> pattern + end) + end + end + defguard is_insert(msg) when is_struct(msg, ChangeMessage) and msg.headers.operation == :insert def parse(%{"value" => _} = msg, shape_handle, value_mapper_fun, request_timestamp) do @@ -299,6 +350,24 @@ defmodule Electric.Client.Message do [MoveOutMessage.from_message(msg, shape_handle, request_timestamp)] end + def parse( + %{"headers" => %{"event" => "move-in"}} = msg, + shape_handle, + _value_mapper_fun, + request_timestamp + ) do + [MoveInMessage.from_message(msg, shape_handle, request_timestamp)] + end + + def parse( + %{headers: %{event: "move-in"}} = msg, + shape_handle, + _value_mapper_fun, + request_timestamp + ) do + [MoveInMessage.from_message(msg, shape_handle, request_timestamp)] + end + def parse("", _handle, _value_mapper_fun, _request_timestamp) do [] end diff --git a/packages/elixir-client/lib/electric/client/poll.ex b/packages/elixir-client/lib/electric/client/poll.ex index 1409b66ddb..59b3751c9e 100644 --- a/packages/elixir-client/lib/electric/client/poll.ex +++ b/packages/elixir-client/lib/electric/client/poll.ex @@ -267,6 +267,20 @@ defmodule Electric.Client.Poll do {:messages, synthetic_deletes, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} end + defp handle_message( + %Message.MoveInMessage{patterns: patterns}, + state + ) do + {tag_to_keys, key_data} = + TagTracker.handle_move_in( + state.tag_to_keys, + state.key_data, + patterns + ) + + {:skip, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} + end + defp handle_schema(%Fetch.Response{schema: schema}, client, %{value_mapper_fun: nil} = state) when is_map(schema) do {parser_module, parser_opts} = client.parser diff --git a/packages/elixir-client/lib/electric/client/tag_tracker.ex b/packages/elixir-client/lib/electric/client/tag_tracker.ex index bab9c5bf7b..fff6d1d3c1 100644 --- a/packages/elixir-client/lib/electric/client/tag_tracker.ex +++ b/packages/elixir-client/lib/electric/client/tag_tracker.ex @@ -159,78 +159,56 @@ defmodule Electric.Client.TagTracker do patterns, request_timestamp ) do - # First pass: collect all keys that match any pattern and remove those entries - {matched_keys_with_entries, updated_tag_to_keys} = - Enum.reduce(patterns, {%{}, tag_to_keys}, fn %{pos: pos, value: value}, - {keys_acc, ttk_acc} -> + # First pass: collect all keys that match any pattern (without modifying tag_to_keys) + matched_keys_with_entries = + Enum.reduce(patterns, %{}, fn %{pos: pos, value: value}, keys_acc -> tag_key = {pos, value} - case Map.pop(ttk_acc, tag_key) do - {nil, ttk_acc} -> - {keys_acc, ttk_acc} - - {keys_in_tag, ttk_acc} -> - updated_keys_acc = - Enum.reduce(keys_in_tag, keys_acc, fn key, acc -> - removed = Map.get(acc, key, MapSet.new()) - Map.put(acc, key, MapSet.put(removed, tag_key)) - end) + case Map.get(tag_to_keys, tag_key) do + nil -> + keys_acc - {updated_keys_acc, ttk_acc} + keys_in_tag -> + Enum.reduce(keys_in_tag, keys_acc, fn key, acc -> + Map.update(acc, key, MapSet.new([tag_key]), &MapSet.put(&1, tag_key)) + end) end end) - # Second pass: for each matched key, update state and check visibility - {keys_to_delete, updated_key_data, orphaned_entries} = - Enum.reduce(matched_keys_with_entries, {[], key_data, []}, fn {key, removed_entries}, - {deletes, kd_acc, orphans} -> + # Second pass: evaluate visibility, update key_data and tag_to_keys together + {keys_to_delete, updated_key_data, updated_tag_to_keys} = + Enum.reduce(matched_keys_with_entries, {[], key_data, tag_to_keys}, fn {key, + removed_entries}, + {deletes, kd_acc, + ttk_acc} -> case Map.get(kd_acc, key) do nil -> - {deletes, kd_acc, orphans} + {deletes, kd_acc, ttk_acc} %{tags: current_entries, msg: msg} = data -> - remaining_entries = MapSet.difference(current_entries, removed_entries) - - # Determine if key should be deleted - {should_delete, updated_data} = - if data.active_conditions != nil and disjunct_positions != nil do - # DNF mode: deactivate positions and check visibility - deactivated_positions = - MapSet.new(removed_entries, fn {pos, _} -> pos end) - - updated_ac = - data.active_conditions - |> Enum.with_index() - |> Enum.map(fn {val, idx} -> - if MapSet.member?(deactivated_positions, idx), do: false, else: val - end) - - visible = row_visible?(updated_ac, disjunct_positions) - - {not visible, %{data | tags: remaining_entries, active_conditions: updated_ac}} - else - # Old mode: delete if no remaining entries - {MapSet.size(remaining_entries) == 0, %{data | tags: remaining_entries}} - end + deactivated_positions = + MapSet.new(removed_entries, fn {pos, _} -> pos end) + + updated_ac = + data.active_conditions + |> Enum.with_index() + |> Enum.map(fn {val, idx} -> + if MapSet.member?(deactivated_positions, idx), do: false, else: val + end) + + visible = row_visible?(updated_ac, disjunct_positions) + + if not visible do + ttk_acc = remove_key_from_tags(ttk_acc, current_entries, key) - if should_delete do - {[{key, msg} | deletes], Map.delete(kd_acc, key), - [{key, remaining_entries} | orphans]} + {[{key, msg} | deletes], Map.delete(kd_acc, key), ttk_acc} else - {deletes, Map.put(kd_acc, key, updated_data), orphans} + updated_data = %{data | active_conditions: updated_ac} + {deletes, Map.put(kd_acc, key, updated_data), ttk_acc} end end end) - # Third pass: clean up remaining entries from tag_to_keys for deleted keys. - # The first pass only removed matched entries via Map.pop; remaining entries - # for deleted keys would otherwise persist as stale references, causing - # phantom synthetic deletes when matching future deactivation patterns. - updated_tag_to_keys = - Enum.reduce(orphaned_entries, updated_tag_to_keys, fn {key, remaining}, ttk -> - remove_key_from_tags(ttk, remaining, key) - end) - # Generate synthetic delete messages synthetic_deletes = Enum.map(keys_to_delete, fn {key, original_msg} -> diff --git a/packages/elixir-client/test/electric/client/tag_tracker_test.exs b/packages/elixir-client/test/electric/client/tag_tracker_test.exs index c63dc6f14e..1b6c6560e9 100644 --- a/packages/elixir-client/test/electric/client/tag_tracker_test.exs +++ b/packages/elixir-client/test/electric/client/tag_tracker_test.exs @@ -112,14 +112,23 @@ defmodule Electric.Client.TagTrackerTest do describe "generate_synthetic_deletes/5" do test "generates deletes for keys matching pattern" do - # Set up: two keys with tag_a - msg1 = make_change_msg("key1", :insert, tags: ["tag_a"], value: %{"id" => "1"}) - msg2 = make_change_msg("key2", :insert, tags: ["tag_a"], value: %{"id" => "2"}) + msg1 = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true], + value: %{"id" => "1"} + ) + + msg2 = + make_change_msg("key2", :insert, + tags: ["tag_a"], + active_conditions: [true], + value: %{"id" => "2"} + ) {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) - # Move-out for tag_a patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() @@ -140,31 +149,39 @@ defmodule Electric.Client.TagTrackerTest do assert new_key_data == %{} end - test "does not delete keys with remaining tags" do - # Set up: key1 has tag_a and tag_b - msg = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"], value: %{"id" => "1"}) + test "does not delete keys still visible via another disjunct" do + # key1 has two disjuncts: pos 0 and pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["tag_a/", "/tag_b"], + active_conditions: [true, true], + value: %{"id" => "1"} + ) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) - # Move-out only for tag_a + # Move-out only for pos 0 patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) - # No synthetic deletes - key1 still has tag_b assert deletes == [] + assert new_key_data["key1"].active_conditions == [false, true] - # tag_a removed, tag_b remains - assert new_tag_to_keys == %{ - {0, "tag_b"} => MapSet.new(["key1"]) - } - - assert new_key_data["key1"].tags == MapSet.new([{0, "tag_b"}]) + # tag_to_keys entries preserved for move-in broadcasts + assert Map.has_key?(new_tag_to_keys, {0, "tag_a"}) + assert Map.has_key?(new_tag_to_keys, {1, "tag_b"}) end test "handles non-existent tag pattern" do - msg = make_change_msg("key1", :insert, tags: ["tag_a"]) + msg = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true] + ) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) patterns = [%{pos: 0, value: "nonexistent_tag"}] @@ -179,8 +196,17 @@ defmodule Electric.Client.TagTrackerTest do end test "handles multiple patterns in one call" do - msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - msg2 = make_change_msg("key2", :insert, tags: ["tag_b"]) + msg1 = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true] + ) + + msg2 = + make_change_msg("key2", :insert, + tags: ["tag_b"], + active_conditions: [true] + ) {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) @@ -481,69 +507,89 @@ defmodule Electric.Client.TagTrackerTest do assert hd(deletes).key == "key1" end - test "delete on empty tag set for simple shapes (no active_conditions)" do - # Insert row with a single-position tag but NO active_conditions + test "move-out preserves tag_to_keys so move-in can re-activate" do + # Row with two disjuncts: pos 0 and pos 1 msg = make_change_msg("key1", :insert, - tags: ["hash1"], + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true], value: %{"id" => "1", "name" => "User 1"} ) {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) - assert kd["key1"].active_conditions == nil - # Move-out at position 0 — no active_conditions: tag removed, tag set empty → delete - patterns = [%{pos: 0, value: "hash1"}] - timestamp = DateTime.utc_now() + # Move-out at pos 0 — row stays visible via disjunct 1 + patterns = [%{pos: 0, value: "hash_a"}] - {deletes, new_ttk, new_kd} = - TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) - assert length(deletes) == 1 - assert hd(deletes).key == "key1" - assert new_kd == %{} - assert new_ttk == %{} + assert deletes == [] + assert kd["key1"].active_conditions == [false, true] + + # Move-in at pos 0 — should find key1 via preserved tag_to_keys entry + patterns = [%{pos: 0, value: "hash_a"}] + {ttk, kd} = TagTracker.handle_move_in(ttk, kd, patterns) + + assert kd["key1"].active_conditions == [true, true] + + # Now both disjuncts active again; move-out at pos 1 alone should not delete + patterns = [%{pos: 1, value: "hash_b"}] + + {deletes, _ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert deletes == [] + assert kd["key1"].active_conditions == [true, false] end - test "mixed rows: some with active_conditions, some without" do - # Row 1: DNF shape (with active_conditions) - msg1 = + test "deleted row cleans up all tag_to_keys entries" do + # Row with entries at pos 0 and pos 1 in a single disjunct + msg = make_change_msg("key1", :insert, - tags: ["hash_a/", "/hash_b"], + tags: ["hash_a/hash_b"], active_conditions: [true, true], - value: %{"id" => "1", "name" => "DNF User"} + value: %{"id" => "1"} ) - # Row 2: simple shape (single-position tag, no active_conditions) - msg2 = - make_change_msg("key2", :insert, - tags: ["hash_a"], - value: %{"id" => "2", "name" => "Simple User"} - ) + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + assert Map.has_key?(ttk, {0, "hash_a"}) + assert Map.has_key?(ttk, {1, "hash_b"}) - {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) - {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + # Move-out at pos 0 — single disjunct [0,1] fails → row deleted + patterns = [%{pos: 0, value: "hash_a"}] - assert Map.has_key?(kd, "key1") - assert Map.has_key?(kd, "key2") + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) - # Move-out at position 0 with value hash_a - # DNF row: disjunct 0 ([0]) fails, but disjunct 1 ([1]) still satisfied → stays - # Simple row: tag "hash_a" at pos 0 removed, tag set empty → deleted - patterns = [%{pos: 0, value: "hash_a"}] - timestamp = DateTime.utc_now() + assert length(deletes) == 1 + assert kd == %{} + # Both entries cleaned, not just the matched {0, "hash_a"} + refute Map.has_key?(ttk, {0, "hash_a"}) + refute Map.has_key?(ttk, {1, "hash_b"}) + end - {deletes, _ttk, new_kd} = - TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + test "multiple patterns deactivating same row in one call" do + # Row with single disjunct needing both pos 0 and pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) - # DNF row stays, simple row deleted - deleted_keys = Enum.map(deletes, & &1.key) |> MapSet.new() - assert MapSet.member?(deleted_keys, "key2") - refute MapSet.member?(deleted_keys, "key1") + # Both positions deactivated in one call + patterns = [%{pos: 0, value: "hash_a"}, %{pos: 1, value: "hash_b"}] - assert Map.has_key?(new_kd, "key1") - assert new_kd["key1"].active_conditions == [false, true] - refute Map.has_key?(new_kd, "key2") + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + assert kd == %{} + assert ttk == %{} end test "disjunct_positions derived once and reused across keys" do From 0ba19de7ac92db4bbb534f22bd9dec51fd6a710d Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 14:32:49 +0000 Subject: [PATCH 22/63] Docs: Add negation.md --- packages/sync-service/negation.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/sync-service/negation.md b/packages/sync-service/negation.md index ef89b2707a..71f68d66c2 100644 --- a/packages/sync-service/negation.md +++ b/packages/sync-service/negation.md @@ -17,7 +17,7 @@ broadcasts, move-in splice flow, and DNF `active_conditions` model. Most of the DNF work is already in place: - `Electric.Shapes.DnfPlan` already preserves per-position negation metadata. -- `Shape.convert_change/3` already uses `DnfPlan.project_row/6`, which can +- `Shape.convert_change/3` already uses `DnfPlan.get_row_metadata/6`, which can evaluate negated positions from a concrete subquery view map. - the materializer already handles position-aware `move-in` / `move-out` broadcasts by flipping `active_conditions[pos]`. @@ -51,7 +51,7 @@ Keep `views` as the actual dependency view, not the outer shape's That matters because: -- `DnfPlan.project_row/6` expects actual dependency results in `extra_refs` +- `DnfPlan.get_row_metadata/6` expects actual dependency results in `extra_refs` - move-in query rows must be returned with `active_conditions` computed against the actual post-move dependency view - streamed transactions before/after the splice boundary must keep using the @@ -80,7 +80,7 @@ Delete the remaining "negation means unsupported" branches: At that point, negated shapes will enter the same runtime as positive ones. `Shape.convert_change/3` should not need special negation work beyond this, -because it already goes through `DnfPlan.project_row/6`. +because it already goes through `DnfPlan.get_row_metadata/6`. ### 2. Teach `DnfPlan` About Dependency Polarity From c127affec66a53b8eff7a04808bb523c12dce50f Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 14:33:53 +0000 Subject: [PATCH 23/63] Update sync-server --- .changeset/wild-walls-fly.md | 5 + .../sync-service/lib/electric/log_items.ex | 30 +- .../lib/electric/replication/changes.ex | 12 +- .../electric/replication/eval/decomposer.ex | 280 ++++++ .../replication/eval/sql_generator.ex | 369 ++++++++ .../lib/electric/shapes/consumer.ex | 570 ++++++++++-- .../shapes/consumer/change_handling.ex | 144 --- .../electric/shapes/consumer/materializer.ex | 441 ++++++++-- .../electric/shapes/consumer/move_handling.ex | 143 --- .../lib/electric/shapes/consumer/move_ins.ex | 358 -------- .../lib/electric/shapes/consumer/state.ex | 156 +--- .../electric/shapes/consumer/subqueries.ex | 441 ++++++++++ .../shapes/consumer/subqueries/buffering.ex | 179 ++++ .../shapes/consumer/subqueries/move_queue.ex | 109 +++ .../shapes/consumer/subqueries/query_row.ex | 11 + .../consumer/subqueries/state_machine.ex | 4 + .../shapes/consumer/subqueries/steady.ex | 62 ++ .../lib/electric/shapes/dnf_plan.ex | 661 ++++++++++++++ .../lib/electric/shapes/querying.ex | 116 ++- .../sync-service/lib/electric/shapes/shape.ex | 268 ++++-- .../electric/shapes/shape/subquery_moves.ex | 208 ----- .../test/electric/plug/router_test.exs | 203 ++++- .../replication/eval/decomposer_test.exs | 487 ++++++++++ .../replication/eval/sql_generator_test.exs | 621 +++++++++++++ .../shapes/consumer/change_handling_test.exs | 419 --------- .../shapes/consumer/materializer_test.exs | 215 +++++ .../shapes/consumer/move_ins_test.exs | 629 ------------- .../electric/shapes/consumer/state_test.exs | 169 ---- .../consumer/subqueries/move_queue_test.exs | 110 +++ .../shapes/consumer/subqueries_test.exs | 756 ++++++++++++++++ .../test/electric/shapes/consumer_test.exs | 133 ++- .../test/electric/shapes/dnf_plan_test.exs | 829 ++++++++++++++++++ .../test/electric/shapes/querying_test.exs | 89 +- .../shapes/shape/subquery_moves_test.exs | 218 ----- .../test/electric/shapes/shape_test.exs | 135 +++ .../test/support/pg_expression_generator.ex | 108 +++ 36 files changed, 6899 insertions(+), 2789 deletions(-) create mode 100644 .changeset/wild-walls-fly.md create mode 100644 packages/sync-service/lib/electric/replication/eval/decomposer.ex create mode 100644 packages/sync-service/lib/electric/replication/eval/sql_generator.ex delete mode 100644 packages/sync-service/lib/electric/shapes/consumer/change_handling.ex delete mode 100644 packages/sync-service/lib/electric/shapes/consumer/move_handling.ex delete mode 100644 packages/sync-service/lib/electric/shapes/consumer/move_ins.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/query_row.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex create mode 100644 packages/sync-service/lib/electric/shapes/dnf_plan.ex delete mode 100644 packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex create mode 100644 packages/sync-service/test/electric/replication/eval/decomposer_test.exs create mode 100644 packages/sync-service/test/electric/replication/eval/sql_generator_test.exs delete mode 100644 packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs delete mode 100644 packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs create mode 100644 packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs create mode 100644 packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs create mode 100644 packages/sync-service/test/electric/shapes/dnf_plan_test.exs delete mode 100644 packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs diff --git a/.changeset/wild-walls-fly.md b/.changeset/wild-walls-fly.md new file mode 100644 index 0000000000..cab843e171 --- /dev/null +++ b/.changeset/wild-walls-fly.md @@ -0,0 +1,5 @@ +--- +'@core/sync-service': patch +--- + +chore: improve Storage contract to have less coupling on snapshot appends diff --git a/packages/sync-service/lib/electric/log_items.ex b/packages/sync-service/lib/electric/log_items.ex index 474775ee0a..7af61b6219 100644 --- a/packages/sync-service/lib/electric/log_items.ex +++ b/packages/sync-service/lib/electric/log_items.ex @@ -49,7 +49,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) - |> put_if_true(:active_conditions, change.active_conditions) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end @@ -70,7 +74,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) - |> put_if_true(:active_conditions, change.active_conditions) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end @@ -92,7 +100,11 @@ defmodule Electric.LogItems do |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) |> put_if_true(:removed_tags, change.move_tags != [], change.removed_move_tags) - |> put_if_true(:active_conditions, change.active_conditions) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) } |> Map.merge(put_update_values(change, pk_cols, replica))} ] @@ -120,7 +132,11 @@ defmodule Electric.LogItems do change.move_tags != [], change.move_tags ++ change.removed_move_tags ) - |> put_if_true(:active_conditions, change.active_conditions) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }}, {new_offset, %{ @@ -137,7 +153,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) - |> put_if_true(:active_conditions, change.active_conditions) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end diff --git a/packages/sync-service/lib/electric/replication/changes.ex b/packages/sync-service/lib/electric/replication/changes.ex index 218bf02d56..717d7b7df0 100644 --- a/packages/sync-service/lib/electric/replication/changes.ex +++ b/packages/sync-service/lib/electric/replication/changes.ex @@ -189,7 +189,7 @@ defmodule Electric.Replication.Changes do :key, last?: false, move_tags: [], - active_conditions: nil + active_conditions: [] ] @type t() :: %__MODULE__{ @@ -199,7 +199,7 @@ defmodule Electric.Replication.Changes do key: String.t() | nil, last?: boolean(), move_tags: [Changes.tag()], - active_conditions: [boolean()] | nil + active_conditions: [boolean()] } end @@ -215,7 +215,7 @@ defmodule Electric.Replication.Changes do removed_move_tags: [], changed_columns: MapSet.new(), last?: false, - active_conditions: nil + active_conditions: [] ] @type t() :: %__MODULE__{ @@ -229,7 +229,7 @@ defmodule Electric.Replication.Changes do removed_move_tags: [Changes.tag()], changed_columns: MapSet.t(), last?: boolean(), - active_conditions: [boolean()] | nil + active_conditions: [boolean()] } def new(attrs) do @@ -272,7 +272,7 @@ defmodule Electric.Replication.Changes do :key, move_tags: [], last?: false, - active_conditions: nil + active_conditions: [] ] @type t() :: %__MODULE__{ @@ -282,7 +282,7 @@ defmodule Electric.Replication.Changes do key: String.t() | nil, move_tags: [Changes.tag()], last?: boolean(), - active_conditions: [boolean()] | nil + active_conditions: [boolean()] } end diff --git a/packages/sync-service/lib/electric/replication/eval/decomposer.ex b/packages/sync-service/lib/electric/replication/eval/decomposer.ex new file mode 100644 index 0000000000..391ceffe33 --- /dev/null +++ b/packages/sync-service/lib/electric/replication/eval/decomposer.ex @@ -0,0 +1,280 @@ +defmodule Electric.Replication.Eval.Decomposer do + @moduledoc """ + Decomposes a query to an expanded DNF form. + + Takes a where clause part of a query and decomposes it into a list of expressions + in a Disjunctive Normal Form (DNF). Each expression is a conjunction of comparisons. + + To avoid duplication, it returns a list of lists, where the outer list is a list of disjuncts (conjunctions), + and the inner list is a list of comparisons. Each comparison (i.e. the sub-expression of the original where clause) + is represented by an Erlang reference, which is then mentioned in the map of references to the AST of the + referenced subexpression. + + ## NOT handling + + To properly convert to DNF, NOT expressions are pushed down to leaf expressions using De Morgan's laws: + - `NOT (a OR b)` becomes `(NOT a) AND (NOT b)` + - `NOT (a AND b)` becomes `(NOT a) OR (NOT b)` + + Because of this, leaf expressions in the result can be either: + - `ref` - a positive reference to a subexpression + - `{:not, ref}` - a negated reference to a subexpression + - `nil` - this position is not part of this disjunct + + The subexpressions map always contains the base (non-negated) form of each expression. + + ## Expanded format + + The "expanded" part means that each inner list MUST be the same length, equal to the total count of expressions + across all disjuncts. Each position in the inner list corresponds to a specific expression slot from the original + query structure, and contains either a reference (possibly negated) to that subexpression or `nil` if that + expression is not part of the given disjunct. + + References allow deduplication: if the same subexpression appears in multiple disjuncts, they will share the + same reference (but occupy different positions, since positions correspond to the original query structure). + + ## Examples + + For the query (already in a normalized form): + + ```sql + WHERE (a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3) + ``` + + Has 3 disjuncts with 2 + 2 + 2 = 6 total expression slots. It will be decomposed into: + + ``` + [[r1, r2, nil, nil, nil, nil], + [nil, nil, r3, r4, nil, nil], + [nil, nil, nil, nil, r1, r3]] + ``` + + Where: + - Positions 0-1 correspond to disjunct 1's expressions (`a = 1`, `b = 2`) + - Positions 2-3 correspond to disjunct 2's expressions (`c = 3`, `d = 4`) + - Positions 4-5 correspond to disjunct 3's expressions (`a = 1`, `c = 3`) + - `r1` appears at positions 0 and 4 (same subexpression `a = 1`) + - `r3` appears at positions 2 and 5 (same subexpression `c = 3`) + + The reference map will contain: `r1 => "a = 1"`, `r2 => "b = 2"`, `r3 => "c = 3"`, `r4 => "d = 4"`. + + For a query with NOT that needs De Morgan transformation: + + ```sql + WHERE NOT (a = 1 OR b = 2) + ``` + + Becomes `(NOT a = 1) AND (NOT b = 2)` - a single disjunct with two negated terms: + + ``` + [[{:not, r1}, {:not, r2}]] + ``` + + And for: + + ```sql + WHERE NOT (a = 1 AND b = 2) + ``` + + Becomes `(NOT a = 1) OR (NOT b = 2)` - two disjuncts: + + ``` + [[{:not, r1}, nil], + [nil, {:not, r2}]] + ``` + """ + + alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.Parser.Func + alias Electric.Replication.Eval.SqlGenerator + + @type position :: non_neg_integer() + @type literal :: {position(), :positive | :negated} + @type conjunction :: [literal()] + @type dnf :: [conjunction()] + + @type subexpression :: %{ + ast: Parser.tree_part(), + is_subquery: boolean(), + negated: boolean() + } + + @type decomposition :: %{ + disjuncts: dnf(), + disjuncts_positions: [[position()]], + subexpressions: %{position() => subexpression()}, + position_count: non_neg_integer() + } + + @max_disjuncts 100 + + @spec decompose(query :: Parser.tree_part()) :: {:ok, decomposition()} | {:error, term()} + def decompose(query) do + internal_dnf = to_dnf(query, false) + + if length(internal_dnf) > @max_disjuncts do + {:error, + "WHERE clause too complex for DNF decomposition " <> + "(#{length(internal_dnf)} disjuncts exceeds limit of #{@max_disjuncts})"} + else + {expanded, ref_subexpressions} = expand(internal_dnf) + {:ok, to_decomposition(expanded, ref_subexpressions)} + end + end + + # Convert AST to internal DNF representation + # negated? tracks whether we're inside a NOT context (for De Morgan transformations) + defp to_dnf(%Func{name: name, args: args}, negated) when name in ~w(and or not) do + case {name, negated} do + {"or", false} -> + # OR: concatenate disjuncts from all branches + Enum.flat_map(args, &to_dnf(&1, false)) + + {"or", true} -> + # NOT OR => AND (De Morgan's law) + # NOT (a OR b) = NOT a AND NOT b + args_dnfs = Enum.map(args, &to_dnf(&1, true)) + cross_product(args_dnfs) + + {"and", false} -> + # AND: cross-product of disjuncts from all branches + args_dnfs = Enum.map(args, &to_dnf(&1, false)) + cross_product(args_dnfs) + + {"and", true} -> + # NOT AND => OR (De Morgan's law) + # NOT (a AND b) = NOT a OR NOT b + Enum.flat_map(args, &to_dnf(&1, true)) + + {"not", _} -> + # NOT: flip the negation state (handles double negation automatically) + [arg] = args + to_dnf(arg, not negated) + end + end + + defp to_dnf(leaf, negated) do + # Leaf expression: single disjunct with single term + [[{leaf, negated}]] + end + + # Cross-product of multiple DNF forms + # Used for AND distribution: (A1 OR A2) AND (B1 OR B2) => (A1 AND B1) OR (A1 AND B2) OR (A2 AND B1) OR (A2 AND B2) + defp cross_product([]), do: [[]] + + defp cross_product([dnf | rest]) do + rest_product = cross_product(rest) + + for disjunct <- dnf, rest_disjunct <- rest_product do + disjunct ++ rest_disjunct + end + end + + # Expand internal DNF to fixed-width format with references + defp expand(internal_dnf) do + # Calculate width of each disjunct and total width + widths = Enum.map(internal_dnf, &length/1) + total_width = Enum.sum(widths) + + # Calculate start positions for each disjunct: [0, w1, w1+w2, ...] + start_positions = calc_start_positions(widths) + + # Build subexpressions map with deduplication based on SQL string + {ast_to_ref, subexpressions} = build_subexpressions(internal_dnf) + + # Expand each disjunct to full width + disjuncts = + internal_dnf + |> Enum.zip(start_positions) + |> Enum.map(fn {disjunct, start_pos} -> + # Create a list of nils of the total width + row = List.duplicate(nil, total_width) + + # Fill in the terms at the appropriate positions + disjunct + |> Enum.with_index() + |> Enum.reduce(row, fn {{ast, negated}, term_idx}, row -> + pos = start_pos + term_idx + ref = Map.fetch!(ast_to_ref, deparse(ast)) + term = if negated, do: {:not, ref}, else: ref + List.replace_at(row, pos, term) + end) + end) + + {disjuncts, subexpressions} + end + + defp calc_start_positions(widths) do + widths + |> Enum.reduce({[], 0}, fn width, {positions, acc} -> + {positions ++ [acc], acc + width} + end) + |> elem(0) + end + + defp build_subexpressions(internal_dnf) do + internal_dnf + |> List.flatten() + |> Enum.map(fn {ast, _negated} -> ast end) + |> Enum.reduce({%{}, %{}}, fn ast, {ast_to_ref, subexpressions} -> + key = deparse(ast) + + case Map.fetch(ast_to_ref, key) do + {:ok, _ref} -> + {ast_to_ref, subexpressions} + + :error -> + ref = make_ref() + {Map.put(ast_to_ref, key, ref), Map.put(subexpressions, ref, ast)} + end + end) + end + + # Convert AST node back to SQL string for deduplication + defp deparse(ast) do + SqlGenerator.to_sql(ast) + end + + # Convert ref-based expanded format to position-indexed decomposition + defp to_decomposition(expanded, ref_subexpressions) do + position_count = if expanded == [[]], do: 0, else: length(hd(expanded)) + subexpressions = build_position_subexpressions(expanded, ref_subexpressions, position_count) + + disjuncts = + Enum.map(expanded, fn row -> + row + |> Enum.with_index() + |> Enum.flat_map(fn + {nil, _pos} -> [] + {{:not, _ref}, pos} -> [{pos, :negated}] + {_ref, pos} -> [{pos, :positive}] + end) + end) + + disjuncts_positions = Enum.map(disjuncts, fn conj -> Enum.map(conj, &elem(&1, 0)) end) + + %{ + disjuncts: disjuncts, + disjuncts_positions: disjuncts_positions, + subexpressions: subexpressions, + position_count: position_count + } + end + + defp build_position_subexpressions(_expanded, _ref_subexpressions, 0), do: %{} + + defp build_position_subexpressions(expanded, ref_subexpressions, position_count) do + Map.new(0..(position_count - 1), fn pos -> + term = Enum.find_value(expanded, fn row -> Enum.at(row, pos) end) + {ref, negated} = ref_and_polarity(term) + ast = Map.fetch!(ref_subexpressions, ref) + {pos, %{ast: ast, is_subquery: is_subquery?(ast), negated: negated}} + end) + end + + defp ref_and_polarity({:not, ref}), do: {ref, true} + defp ref_and_polarity(ref) when is_reference(ref), do: {ref, false} + + defp is_subquery?(%Func{name: "sublink_membership_check"}), do: true + defp is_subquery?(_), do: false +end diff --git a/packages/sync-service/lib/electric/replication/eval/sql_generator.ex b/packages/sync-service/lib/electric/replication/eval/sql_generator.ex new file mode 100644 index 0000000000..ed7cd14ca6 --- /dev/null +++ b/packages/sync-service/lib/electric/replication/eval/sql_generator.ex @@ -0,0 +1,369 @@ +defmodule Electric.Replication.Eval.SqlGenerator do + @moduledoc """ + Converts a parsed WHERE clause AST back into a SQL string. + + This is the inverse of `Parser` — where `Parser` turns SQL text into an AST, + `SqlGenerator` turns that AST back into SQL text. Used whenever the server + needs to embed a condition in a generated query (snapshot active_conditions, + move-in exclusion clauses, etc.). + + Uses precedence-aware parenthesization to produce minimal, readable SQL. + Parentheses are only added when needed to preserve the AST's evaluation order. + + Must handle every AST node type that `Parser` can produce. Raises + `ArgumentError` for unrecognised nodes so gaps are caught at shape + creation time, but the property-based round-trip test (see Tests below) + enforces that no parseable expression triggers this error. + """ + + alias Electric.Replication.Eval.Parser.{Const, Ref, Func, Array, RowExpr} + + # PostgreSQL operator precedence (higher number = tighter binding) + # See: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE + @prec_or 10 + @prec_and 20 + @prec_not 30 + @prec_is 40 + @prec_comparison 50 + @prec_like_in 60 + @prec_other_op 70 + @prec_addition 80 + @prec_multiplication 90 + @prec_exponent 100 + @prec_unary 110 + @prec_cast 130 + @prec_atom 1000 + + @doc """ + Convert an AST node to a SQL string. + + Handles: comparison operators (=, <>, <, >, <=, >=), pattern matching + (LIKE, ILIKE, NOT LIKE, NOT ILIKE), nullability (IS NULL, IS NOT NULL), + membership (IN), logical operators (AND, OR, NOT), boolean tests + (IS TRUE, IS FALSE, IS UNKNOWN, etc.), column references, constants + (strings, integers, floats, booleans, NULL), type casts, arithmetic + operators (+, -, *, /, ^, |/, @, &, |, #, ~), string concatenation (||), + array operators (@>, <@, &&), array/slice access, DISTINCT/NOT DISTINCT, + ANY/ALL, and sublink membership checks. + + Raises `ArgumentError` for unrecognised AST nodes. + """ + @spec to_sql(term()) :: String.t() + def to_sql(ast) do + {sql, _prec} = to_sql_prec(ast) + sql + end + + # --- Private: precedence-aware SQL generation --- + # Each clause returns {sql_string, precedence_level} + + # Comparison operators + defp to_sql_prec(%Func{name: "\"=\"", args: [left, right]}), + do: binary_op(left, "=", right, @prec_comparison) + + defp to_sql_prec(%Func{name: "\"<>\"", args: [left, right]}), + do: binary_op(left, "<>", right, @prec_comparison) + + defp to_sql_prec(%Func{name: "\"<\"", args: [left, right]}), + do: binary_op(left, "<", right, @prec_comparison) + + defp to_sql_prec(%Func{name: "\">\"", args: [left, right]}), + do: binary_op(left, ">", right, @prec_comparison) + + defp to_sql_prec(%Func{name: "\"<=\"", args: [left, right]}), + do: binary_op(left, "<=", right, @prec_comparison) + + defp to_sql_prec(%Func{name: "\">=\"", args: [left, right]}), + do: binary_op(left, ">=", right, @prec_comparison) + + # Pattern matching + defp to_sql_prec(%Func{name: "\"~~\"", args: [left, right]}), + do: binary_op(left, "LIKE", right, @prec_like_in) + + defp to_sql_prec(%Func{name: "\"~~*\"", args: [left, right]}), + do: binary_op(left, "ILIKE", right, @prec_like_in) + + defp to_sql_prec(%Func{name: "\"!~~\"", args: [left, right]}), + do: binary_op(left, "NOT LIKE", right, @prec_like_in) + + defp to_sql_prec(%Func{name: "\"!~~*\"", args: [left, right]}), + do: binary_op(left, "NOT ILIKE", right, @prec_like_in) + + # Nullability — parser produces "is null"/"is not null" from constant folding + # and "IS_NULL"/"IS_NOT_NULL" from NullTest on column refs + defp to_sql_prec(%Func{name: name, args: [arg]}) when name in ["is null", "IS_NULL"], + do: postfix_op(arg, "IS NULL", @prec_is) + + defp to_sql_prec(%Func{name: name, args: [arg]}) when name in ["is not null", "IS_NOT_NULL"], + do: postfix_op(arg, "IS NOT NULL", @prec_is) + + # Boolean tests + defp to_sql_prec(%Func{name: "IS_TRUE", args: [arg]}), + do: postfix_op(arg, "IS TRUE", @prec_is) + + defp to_sql_prec(%Func{name: "IS_NOT_TRUE", args: [arg]}), + do: postfix_op(arg, "IS NOT TRUE", @prec_is) + + defp to_sql_prec(%Func{name: "IS_FALSE", args: [arg]}), + do: postfix_op(arg, "IS FALSE", @prec_is) + + defp to_sql_prec(%Func{name: "IS_NOT_FALSE", args: [arg]}), + do: postfix_op(arg, "IS NOT FALSE", @prec_is) + + defp to_sql_prec(%Func{name: "IS_UNKNOWN", args: [arg]}), + do: postfix_op(arg, "IS UNKNOWN", @prec_is) + + defp to_sql_prec(%Func{name: "IS_NOT_UNKNOWN", args: [arg]}), + do: postfix_op(arg, "IS NOT UNKNOWN", @prec_is) + + # Membership (IN with literal array) + defp to_sql_prec(%Func{name: "in", args: [left, %Array{elements: elements}]}) do + values = Enum.map_join(elements, ", ", &to_sql/1) + {"#{wrap(left, @prec_like_in)} IN (#{values})", @prec_like_in} + end + + # Sublink membership check (IN with subquery) — rendered as a placeholder + # since the actual subquery SQL is not in the AST + defp to_sql_prec(%Func{name: "sublink_membership_check", args: [left, %Ref{path: path}]}) do + sublink_ref = Enum.join(path, ".") + {"#{wrap(left, @prec_like_in)} IN (SELECT #{sublink_ref})", @prec_like_in} + end + + # Logical operators + defp to_sql_prec(%Func{name: "not", args: [inner]}), + do: prefix_op("NOT", inner, @prec_not) + + defp to_sql_prec(%Func{name: "and", args: args}) do + conditions = Enum.map_join(args, " AND ", &wrap(&1, @prec_and)) + {conditions, @prec_and} + end + + defp to_sql_prec(%Func{name: "or", args: args}) do + conditions = Enum.map_join(args, " OR ", &wrap(&1, @prec_or)) + {conditions, @prec_or} + end + + # DISTINCT / NOT DISTINCT — args are [left, right, comparison_func] + defp to_sql_prec(%Func{name: "values_distinct?", args: [left, right | _]}), + do: binary_op(left, "IS DISTINCT FROM", right, @prec_is) + + defp to_sql_prec(%Func{name: "values_not_distinct?", args: [left, right | _]}), + do: binary_op(left, "IS NOT DISTINCT FROM", right, @prec_is) + + # ANY / ALL — arg is a single Func with map_over_array_in_pos + defp to_sql_prec(%Func{name: "any", args: [%Func{} = inner]}) do + {op_sql, left, right} = extract_mapped_operator(inner) + {"#{wrap(left, @prec_comparison)} #{op_sql} ANY(#{to_sql(right)})", @prec_comparison} + end + + defp to_sql_prec(%Func{name: "all", args: [%Func{} = inner]}) do + {op_sql, left, right} = extract_mapped_operator(inner) + {"#{wrap(left, @prec_comparison)} #{op_sql} ALL(#{to_sql(right)})", @prec_comparison} + end + + # Arithmetic binary operators + defp to_sql_prec(%Func{name: "\"+\"", args: [left, right]}), + do: binary_op(left, "+", right, @prec_addition) + + defp to_sql_prec(%Func{name: "\"-\"", args: [left, right]}), + do: binary_op(left, "-", right, @prec_addition) + + defp to_sql_prec(%Func{name: "\"*\"", args: [left, right]}), + do: binary_op(left, "*", right, @prec_multiplication) + + defp to_sql_prec(%Func{name: "\"/\"", args: [left, right]}), + do: binary_op(left, "/", right, @prec_multiplication) + + defp to_sql_prec(%Func{name: "\"^\"", args: [left, right]}), + do: binary_op_right(left, "^", right, @prec_exponent) + + # Bitwise binary operators + defp to_sql_prec(%Func{name: "\"&\"", args: [left, right]}), + do: binary_op(left, "&", right, @prec_other_op) + + defp to_sql_prec(%Func{name: "\"|\"", args: [left, right]}), + do: binary_op(left, "|", right, @prec_other_op) + + defp to_sql_prec(%Func{name: "\"#\"", args: [left, right]}), + do: binary_op(left, "#", right, @prec_other_op) + + # Unary operators + defp to_sql_prec(%Func{name: "\"+\"", args: [arg]}), + do: prefix_op("+", arg, @prec_unary) + + defp to_sql_prec(%Func{name: "\"-\"", args: [arg]}), + do: prefix_op("-", arg, @prec_unary) + + defp to_sql_prec(%Func{name: "\"~\"", args: [arg]}), + do: prefix_op("~", arg, @prec_unary) + + defp to_sql_prec(%Func{name: "\"|/\"", args: [arg]}), + do: prefix_op("|/", arg, @prec_unary) + + defp to_sql_prec(%Func{name: "\"@\"", args: [arg]}), + do: prefix_op("@", arg, @prec_unary) + + # String concatenation + defp to_sql_prec(%Func{name: "\"||\"", args: [left, right]}), + do: binary_op(left, "||", right, @prec_other_op) + + # Array operators + defp to_sql_prec(%Func{name: "\"@>\"", args: [left, right]}), + do: binary_op(left, "@>", right, @prec_other_op) + + defp to_sql_prec(%Func{name: "\"<@\"", args: [left, right]}), + do: binary_op(left, "<@", right, @prec_other_op) + + defp to_sql_prec(%Func{name: "\"&&\"", args: [left, right]}), + do: binary_op(left, "&&", right, @prec_other_op) + + # Named functions (lower, upper, like, ilike, array_*, justify_*, timezone, casts, etc.) + # These are Func nodes where the name is a plain identifier (no quotes around operators) + defp to_sql_prec(%Func{name: name, args: args}) + when name in ~w(lower upper like ilike array_cat array_prepend array_append array_ndims + justify_days justify_hours justify_interval timezone + index_access slice_access) do + arg_list = Enum.map_join(args, ", ", &to_sql/1) + {"#{name}(#{arg_list})", @prec_atom} + end + + # Type cast functions (e.g., "int4_to_bool", "text_to_int4") + defp to_sql_prec(%Func{name: name, args: [arg]}) when is_binary(name) do + if String.contains?(name, "_to_") do + target_type = name |> String.split("_to_") |> List.last() + {"#{wrap(arg, @prec_cast)}::#{target_type}", @prec_cast} + else + raise ArgumentError, + "SqlGenerator.to_sql/1: unsupported AST node: %Func{name: #{inspect(name)}}. " <> + "This WHERE clause contains an operator or expression type that " <> + "cannot be converted back to SQL for active_conditions generation." + end + end + + # Column references + defp to_sql_prec(%Ref{path: path}) do + {Enum.map_join(path, ".", &~s|"#{&1}"|), @prec_atom} + end + + # Constants + defp to_sql_prec(%Const{value: nil}), do: {"NULL", @prec_atom} + defp to_sql_prec(%Const{value: true}), do: {"true", @prec_atom} + defp to_sql_prec(%Const{value: false}), do: {"false", @prec_atom} + + defp to_sql_prec(%Const{value: value}) when is_binary(value) do + escaped = String.replace(value, "'", "''") + {"'#{escaped}'", @prec_atom} + end + + defp to_sql_prec(%Const{value: value}) when is_integer(value) or is_float(value), + do: {"#{value}", @prec_atom} + + # Constant-folded arrays (parser evaluates e.g. ARRAY[1, 2] to %Const{value: [1, 2]}) + defp to_sql_prec(%Const{value: value}) when is_list(value) do + elements = Enum.map_join(value, ", ", &const_list_element_to_sql/1) + {"ARRAY[#{elements}]", @prec_atom} + end + + # Date/time/interval constants — the parser constant-folds typed literals + # (e.g. '2024-01-01'::date) into Const nodes with Elixir struct values. + defp to_sql_prec(%Const{value: %Date{} = d}), do: {"'#{Date.to_iso8601(d)}'::date", @prec_atom} + defp to_sql_prec(%Const{value: %Time{} = t}), do: {"'#{Time.to_iso8601(t)}'::time", @prec_atom} + + defp to_sql_prec(%Const{value: %NaiveDateTime{} = ndt}), + do: {"'#{NaiveDateTime.to_iso8601(ndt)}'::timestamp", @prec_atom} + + defp to_sql_prec(%Const{value: %DateTime{} = dt}), + do: {"'#{DateTime.to_iso8601(dt)}'::timestamptz", @prec_atom} + + defp to_sql_prec(%Const{value: %PgInterop.Interval{} = i}), + do: {"'#{PgInterop.Interval.format(i)}'::interval", @prec_atom} + + # Row expressions — e.g. ROW(a, b) or (a, b) in row comparisons + defp to_sql_prec(%RowExpr{elements: elements}) do + values = Enum.map_join(elements, ", ", &to_sql/1) + {"ROW(#{values})", @prec_atom} + end + + # Array literals + defp to_sql_prec(%Array{elements: elements}) do + values = Enum.map_join(elements, ", ", &to_sql/1) + {"ARRAY[#{values}]", @prec_atom} + end + + # Catch-all — fail loudly so unsupported operators are caught at shape + # creation time, not at query time. + defp to_sql_prec(other) do + raise ArgumentError, + "SqlGenerator.to_sql/1: unsupported AST node: #{inspect(other)}. " <> + "This WHERE clause contains an operator or expression type that " <> + "cannot be converted back to SQL for active_conditions generation." + end + + # --- Precedence helpers --- + + # Binary operator, left-associative: left child at prec, right child at prec+1 + defp binary_op(left, op, right, prec) do + {"#{wrap(left, prec)} #{op} #{wrap(right, prec + 1)}", prec} + end + + # Binary operator, right-associative: left child at prec+1, right child at prec + defp binary_op_right(left, op, right, prec) do + {"#{wrap(left, prec + 1)} #{op} #{wrap(right, prec)}", prec} + end + + # Prefix unary operator: operand at same prec (same-level nesting is fine) + defp prefix_op(op, operand, prec) do + {"#{op} #{wrap(operand, prec)}", prec} + end + + # Postfix unary operator: operand must be strictly higher precedence to avoid + # ambiguity (e.g. `x IS DISTINCT FROM y IS NULL` is ambiguous) + defp postfix_op(operand, op, prec) do + {"#{wrap(operand, prec + 1)} #{op}", prec} + end + + # Wrap an AST node in parens if its precedence is lower than the context + defp wrap(ast, context_prec) do + {sql, prec} = to_sql_prec(ast) + if prec < context_prec, do: "(#{sql})", else: sql + end + + # --- Unchanged helpers --- + + # Helper for rendering constant-folded array elements (plain Elixir values, not AST nodes) + defp const_list_element_to_sql(nil), do: "NULL" + defp const_list_element_to_sql(true), do: "true" + defp const_list_element_to_sql(false), do: "false" + + defp const_list_element_to_sql(value) when is_binary(value) do + escaped = String.replace(value, "'", "''") + "'#{escaped}'" + end + + defp const_list_element_to_sql(value) when is_integer(value) or is_float(value), + do: "#{value}" + + defp const_list_element_to_sql(value) when is_list(value) do + elements = Enum.map_join(value, ", ", &const_list_element_to_sql/1) + "ARRAY[#{elements}]" + end + + # Helper for ANY/ALL: extract the operator, left operand, and array right operand + # from a Func with map_over_array_in_pos set + defp extract_mapped_operator(%Func{name: name, args: [left, right]}) do + op_sql = + case name do + ~s|"="| -> "=" + ~s|"<>"| -> "<>" + ~s|"<"| -> "<" + ~s|">"| -> ">" + ~s|"<="| -> "<=" + ~s|">="| -> ">=" + ~s|"~~"| -> "LIKE" + ~s|"~~*"| -> "ILIKE" + other -> String.trim(other, "\"") + end + + {op_sql, left, right} + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 79fa09c5b8..544a221b92 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1,12 +1,13 @@ defmodule Electric.Shapes.Consumer do use GenServer, restart: :temporary - alias Electric.Shapes.Consumer.ChangeHandling alias Electric.Shapes.Consumer.InitialSnapshot - alias Electric.Shapes.Consumer.MoveHandling - alias Electric.Shapes.Consumer.MoveIns alias Electric.Shapes.Consumer.PendingTxn alias Electric.Shapes.Consumer.State + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.Buffering + alias Electric.Shapes.Consumer.Subqueries.QueryRow + alias Electric.Shapes.DnfPlan import Electric.Shapes.Consumer.State, only: :macros require Electric.Replication.LogOffset @@ -16,6 +17,7 @@ defmodule Electric.Shapes.Consumer do alias Electric.Shapes.Consumer.Materializer alias Electric.Shapes.ConsumerRegistry alias Electric.LogItems + alias Electric.LsnTracker alias Electric.Postgres.Inspector alias Electric.Replication.Changes alias Electric.Replication.Changes.Transaction @@ -124,6 +126,7 @@ defmodule Electric.Shapes.Consumer do metadata = [shape_handle: shape_handle, stack_id: stack_id] Logger.metadata(metadata) Electric.Telemetry.Sentry.set_tags_context(metadata) + {:ok, _} = LsnTracker.subscribe_to_global_lsn_updates(stack_id) # Shape initialization will be complete when we receive a message {:initialize_shape, # , } which the ShapeCache is expected to send as soon as this process @@ -132,6 +135,62 @@ defmodule Electric.Shapes.Consumer do end @impl GenServer + def handle_continue({:init_consumer, config}, state) do + %{ + stack_id: stack_id, + shape_handle: shape_handle + } = state + + {:ok, shape} = ShapeCache.ShapeStatus.fetch_shape_by_handle(stack_id, shape_handle) + + state = State.initialize_shape(state, shape, config) + + stack_storage = ShapeCache.Storage.for_stack(stack_id) + storage = ShapeCache.Storage.for_shape(shape_handle, stack_storage) + + # TODO: Remove. Only needed for InMemoryStorage + case ShapeCache.Storage.start_link(storage) do + {:ok, _pid} -> :ok + :ignore -> :ok + end + + writer = ShapeCache.Storage.init_writer!(storage, shape) + + state = State.initialize(state, storage, writer) + + if all_materializers_alive?(state) do + state = initialize_subquery_runtime(state) + + if subscribe(state, config.action) do + Logger.debug("Writer for #{shape_handle} initialized") + + # We start the snapshotter even if there's a snapshot because it also performs the call + # to PublicationManager.add_shape/3. We *could* do that call here and avoid spawning a + # process if the shape already has a snapshot but the current semantics rely on being able + # to wait for the snapshot asynchronously and if we called publication manager here it would + # block and prevent await_snapshot_start calls from adding snapshot subscribers. + + {:ok, _pid} = + Shapes.DynamicConsumerSupervisor.start_snapshotter( + stack_id, + %{ + stack_id: stack_id, + shape: shape, + shape_handle: shape_handle, + storage: storage, + otel_ctx: config.otel_ctx + } + ) + + {:noreply, state} + else + stop_and_clean(state) + end + else + stop_and_clean(state) + end + end + def handle_continue(:stop_and_clean, state) do stop_and_clean(state) end @@ -243,28 +302,34 @@ defmodule Electric.Shapes.Consumer do state = State.initialize(state, storage, writer) - if all_materializers_alive?(state) && subscribe(state, opts.action) do - Logger.debug("Writer for #{shape_handle} initialized") - - # We start the snapshotter even if there's a snapshot because it also performs the call - # to PublicationManager.add_shape/3. We *could* do that call here and avoid spawning a - # process if the shape already has a snapshot but the current semantics rely on being able - # to wait for the snapshot asynchronously and if we called publication manager here it would - # block and prevent await_snapshot_start calls from adding snapshot subscribers. - - {:ok, _pid} = - Shapes.DynamicConsumerSupervisor.start_snapshotter( - stack_id, - %{ - stack_id: stack_id, - shape: shape, - shape_handle: shape_handle, - storage: storage, - otel_ctx: Map.get(opts, :otel_ctx, nil) - } - ) + if all_materializers_alive?(state) do + state = initialize_subquery_runtime(state) + + if subscribe(state, opts.action) do + Logger.debug("Writer for #{shape_handle} initialized") + + # We start the snapshotter even if there's a snapshot because it also performs the call + # to PublicationManager.add_shape/3. We *could* do that call here and avoid spawning a + # process if the shape already has a snapshot but the current semantics rely on being able + # to wait for the snapshot asynchronously and if we called publication manager here it would + # block and prevent await_snapshot_start calls from adding snapshot subscribers. + + {:ok, _pid} = + Shapes.DynamicConsumerSupervisor.start_snapshotter( + stack_id, + %{ + stack_id: stack_id, + shape: shape, + shape_handle: shape_handle, + storage: storage, + otel_ctx: Map.get(opts, :otel_ctx, nil) + } + ) - {:noreply, state} + {:noreply, state} + else + stop_and_clean(state) + end else stop_and_clean(state) end @@ -277,6 +342,16 @@ defmodule Electric.Shapes.Consumer do {:noreply, state, state.hibernate_after} end + def handle_info({:global_last_seen_lsn, _lsn} = event, state) do + case handle_event(event, state) do + %{terminating?: true} = state -> + {:noreply, state, {:continue, :stop_and_clean}} + + state -> + {:noreply, state, state.hibernate_after} + end + end + # This is part of the storage module contract - messages tagged storage should be applied to the writer state. def handle_info({ShapeCache.Storage, message}, state) do writer = ShapeCache.Storage.apply_message(state.writer, message) @@ -291,63 +366,57 @@ defmodule Electric.Shapes.Consumer do "Consumer reacting to #{length(move_in)} move ins and #{length(move_out)} move outs from its #{dep_handle} dependency" end) - feature_flags = Electric.StackConfig.lookup(state.stack_id, :feature_flags, []) - tagged_subqueries_enabled? = "tagged_subqueries" in feature_flags - - # We need to invalidate the consumer in the following cases: - # - tagged subqueries are disabled since we cannot support causally correct event processing of 3+ level dependency trees - # so we just invalidating this middle shape instead - # - the where clause has an OR combined with the subquery so we can't tell if the move ins/outs actually affect the shape or not - # - the where clause has a NOT combined with the subquery (e.g. NOT IN) since move-in to the subquery - # should cause move-out from the outer shape, which isn't implemented - # - the shape has multiple subqueries at the same level since we can't correctly determine - # which dependency caused the move-in/out - should_invalidate? = - not tagged_subqueries_enabled? or state.or_with_subquery? or state.not_with_subquery? or - length(state.shape.shape_dependencies) > 1 + # Invalidate if subquery runtime was not initialized because the DNF plan + # failed to compile. All supported subquery shapes, including negated ones, + # should have an active subquery runtime. + should_invalidate? = is_nil(state.subquery_state) if should_invalidate? do stop_and_clean(state) else - {state, notification} = - state - |> MoveHandling.process_move_ins(dep_handle, move_in) - |> MoveHandling.process_move_outs(dep_handle, move_out) + {state, notification, _num_changes, _total_size} = + apply_subquery_event( + state, + {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}} + ) - :ok = notify_new_changes(state, notification) + if notification do + :ok = notify_new_changes(state, notification) + end - {:noreply, state} + {:noreply, state, state.hibernate_after} end end - def handle_info({:pg_snapshot_known, name, snapshot}, state) do - Logger.debug(fn -> "Snapshot known for move-in #{name}" end) + def handle_info({:pg_snapshot_known, snapshot}, state) do + Logger.debug(fn -> "Snapshot known for active move-in" end) - # Update the snapshot in waiting_move_ins - move_handling_state = MoveIns.set_snapshot(state.move_handling_state, name, snapshot) + {state, notification, _num_changes, _total_size} = + apply_subquery_event(state, {:pg_snapshot_known, snapshot}) - # Garbage collect touches visible in all known snapshots - state = %{state | move_handling_state: move_handling_state} - state = State.gc_touch_tracker(state) + if notification do + :ok = notify_new_changes(state, notification) + end {:noreply, state, state.hibernate_after} end - def handle_info({:query_move_in_complete, name, key_set, snapshot}, state) do + def handle_info({:query_move_in_complete, rows, move_in_lsn}, state) do Logger.debug(fn -> - "Consumer query move in complete for #{name} with #{length(key_set)} keys" + "Consumer query move in complete for #{state.shape_handle} with #{length(rows)} rows" end) - {state, notification} = MoveHandling.query_complete(state, name, key_set, snapshot) - :ok = notify_new_changes(state, notification) + {state, notification, _num_changes, _total_size} = + apply_subquery_event(state, {:query_move_in_complete, rows, move_in_lsn}) - # Garbage collect touches after query completes (no buffer consumption needed) - state = State.gc_touch_tracker(state) + if notification do + :ok = notify_new_changes(state, notification) + end {:noreply, state, state.hibernate_after} end - def handle_info({:query_move_in_error, _, error, stacktrace}, state) do + def handle_info({:query_move_in_error, error, stacktrace}, state) do Logger.error( "Error querying move in for #{state.shape_handle}: #{Exception.format(:error, error, stacktrace)}" ) @@ -465,6 +534,19 @@ defmodule Electric.Shapes.Consumer do |> mark_for_removal() end + defp handle_event({:global_last_seen_lsn, _lsn}, %{subquery_state: nil} = state), + do: state + + defp handle_event({:global_last_seen_lsn, _lsn} = event, state) do + {state, notification, _num_changes, _total_size} = apply_subquery_event(state, event) + + if notification do + :ok = notify_new_changes(state, notification) + end + + state + end + defp handle_event(%TransactionFragment{} = txn_fragment, state) do Logger.debug(fn -> "Txn fragment received in Shapes.Consumer: #{inspect(txn_fragment)}" end) handle_txn_fragment(txn_fragment, state) @@ -649,7 +731,7 @@ defmodule Electric.Shapes.Consumer do end end - defp convert_fragment_changes(changes, stack_id, shape_handle, shape) do + defp convert_fragment_changes(changes, stack_id, shape_handle, shape, extra_refs \\ nil) do Enum.reduce_while(changes, {[], 0}, fn %Changes.TruncatedRelation{}, _acc -> {:halt, :includes_truncate} @@ -658,7 +740,11 @@ defmodule Electric.Shapes.Consumer do # Apply Shape.convert_change to each change to: # 1. Filter out changes not matching the shape's table # 2. Apply WHERE clause filtering - case Shape.convert_change(shape, change, stack_id: stack_id, shape_handle: shape_handle) do + case Shape.convert_change(shape, change, + stack_id: stack_id, + shape_handle: shape_handle, + extra_refs: extra_refs + ) do [] -> {:cont, acc} @@ -780,49 +866,84 @@ defmodule Electric.Shapes.Consumer do defp do_handle_txn(%Transaction{xid: xid, changes: changes} = txn, state) do %{shape: shape, writer: writer} = state - state = State.remove_completed_move_ins(state, txn) + if is_nil(state.subquery_state) do + case convert_txn_changes(changes, xid, state) do + :includes_truncate -> + handle_txn_with_truncate(txn.xid, state) - extra_refs_full = - Materializer.get_all_as_refs(shape, state.stack_id) + {[], 0} -> + Logger.debug(fn -> + "No relevant changes found for #{inspect(shape)} in txn #{txn.xid}" + end) - extra_refs_before_move_ins = - Enum.reduce(state.move_handling_state.in_flight_values, extra_refs_full, fn {key, value}, - acc -> - if is_map_key(acc, key), - do: Map.update!(acc, key, &MapSet.difference(&1, value)), - else: acc - end) + consider_flushed(state, txn.last_log_offset) - Logger.debug(fn -> "Extra refs: #{inspect(extra_refs_before_move_ins)}" end) + {[], 0, _} -> + Logger.debug(fn -> + "No relevant changes found for #{inspect(shape)} in txn #{txn.xid}" + end) - case ChangeHandling.process_changes( - changes, - state, - %{xid: xid, extra_refs: {extra_refs_before_move_ins, extra_refs_full}} - ) do - :includes_truncate -> - handle_txn_with_truncate(txn.xid, state) + consider_flushed(state, txn.last_log_offset) - {_, state, 0, _} -> - Logger.debug(fn -> - "No relevant changes found for #{inspect(shape)} in txn #{txn.xid}" - end) + {changes, num_changes, last_log_offset} -> + timestamp = System.monotonic_time() - consider_flushed(state, txn.last_log_offset) + {lines, total_size} = prepare_log_entries(changes, xid, shape) + writer = ShapeCache.Storage.append_to_log!(lines, writer) - {changes, state, num_changes, last_log_offset} -> - timestamp = System.monotonic_time() + OpenTelemetry.add_span_attributes(%{ + num_bytes: total_size, + actual_num_changes: num_changes + }) + + updated_state = %{ + state + | writer: writer, + latest_offset: last_log_offset, + txn_offset_mapping: + state.txn_offset_mapping ++ [{last_log_offset, txn.last_log_offset}] + } - {lines, total_size} = prepare_log_entries(changes, xid, shape) - writer = ShapeCache.Storage.append_to_log!(lines, writer) + :ok = notify_new_changes(updated_state, changes, last_log_offset) + + lag = calculate_replication_lag(txn.commit_timestamp) + OpenTelemetry.add_span_attributes(replication_lag: lag) + + Electric.Telemetry.OpenTelemetry.execute( + [:electric, :storage, :transaction_stored], + %{ + duration: System.monotonic_time() - timestamp, + bytes: total_size, + count: 1, + operations: num_changes, + replication_lag: lag + }, + Map.new(State.telemetry_attrs(updated_state)) + ) + + updated_state + end + else + handle_txn_with_subqueries(txn, state) + end + end + + defp handle_txn_with_subqueries(%Transaction{} = txn, state) do + timestamp = System.monotonic_time() + was_buffering = match?(%Buffering{}, state.subquery_state) + + {state, notification, num_changes, total_size} = + apply_subquery_event(state, txn, default_xid: txn.xid) + + cond do + notification -> + :ok = notify_new_changes(state, notification) OpenTelemetry.add_span_attributes(%{ num_bytes: total_size, actual_num_changes: num_changes }) - :ok = notify_new_changes(state, changes, last_log_offset) - lag = calculate_replication_lag(txn.commit_timestamp) OpenTelemetry.add_span_attributes(replication_lag: lag) @@ -838,16 +959,234 @@ defmodule Electric.Shapes.Consumer do Map.new(State.telemetry_attrs(state)) ) - %{ - state - | writer: writer, - latest_offset: last_log_offset, - txn_offset_mapping: - state.txn_offset_mapping ++ [{last_log_offset, txn.last_log_offset}] - } + state + + was_buffering or match?(%Buffering{}, state.subquery_state) -> + state + + true -> + Logger.debug(fn -> + "No relevant changes found for #{inspect(state.shape)} in txn #{txn.xid}" + end) + + consider_flushed(state, txn.last_log_offset) end end + defp convert_txn_changes(changes, _xid, state) do + case convert_fragment_changes( + changes, + state.stack_id, + state.shape_handle, + state.shape, + txn_conversion_extra_refs(state) + ) do + :includes_truncate -> + :includes_truncate + + {[], 0} -> + {[], 0, nil} + + {reversed_changes, num_changes, last_log_offset} -> + converted_changes = + reversed_changes + |> maybe_mark_last_change(%{}) + |> Enum.reverse() + + {converted_changes, num_changes, last_log_offset} + + result -> + result + end + end + + defp txn_conversion_extra_refs(%State{shape: shape, stack_id: stack_id, subquery_state: nil}) do + if Shape.are_deps_filled(shape) do + refs = Materializer.get_all_as_refs(shape, stack_id) + {refs, refs} + end + end + + defp txn_conversion_extra_refs(_state), do: nil + + defp apply_subquery_event(state, event, opts \\ []) do + previous_subquery_state = state.subquery_state + {outputs, subquery_state} = Subqueries.handle_event(state.subquery_state, event) + + state = %{state | subquery_state: subquery_state} + state = maybe_start_move_in_query(state, previous_subquery_state) + + case append_subquery_outputs(state, outputs, event, previous_subquery_state, opts) do + {state, nil, 0, 0} -> + {state, nil, 0, 0} + + {state, range, num_changes, total_size} -> + {state, {range, state.latest_offset}, num_changes, total_size} + end + end + + defp maybe_start_move_in_query( + %{subquery_state: %Buffering{} = buffering_state} = state, + _previous_subquery_state + ) do + if should_start_move_in_query?(buffering_state) do + Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) + |> Subqueries.query_move_in_async(state, buffering_state, self()) + + %{state | subquery_state: %{buffering_state | query_started?: true}} + else + state + end + end + + defp maybe_start_move_in_query(state, _previous_subquery_state), do: state + + defp should_start_move_in_query?(%Buffering{query_started?: false}), do: true + defp should_start_move_in_query?(_), do: false + + defp append_subquery_outputs(state, [], event, previous_subquery_state, _opts) do + state = finalize_subquery_flush_tracking(state, event, previous_subquery_state, nil) + {state, nil, 0, 0} + end + + defp append_subquery_outputs(state, outputs, event, previous_subquery_state, opts) do + outputs = insert_move_in_snapshot_end(outputs) + previous_offset = state.latest_offset + + {state, latest_offset, total_size, num_changes} = + Enum.reduce(outputs, {state, previous_offset, 0, 0}, fn output, + {state, current_offset, size, count} -> + case output do + %QueryRow{key: key, json: json} -> + json = IO.iodata_to_binary(json) + offset = LogOffset.increment(current_offset) + + writer = + ShapeCache.Storage.append_to_log!([{offset, key, :insert, json}], state.writer) + + {%{state | writer: writer, latest_offset: offset}, offset, size + byte_size(json), + count + 1} + + %{headers: %{control: _}} = control_message -> + append_control_message_output(state, size, count, control_message) + + %{headers: %{event: _}} = control_message -> + append_control_message_output(state, size, count, control_message) + + %Changes.TruncatedRelation{} -> + raise ArgumentError, "unexpected truncate emitted from subquery state machine" + + %Changes.NewRecord{} = change -> + append_change_output(state, size, count, change, opts[:default_xid]) + + %Changes.UpdatedRecord{} = change -> + append_change_output(state, size, count, change, opts[:default_xid]) + + %Changes.DeletedRecord{} = change -> + append_change_output(state, size, count, change, opts[:default_xid]) + end + end) + + state = + finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) + + {state, {previous_offset, latest_offset}, num_changes, total_size} + end + + defp append_control_message_output(state, size, count, control_message) do + encoded = Jason.encode!(control_message) + + {{_, offset}, writer} = + ShapeCache.Storage.append_control_message!(encoded, state.writer) + + {%{state | writer: writer, latest_offset: offset}, offset, size + byte_size(encoded), + count + 1} + end + + defp append_change_output(state, size, count, change, xid) do + lines = + change + |> LogItems.from_change( + xid, + Shape.pk(state.shape, change.relation), + state.shape.replica + ) + |> Enum.map(fn {offset, %{key: key} = log_item} -> + {offset, key, log_item.headers.operation, Jason.encode!(log_item)} + end) + + writer = ShapeCache.Storage.append_to_log!(lines, state.writer) + last_offset = lines |> List.last() |> elem(0) + size_increase = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) + + {%{state | writer: writer, latest_offset: last_offset}, last_offset, size + size_increase, + count + 1} + end + + defp finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) do + case {previous_subquery_state, state.subquery_state, event} do + {%Buffering{move_in_values: move_in_values}, %Buffering{move_in_values: move_in_values}, + _event} -> + state + + {%Buffering{} = buffering_state, _current_subquery_state, _event} -> + buffered_txns = buffered_txns_for_flush(buffering_state, event) + maybe_track_buffered_flush(state, buffered_txns, latest_offset) + + {_subquery_state, _current_subquery_state, %Transaction{last_log_offset: last_log_offset}} -> + maybe_track_txn_flush(state, last_log_offset, latest_offset) + + _ -> + state + end + end + + defp buffered_txns_for_flush(%Buffering{buffered_txns: buffered_txns}, %Transaction{} = txn), + do: buffered_txns ++ [txn] + + defp buffered_txns_for_flush(%Buffering{buffered_txns: buffered_txns}, _event), + do: buffered_txns + + defp maybe_track_buffered_flush(state, [], _latest_offset), do: state + + defp maybe_track_buffered_flush(state, buffered_txns, nil) do + buffered_txns + |> List.last() + |> then(&consider_flushed(state, &1.last_log_offset)) + end + + defp maybe_track_buffered_flush(state, buffered_txns, latest_offset) do + last_log_offset = buffered_txns |> List.last() |> Map.fetch!(:last_log_offset) + + %{ + state + | txn_offset_mapping: state.txn_offset_mapping ++ [{latest_offset, last_log_offset}] + } + end + + defp insert_move_in_snapshot_end(outputs) do + {before_query_rows, remaining} = Enum.split_while(outputs, &(not match?(%QueryRow{}, &1))) + + case Enum.split_while(remaining, &match?(%QueryRow{}, &1)) do + {[], _rest} -> + outputs + + {query_rows, rest} -> + before_query_rows ++ query_rows ++ [snapshot_end_control_message()] ++ rest + end + end + + defp snapshot_end_control_message do + %{headers: %{control: "snapshot-end"}} + end + + defp maybe_track_txn_flush(state, last_log_offset, nil), + do: consider_flushed(state, last_log_offset) + + defp maybe_track_txn_flush(state, last_log_offset, latest_offset) do + %{state | txn_offset_mapping: state.txn_offset_mapping ++ [{latest_offset, last_log_offset}]} + end + defp handle_txn_with_truncate(xid, state) do # TODO: This is a very naive way to handle truncations: if ANY relevant truncates are # present in the transaction, we're considering the whole transaction empty, and @@ -1013,6 +1352,45 @@ defmodule Electric.Shapes.Consumer do end end + defp initialize_subquery_runtime( + %State{shape: %Shape{shape_dependencies_handles: dep_handles}} = state + ) + when dep_handles != [] do + case DnfPlan.compile(state.shape) do + {:ok, dnf_plan} -> + # Initialize the DNF runtime for all supported subquery shapes. + {views, dep_handle_to_ref} = + dep_handles + |> Enum.with_index() + |> Enum.reduce({%{}, %{}}, fn {handle, index}, {views, mapping} -> + materializer_opts = %{stack_id: state.stack_id, shape_handle: handle} + :ok = Materializer.wait_until_ready(materializer_opts) + view = Materializer.get_link_values(materializer_opts) + ref = ["$sublink", Integer.to_string(index)] + {Map.put(views, ref, view), Map.put(mapping, handle, {index, ref})} + end) + + %{ + state + | subquery_state: + Subqueries.new( + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: dnf_plan, + views: views, + dependency_handle_to_ref: dep_handle_to_ref + ) + } + + _other -> + # :no_subqueries or {:error, _} - no subquery runtime needed + state + end + end + + defp initialize_subquery_runtime(state), do: state + defp all_materializers_alive?(state) do Enum.all?(state.shape.shape_dependencies_handles, fn shape_handle -> name = Materializer.name(state.stack_id, shape_handle) diff --git a/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex b/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex deleted file mode 100644 index c7c81ed8d5..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex +++ /dev/null @@ -1,144 +0,0 @@ -defmodule Electric.Shapes.Consumer.ChangeHandling do - alias Electric.Shapes.Consumer.MoveIns - alias Electric.Replication.Eval.Runner - alias Electric.Shapes.Shape - alias Electric.Shapes.WhereClause - alias Electric.Replication.LogOffset - alias Electric.LogItems - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.Consumer - alias Electric.Replication.Changes - - require Electric.Shapes.Shape - - @spec process_changes(list(Changes.change()), State.t(), context) :: - {filtered_changes :: list(Changes.change()), state :: State.t(), - count :: non_neg_integer(), last_log_offset :: LogOffset.t() | nil} - | :includes_truncate - when context: map() - def process_changes(changes, state, ctx) - when is_map_key(ctx, :xid) do - do_process_changes(changes, state, ctx, [], 0) - end - - def do_process_changes(changes, state, ctx, acc, count) - - def do_process_changes([], state, _, _, 0), do: {[], state, 0, nil} - - def do_process_changes([], state, _, [head | tail], total_ops), - do: - {Enum.reverse([%{head | last?: true} | tail]), state, total_ops, - LogItems.expected_offset_after_split(head)} - - def do_process_changes([%Changes.TruncatedRelation{} | _], _, _, _, _), do: :includes_truncate - - # We're special casing processing without dependencies, as it's very common so we can optimize it. - def do_process_changes([change | rest], %State{shape: shape} = state, ctx, acc, count) - when not Shape.has_dependencies(shape) do - case Shape.convert_change(shape, change, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - extra_refs: ctx.extra_refs - ) do - [] -> - do_process_changes(rest, state, ctx, acc, count) - - [change] -> - state = State.track_change(state, ctx.xid, change) - do_process_changes(rest, state, ctx, [change | acc], count + 1) - end - end - - def do_process_changes( - [change | rest], - %State{shape: shape, stack_id: stack_id, shape_handle: shape_handle} = state, - ctx, - acc, - count - ) do - if not change_visible_in_resolved_move_ins?(change, state, ctx) and - not change_will_be_covered_by_move_in?(change, state, ctx) do - case Shape.convert_change(shape, change, - stack_id: stack_id, - shape_handle: shape_handle, - extra_refs: ctx.extra_refs - ) do - [] -> - do_process_changes(rest, state, ctx, acc, count) - - [change] -> - state = State.track_change(state, ctx.xid, change) - do_process_changes(rest, state, ctx, [change | acc], count + 1) - end - else - do_process_changes(rest, state, ctx, acc, count) - end - end - - defp change_visible_in_resolved_move_ins?(change, state, ctx) do - Consumer.MoveIns.change_already_visible?(state.move_handling_state, ctx.xid, change) - end - - defp change_will_be_covered_by_move_in?(%Changes.DeletedRecord{}, _, _), do: false - - defp change_will_be_covered_by_move_in?(change, state, ctx) do - # First check if the new record's sublink values are in pending move-ins - referenced_values = get_referenced_values(change, state) - - if change_visible_in_unresolved_move_ins_for_values?(referenced_values, state, ctx) do - # For UpdatedRecords where the sublink value changed, we must NOT skip the change. - # The move-in query will return this row as an INSERT, which doesn't carry - # removed_move_tags. Without the tag transition from the WAL change, the client - # will retain the old tag, causing the row to not be properly cleaned up on - # subsequent move-outs. - if is_struct(change, Changes.UpdatedRecord) and - sublink_value_changed?(change, state) do - false - else - # Even if the sublink value is in a pending move-in, we should only skip - # this change if the new record actually matches the full WHERE clause. - # The move-in query uses the full WHERE clause, so if the record doesn't - # match other non-subquery conditions in the WHERE clause, the move-in - # won't return this row and we need to process this change normally. - case ctx.extra_refs do - {_extra_refs_old, extra_refs_new} -> - WhereClause.includes_record?(state.shape.where, change.record, extra_refs_new) - - _ -> - # If extra_refs is not a tuple (e.g., empty map in tests), fall back to - # the old behavior of skipping the change - true - end - end - else - false - end - end - - defp sublink_value_changed?( - %Changes.UpdatedRecord{record: new_record, old_record: old_record}, - state - ) do - Enum.any?(state.shape.subquery_comparison_expressions, fn {_path, expr} -> - {:ok, new_value} = Runner.execute_for_record(expr, new_record) - {:ok, old_value} = Runner.execute_for_record(expr, old_record) - new_value != old_value - end) - end - - defp get_referenced_values(change, state) do - state.shape.subquery_comparison_expressions - |> Map.new(fn {path, expr} -> - {:ok, value} = Runner.execute_for_record(expr, change.record) - {path, value} - end) - end - - defp change_visible_in_unresolved_move_ins_for_values?(referenced_values, state, ctx) do - MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state.move_handling_state, - referenced_values, - ctx.xid - ) - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex index 69164b3ad6..d71dffc8e5 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex @@ -1,13 +1,4 @@ defmodule Electric.Shapes.Consumer.Materializer do - # TODOS: - # - [x] Keep lockstep with the consumer - # - [ ] Think about initial materialization needing to finish before we can continue - # - [ ] - # - [ ] Use the `get_link_values` - - # NOTES: - # - Consumer does txn buffering until pg snapshot is known - # The lifecycle of a materializer is linked to its source consumer. If the consumer # goes down for any reason other than a clean supervisor/stack shutdown then we # need to invalidate all dependent outer shapes. @@ -57,11 +48,46 @@ defmodule Electric.Shapes.Consumer.Materializer do GenServer.call(name(state), :wait_until_ready, :infinity) end - def get_link_values(opts) do + @doc """ + Creates the per-stack ETS table that caches link values for all materializers + in a stack. Called by `ConsumerRegistry` during stack initialization. Idempotent — + safe to call when the table already exists. + """ + @spec init_link_values_table(stack_id :: term()) :: :ets.table() | :undefined + def init_link_values_table(stack_id) do + :ets.new(link_values_table_name(stack_id), [ + :named_table, + :public, + :set, + read_concurrency: true, + write_concurrency: true + ]) + rescue + ArgumentError -> :ets.whereis(link_values_table_name(stack_id)) + end + + @doc """ + Returns the current set of materialized link values for a shape. + Checks the shared ETS cache first (written after each committed transaction); + falls back to a synchronous GenServer call if the cache has no entry yet. + """ + def get_link_values(%{stack_id: stack_id, shape_handle: shape_handle} = opts) do + table = link_values_table_name(stack_id) + + case :ets.lookup(table, shape_handle) do + [{^shape_handle, values}] -> values + _ -> genserver_get_link_values(opts) + end + rescue + ArgumentError -> genserver_get_link_values(opts) + end + + defp genserver_get_link_values(opts) do GenServer.call(name(opts), :get_link_values) catch - :exit, _reason -> - raise ~s|Materializer for stack "#{opts.stack_id}" and handle "#{opts.shape_handle}" is not available| + :exit, reason -> + raise "Materializer for stack #{inspect(opts.stack_id)} and handle " <> + "#{inspect(opts.shape_handle)} is not available: #{inspect(reason)}" end def get_all_as_refs(shape, stack_id) when are_deps_filled(shape) do @@ -149,6 +175,8 @@ defmodule Electric.Shapes.Consumer.Materializer do |> decode_json_stream() |> apply_changes(state) + write_link_values(state) + {:noreply, %{state | offset: offset}} end @@ -170,9 +198,7 @@ defmodule Electric.Shapes.Consumer.Materializer do end def handle_call(:get_link_values, _from, %{value_counts: value_counts} = state) do - values = MapSet.new(Map.keys(value_counts)) - - {:reply, values, state} + {:reply, link_values_from_counts(value_counts), state} end def handle_call(:wait_until_ready, _from, state) do @@ -232,6 +258,52 @@ defmodule Electric.Shapes.Consumer.Materializer do {:noreply, %{state | subscribers: MapSet.delete(state.subscribers, pid)}} end + @spec link_values_table_name(Electric.stack_id()) :: atom() + def link_values_table_name(stack_id) do + :"Electric.Materializer.LinkValues:#{stack_id}" + end + + @doc """ + Removes the cached link values for `shape_handle` from the shared ETS table. + Safe to call even if the table does not exist (e.g. after a stack shutdown). + """ + @spec delete_link_values(Electric.stack_id(), Electric.shape_handle()) :: :ok + def delete_link_values(stack_id, shape_handle) do + :ets.delete(link_values_table_name(stack_id), shape_handle) + :ok + rescue + ArgumentError -> + Logger.debug(fn -> + "delete_link_values: link-values table for stack #{inspect(stack_id)} " <> + "not found when deleting handle #{inspect(shape_handle)}" + end) + + :ok + end + + defp link_values_from_counts(value_counts) do + MapSet.new(Map.keys(value_counts)) + end + + defp write_link_values(%{ + stack_id: stack_id, + shape_handle: shape_handle, + value_counts: value_counts + }) do + :ets.insert( + link_values_table_name(stack_id), + {shape_handle, link_values_from_counts(value_counts)} + ) + rescue + ArgumentError -> + Logger.warning( + "write_link_values: link-values ETS table missing for stack #{inspect(stack_id)} " <> + "— cache will fall back to GenServer calls for handle #{inspect(shape_handle)}" + ) + + :ok + end + defp decode_json_stream(stream) do stream |> Stream.map(&Jason.decode!/1) @@ -246,21 +318,28 @@ defmodule Electric.Shapes.Consumer.Materializer do } -> case operation do "insert" -> - %Changes.NewRecord{key: key, record: value, move_tags: Map.get(headers, "tags", [])} + %Changes.NewRecord{ + key: key, + record: value, + move_tags: Map.get(headers, "tags", []), + active_conditions: Map.get(headers, "active_conditions", []) + } "update" -> %Changes.UpdatedRecord{ key: key, record: value, move_tags: Map.get(headers, "tags", []), - removed_move_tags: Map.get(headers, "removed_tags", []) + removed_move_tags: Map.get(headers, "removed_tags", []), + active_conditions: Map.get(headers, "active_conditions", []) } "delete" -> %Changes.DeletedRecord{ key: key, old_record: value, - move_tags: Map.get(headers, "tags", []) + move_tags: Map.get(headers, "tags", []), + active_conditions: Map.get(headers, "active_conditions", []) } end @@ -271,6 +350,14 @@ defmodule Electric.Shapes.Consumer.Materializer do end) %{headers: %{event: "move-out", patterns: patterns}} + + %{"headers" => %{"event" => "move-in", "patterns" => patterns}} -> + patterns = + Enum.map(patterns, fn %{"pos" => pos, "value" => value} -> + %{pos: pos, value: value} + end) + + %{headers: %{event: "move-in", patterns: patterns}} end) end @@ -318,6 +405,8 @@ defmodule Electric.Shapes.Consumer.Materializer do end end + write_link_values(state) + %{state | pending_events: %{}} end @@ -367,20 +456,41 @@ defmodule Electric.Shapes.Consumer.Materializer do changes, {{state.index, state.tag_indices}, {state.value_counts, []}}, fn - %Changes.NewRecord{key: key, record: record, move_tags: move_tags}, + %Changes.NewRecord{ + key: key, + record: record, + move_tags: move_tags, + active_conditions: ac + }, {{index, tag_indices}, counts_and_events} -> {value, original_string} = cast!(record, state) if is_map_key(index, key), do: raise("Key #{key} already exists") - index = Map.put(index, key, value) + included? = evaluate_inclusion(move_tags, ac) + + index = + Map.put(index, key, %{ + value: value, + tags: move_tags, + active_conditions: ac, + included?: included? + }) + tag_indices = add_row_to_tag_indices(tag_indices, key, move_tags) - {{index, tag_indices}, increment_value(counts_and_events, value, original_string)} + + counts_and_events = + if included?, + do: increment_value(counts_and_events, value, original_string), + else: counts_and_events + + {{index, tag_indices}, counts_and_events} %Changes.UpdatedRecord{ key: key, old_key: old_key, record: record, move_tags: move_tags, - removed_move_tags: removed_move_tags + removed_move_tags: removed_move_tags, + active_conditions: ac }, {{index, tag_indices}, counts_and_events} -> # When the primary key doesn't change, old_key may be nil; default to key @@ -390,46 +500,97 @@ defmodule Electric.Shapes.Consumer.Materializer do columns_present = Enum.any?(state.columns, &is_map_key(record, &1)) has_tag_updates = removed_move_tags != [] pk_changed = old_key != key + has_ac_update = ac != [] and is_map_key(index, old_key) + + if columns_present or has_tag_updates or has_ac_update or pk_changed do + old_entry = Map.fetch!(index, old_key) - if columns_present or has_tag_updates or pk_changed do - # When PK changes, old_key must be removed from all tag indices it - # belongs to (both removed and retained tags), not just removed_move_tags + # When the primary key changes, re-index every existing tag for the new key. tags_to_remove = if pk_changed, - do: removed_move_tags ++ move_tags, + do: old_entry.tags, else: removed_move_tags + new_tags = + if has_tag_updates or move_tags != [], do: move_tags, else: old_entry.tags + + new_ac = if ac != [], do: ac, else: old_entry.active_conditions + new_included? = evaluate_inclusion(new_tags, new_ac) + tag_indices = tag_indices |> remove_row_from_tag_indices(old_key, tags_to_remove) - |> add_row_to_tag_indices(key, move_tags) + |> add_row_to_tag_indices(key, new_tags) if columns_present do {value, original_string} = cast!(record, state) - {old_value, index} = Map.pop!(index, old_key) - index = Map.put(index, key, value) - - # Skip decrement/increment dance if value hasn't changed to avoid - # spurious move_out/move_in events when only the tag changed - if old_value == value do - {{index, tag_indices}, counts_and_events} - else - {{index, tag_indices}, - counts_and_events - |> decrement_value(old_value, value_to_string(old_value, state)) - |> increment_value(value, original_string)} + old_value = old_entry.value + + index = + index + |> Map.delete(old_key) + |> Map.put(key, %{ + value: value, + tags: new_tags, + active_conditions: new_ac, + included?: new_included? + }) + + cond do + old_entry.included? and new_included? and old_value != value -> + {{index, tag_indices}, + counts_and_events + |> decrement_value(old_value, value_to_string(old_value, state)) + |> increment_value(value, original_string)} + + old_entry.included? and not new_included? -> + {{index, tag_indices}, + decrement_value( + counts_and_events, + old_value, + value_to_string(old_value, state) + )} + + not old_entry.included? and new_included? -> + {{index, tag_indices}, + increment_value(counts_and_events, value, original_string)} + + true -> + # Skip decrement/increment dance if value hasn't changed to avoid + # spurious move_out/move_in events when only the tag changed + {{index, tag_indices}, counts_and_events} end else - # PK changed but tracked column not in record — re-key the index entry index = - if pk_changed do - {value, index} = Map.pop!(index, old_key) - Map.put(index, key, value) - else - index - end - - {{index, tag_indices}, counts_and_events} + index + |> Map.delete(old_key) + |> Map.put(key, %{ + old_entry + | tags: new_tags, + active_conditions: new_ac, + included?: new_included? + }) + + cond do + old_entry.included? and not new_included? -> + {{index, tag_indices}, + decrement_value( + counts_and_events, + old_entry.value, + value_to_string(old_entry.value, state) + )} + + not old_entry.included? and new_included? -> + {{index, tag_indices}, + increment_value( + counts_and_events, + old_entry.value, + value_to_string(old_entry.value, state) + )} + + true -> + {{index, tag_indices}, counts_and_events} + end end else # Nothing relevant to this materializer has been updated @@ -438,22 +599,43 @@ defmodule Electric.Shapes.Consumer.Materializer do %Changes.DeletedRecord{key: key, move_tags: move_tags}, {{index, tag_indices}, counts_and_events} -> - {value, index} = Map.pop!(index, key) - + {entry, index} = Map.pop!(index, key) tag_indices = remove_row_from_tag_indices(tag_indices, key, move_tags) - {{index, tag_indices}, - decrement_value(counts_and_events, value, value_to_string(value, state))} - - %{headers: %{event: "move-out", patterns: patterns}}, - {{index, tag_indices}, counts_and_events} -> - {keys, tag_indices} = pop_keys_from_tag_indices(tag_indices, patterns) + if entry.included? do + {{index, tag_indices}, + decrement_value( + counts_and_events, + entry.value, + value_to_string(entry.value, state) + )} + else + {{index, tag_indices}, counts_and_events} + end - {index, counts_and_events} = - Enum.reduce(keys, {index, counts_and_events}, fn key, {index, counts_and_events} -> - {value, index} = Map.pop!(index, key) - {index, decrement_value(counts_and_events, value, value_to_string(value, state))} - end) + %{headers: %{event: event, patterns: patterns}}, + {{index, tag_indices}, counts_and_events} + when event in ["move-out", "move-in"] -> + new_condition = event == "move-in" + affected = collect_affected_keys(tag_indices, patterns) + + {{index, tag_indices}, counts_and_events} = + Enum.reduce( + affected, + {{index, tag_indices}, counts_and_events}, + fn {key, matched_positions}, acc -> + entry = Map.fetch!(index, key) + + process_move_event( + entry, + key, + matched_positions, + new_condition, + acc, + state + ) + end + ) {{index, tag_indices}, counts_and_events} end @@ -485,39 +667,138 @@ defmodule Electric.Shapes.Consumer.Materializer do end end + # Position-aware tag indexing: tags are "/" separated strings where each slot + # corresponds to a DNF position. Non-empty slots are indexed as {pos, hash}. + # For backward compat, flat tags (no "/") are treated as position 0. defp add_row_to_tag_indices(tag_indices, key, move_tags) do - # For now we only support one move tag per row (i.e. no `OR`s in the where clause if there's a subquery) Enum.reduce(move_tags, tag_indices, fn tag, acc when is_binary(tag) -> - Map.update(acc, tag, MapSet.new([key]), &MapSet.put(&1, key)) + tag + |> parse_tag_slots() + |> Enum.reduce(acc, fn + {"", _pos}, acc -> + acc + + {hash, pos}, acc -> + Map.update(acc, {pos, hash}, MapSet.new([key]), &MapSet.put(&1, key)) + end) end) end defp remove_row_from_tag_indices(tag_indices, key, move_tags) do Enum.reduce(move_tags, tag_indices, fn tag, acc when is_binary(tag) -> - case Map.fetch(acc, tag) do - {:ok, v} -> - new_mapset = MapSet.delete(v, key) - - if MapSet.size(new_mapset) == 0 do - Map.delete(acc, tag) - else - Map.put(acc, tag, new_mapset) + tag + |> parse_tag_slots() + |> Enum.reduce(acc, fn + {"", _pos}, acc -> + acc + + {hash, pos}, acc -> + case Map.fetch(acc, {pos, hash}) do + {:ok, v} -> + new_mapset = MapSet.delete(v, key) + + if MapSet.size(new_mapset) == 0 do + Map.delete(acc, {pos, hash}) + else + Map.put(acc, {pos, hash}, new_mapset) + end + + :error -> + acc end + end) + end) + end + + defp parse_tag_slots(tag) do + tag |> String.split("/") |> Enum.with_index() + end - :error -> + # Collect keys affected by move patterns, returning %{key => MapSet} + defp collect_affected_keys(tag_indices, patterns) do + Enum.reduce(patterns, %{}, fn %{pos: pos, value: value}, acc -> + case Map.get(tag_indices, {pos, value}) do + nil -> acc + + keys -> + Enum.reduce(keys, acc, fn key, acc -> + Map.update(acc, key, MapSet.new([pos]), &MapSet.put(&1, pos)) + end) end end) end - defp pop_keys_from_tag_indices(tag_indices, patterns) do - # This implementation is naive while we support only one tag per row and no composite tags. - Enum.reduce(patterns, {MapSet.new(), tag_indices}, fn %{pos: _pos, value: value}, - {keys, acc} -> - case Map.pop(acc, value) do - {nil, acc} -> {keys, acc} - {v, acc} -> {MapSet.union(keys, v), acc} - end + defp process_move_event(entry, key, matched_positions, new_condition, {{idx, ti}, ce}, state) do + case entry.active_conditions do + [] when new_condition == false -> + # No DNF, move-out: remove row entirely (backward compat) + ti = remove_row_from_tag_indices(ti, key, entry.tags) + idx = Map.delete(idx, key) + {{idx, ti}, decrement_value(ce, entry.value, value_to_string(entry.value, state))} + + [] -> + # No DNF, move-in: no-op + {{idx, ti}, ce} + + ac -> + # DNF: flip matched positions, re-evaluate inclusion + new_ac = flip_active_conditions(ac, matched_positions, new_condition) + new_included? = evaluate_inclusion(entry.tags, new_ac) + + cond do + entry.included? and not new_included? -> + # Remove row entirely to avoid stale tag_indices. If the row + # should become included again later, it will re-enter via a + # move-in query or NewRecord with fresh tags and ac. + ti = remove_row_from_tag_indices(ti, key, entry.tags) + idx = Map.delete(idx, key) + {{idx, ti}, decrement_value(ce, entry.value, value_to_string(entry.value, state))} + + not entry.included? and new_included? -> + idx = + Map.put(idx, key, %{ + entry + | active_conditions: new_ac, + included?: new_included? + }) + + {{idx, ti}, increment_value(ce, entry.value, value_to_string(entry.value, state))} + + true -> + idx = + Map.put(idx, key, %{ + entry + | active_conditions: new_ac, + included?: new_included? + }) + + {{idx, ti}, ce} + end + end + end + + defp flip_active_conditions(ac, positions, new_value) do + ac + |> Enum.with_index() + |> Enum.map(fn {val, idx} -> + if MapSet.member?(positions, idx), do: new_value, else: val + end) + end + + # Evaluate whether a row is included based on its tags and active_conditions. + # A row is included if any disjunct (tag) has all participating positions active. + defp evaluate_inclusion([], _ac), do: true + defp evaluate_inclusion(_tags, []), do: true + + defp evaluate_inclusion(tags, ac) do + Enum.any?(tags, fn tag -> + tag + |> parse_tag_slots() + |> Enum.all?(fn + {"", _pos} -> true + {_hash, pos} -> Enum.at(ac, pos, true) + end) end) end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex b/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex deleted file mode 100644 index da44e60e36..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex +++ /dev/null @@ -1,143 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveHandling do - @moduledoc false - alias Electric.Replication.LogOffset - alias Electric.ShapeCache.Storage - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.PartialModes - alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves - alias Electric.Shapes.Consumer.MoveIns - - require Logger - - @spec process_move_ins(State.t(), Shape.handle(), list(term())) :: State.t() - def process_move_ins(state, _, []), do: state - - def process_move_ins(%State{} = state, dep_handle, new_values) do - # Something moved in in a dependency shape. We need to query the DB for relevant values. - formed_where_clause = - Shape.SubqueryMoves.move_in_where_clause( - state.shape, - dep_handle, - Enum.map(new_values, &elem(&1, 1)) - ) - - storage = state.storage - name = Electric.Utils.uuid4() - consumer_pid = self() - - # Start async query - don't block on snapshot - Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) - |> PartialModes.query_move_in_async( - state.shape_handle, - state.shape, - formed_where_clause, - stack_id: state.stack_id, - consumer_pid: consumer_pid, - results_fn: fn stream, pg_snapshot -> - task_pid = self() - - # Process query results - stream - |> Stream.transform( - fn -> [] end, - fn [key, _, _] = item, acc -> {[item], [key | acc]} end, - fn acc -> send(task_pid, {:acc, acc, pg_snapshot}) end - ) - |> Storage.write_move_in_snapshot!(name, storage) - - # Return accumulated keys and snapshot - receive(do: ({:acc, acc, snapshot} -> {acc, snapshot})) - end, - move_in_name: name - ) - - index = Enum.find_index(state.shape.shape_dependencies_handles, &(&1 == dep_handle)) - - # Add to waiting WITHOUT blocking (snapshot will be set later via message) - move_handling_state = - MoveIns.add_waiting( - state.move_handling_state, - name, - {["$sublink", Integer.to_string(index)], MapSet.new(Enum.map(new_values, &elem(&1, 0)))} - ) - - Logger.debug("Move-in #{name} has been triggered from #{dep_handle}") - - %{state | move_handling_state: move_handling_state} - end - - @spec process_move_outs(State.t(), Shape.handle(), list(term())) :: - {State.t(), changes :: term()} - def process_move_outs(state, _, []), do: {state, nil} - - def process_move_outs(state, dep_handle, removed_values) do - message = - SubqueryMoves.make_move_out_control_message( - state.shape, - state.stack_id, - state.shape_handle, - [ - {dep_handle, removed_values} - ] - ) - - # TODO: This leaks the message abstraction, and I'm OK with it for now because I'll be refactoring this code path for the multi-subqueries shortly - move_handling_state = - MoveIns.move_out_happened( - state.move_handling_state, - MapSet.new(message.headers.patterns |> Enum.map(& &1[:value])) - ) - - {{_, upper_bound}, writer} = Storage.append_control_message!(message, state.writer) - - {%{state | move_handling_state: move_handling_state, writer: writer}, - {[message], upper_bound}} - end - - def query_complete(%State{} = state, name, key_set, snapshot) do - touch_tracker = state.move_handling_state.touch_tracker - tags_to_skip = state.move_handling_state.moved_out_tags[name] || MapSet.new() - - # 1. Splice stored snapshot into main log with filtering - {{lower_bound, upper_bound}, writer} = - Storage.append_move_in_snapshot_to_log!( - name, - state.writer, - fn key, tags -> - (tags != [] and Enum.all?(tags, &MapSet.member?(tags_to_skip, &1))) or - MoveIns.should_skip_query_row?(touch_tracker, snapshot, key) - end - ) - - # 2. Move from "waiting" to "filtering" - {visibility_snapshot, move_handling_state} = - MoveIns.change_to_filtering(state.move_handling_state, name, MapSet.new(key_set)) - - {{_, upper_bound}, writer} = - if is_nil(visibility_snapshot) do - {{nil, upper_bound}, writer} - else - append_snapshot_end_control(snapshot, writer) - end - - state = %{state | move_handling_state: move_handling_state, writer: writer} - - {state, {{lower_bound, upper_bound}, upper_bound}} - end - - @spec append_snapshot_end_control(MoveIns.pg_snapshot(), Storage.writer_state()) :: - {{LogOffset.t(), LogOffset.t()}, Storage.writer_state()} - defp append_snapshot_end_control({xmin, xmax, xip_list}, writer) do - control_message = %{ - headers: %{ - control: "snapshot-end", - xmin: Integer.to_string(xmin), - xmax: Integer.to_string(xmax), - xip_list: Enum.map(xip_list, &Integer.to_string/1) - } - } - - Storage.append_control_message!(control_message, writer) - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex b/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex deleted file mode 100644 index c0c554cb70..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex +++ /dev/null @@ -1,358 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveIns do - alias Electric.Replication.Changes - alias Electric.Replication.Changes.Transaction - alias Electric.Postgres.Xid - alias Electric.Postgres.SnapshotQuery - - require Xid - - defstruct waiting_move_ins: %{}, - filtering_move_ins: [], - touch_tracker: %{}, - move_in_buffering_snapshot: nil, - in_flight_values: %{}, - moved_out_tags: %{}, - maximum_resolved_snapshot: nil, - minimum_unresolved_snapshot: nil - - @type pg_snapshot() :: SnapshotQuery.pg_snapshot() - @type move_in_name() :: String.t() - @type in_flight_values() :: %{term() => MapSet.t()} - - @typedoc """ - Information needed to reason about move-in handling and correct stream processing. - - - `waiting_move_ins`: Information about move-ins we're waiting for. That means a move-in was triggered, but - query results are not yet available. The map value has pg snapshot and actual values that were - moved in and thus should be skipped in where clause evaluation until the results are appended to the log - - `filtering_move_ins`: Information about move-ins we're filtering. That means a move-in has resolved and was - added to the shape log, and we need to skip changes that are already visible there. - - `touch_tracker`: A map of keys to xids of transactions that have touched them. This is used to skip changes - inside move-in query results that are already visible in the shape log. - - `move_in_buffering_snapshot`: A snapshot that is a union of all the "waiting" move-in snapshots. This is used to - reduce a check whether something is visible in any of the "waiting" move-in snapshots - down to a single check instead of checking each snapshot individually. - - `in_flight_values`: A precalculated map of all moved-in values that caused a move-in and thus should be skipped in - where clause evaluation until the results are appended to the log. - - `moved_out_tags`: A map of move-in names to sets of tags that were moved out while the move-in was happening and thus - should be skipped when appending move-in results to the log. - - `maximum_resolved_snapshot`: Stores the maximum snapshot of resolved move-ins that weren't immediately appended as - snapshot-end control messages, to be appended when the last concurrent move-in resolves. - - `minimum_unresolved_snapshot`: Stores the minimum snapshot of unresolved move-ins. - """ - @type t() :: %__MODULE__{ - waiting_move_ins: %{move_in_name() => {pg_snapshot() | nil, {term(), MapSet.t()}}}, - filtering_move_ins: list({pg_snapshot(), keys :: list(String.t())}), - touch_tracker: %{String.t() => pos_integer()}, - move_in_buffering_snapshot: nil | pg_snapshot(), - in_flight_values: in_flight_values(), - moved_out_tags: %{move_in_name() => MapSet.t(String.t())}, - maximum_resolved_snapshot: nil | pg_snapshot(), - minimum_unresolved_snapshot: nil | pg_snapshot() - } - def new() do - %__MODULE__{} - end - - @doc """ - Add information about a new move-in to the state for which we're waiting. - Snapshot is initially nil and will be set later when the query begins. - """ - @spec add_waiting(t(), move_in_name(), {term(), MapSet.t()}) :: t() - def add_waiting( - %__MODULE__{waiting_move_ins: waiting_move_ins} = state, - name, - moved_values - ) do - new_waiting_move_ins = Map.put(waiting_move_ins, name, {nil, moved_values}) - new_buffering_snapshot = make_move_in_buffering_snapshot(new_waiting_move_ins) - - %{ - state - | waiting_move_ins: new_waiting_move_ins, - move_in_buffering_snapshot: new_buffering_snapshot, - in_flight_values: make_in_flight_values(new_waiting_move_ins), - moved_out_tags: Map.put(state.moved_out_tags, name, MapSet.new()) - } - end - - # TODO: this assumes a single subquery for now - def move_out_happened(state, new_tags) do - moved_out_tags = - Map.new(state.moved_out_tags, fn {name, tags} -> {name, MapSet.union(tags, new_tags)} end) - - %{state | moved_out_tags: moved_out_tags} - end - - @doc """ - Set the snapshot for a waiting move-in when it becomes known. - """ - @spec set_snapshot(t(), move_in_name(), pg_snapshot()) :: t() - def set_snapshot(%__MODULE__{waiting_move_ins: waiting_move_ins} = state, name, snapshot) do - new_move_ins = - Map.update!(waiting_move_ins, name, fn {_, moved_values} -> {snapshot, moved_values} end) - - new_buffering_snapshot = make_move_in_buffering_snapshot(new_move_ins) - - %{ - state - | waiting_move_ins: new_move_ins, - move_in_buffering_snapshot: new_buffering_snapshot, - minimum_unresolved_snapshot: min_snapshot(state.minimum_unresolved_snapshot, snapshot) - } - end - - @spec make_move_in_buffering_snapshot(%{move_in_name() => pg_snapshot()}) :: nil | pg_snapshot() - # The fake global snapshot allows us to check if a transaction is not visible in any of the pending snapshots - # instead of checking each snapshot individually. - defp make_move_in_buffering_snapshot(waiting_move_ins) when waiting_move_ins == %{}, do: nil - - defp make_move_in_buffering_snapshot(waiting_move_ins) do - snapshots = - waiting_move_ins - |> Map.values() - |> Enum.map(fn {snapshot, _} -> snapshot end) - |> Enum.reject(&is_nil/1) - - case snapshots do - [] -> - nil - - _ -> - Enum.reduce(snapshots, {:infinity, -1, []}, fn {xmin, xmax, xip_list}, - {global_xmin, global_xmax, global_xip_list} -> - {Kernel.min(global_xmin, xmin), Kernel.max(global_xmax, xmax), - global_xip_list ++ xip_list} - end) - end - end - - defp make_in_flight_values(waiting_move_ins) do - waiting_move_ins - |> Map.values() - |> Enum.map(fn {_, moved_values} -> moved_values end) - |> Enum.reduce(%{}, fn {key, value}, acc -> - Map.update(acc, key, value, &MapSet.union(&1, value)) - end) - end - - @doc """ - Change a move-in from "waiting" to "filtering", marking it as complete and return best-effort visibility boundary. - """ - @spec change_to_filtering(t(), move_in_name(), MapSet.t(String.t())) :: - {visibility_boundary :: nil | pg_snapshot(), t()} - def change_to_filtering(%__MODULE__{} = state, name, key_set) do - {{snapshot, _}, waiting_move_ins} = Map.pop!(state.waiting_move_ins, name) - filtering_move_ins = [{snapshot, key_set} | state.filtering_move_ins] - buffering_snapshot = make_move_in_buffering_snapshot(waiting_move_ins) - - {boundary, maximum_resolved_snapshot} = - cond do - waiting_move_ins == %{} -> {max_snapshot(state.maximum_resolved_snapshot, snapshot), nil} - is_minimum_snapshot?(state, snapshot) -> {snapshot, state.maximum_resolved_snapshot} - true -> {nil, max_snapshot(state.maximum_resolved_snapshot, snapshot)} - end - - new_state = %{ - state - | waiting_move_ins: waiting_move_ins, - filtering_move_ins: filtering_move_ins, - move_in_buffering_snapshot: buffering_snapshot, - in_flight_values: make_in_flight_values(waiting_move_ins), - moved_out_tags: Map.delete(state.moved_out_tags, name), - minimum_unresolved_snapshot: find_minimum_unresolved_snapshot(waiting_move_ins), - maximum_resolved_snapshot: maximum_resolved_snapshot - } - - {boundary, new_state} - end - - defp find_minimum_unresolved_snapshot(waiting_move_ins) do - snapshots = - waiting_move_ins - |> Map.values() - |> Enum.map(fn {snapshot, _} -> snapshot end) - |> Enum.reject(&is_nil/1) - - case snapshots do - [] -> nil - list -> Enum.min(list, &(Xid.compare_snapshots(&1, &2) != :gt)) - end - end - - @doc """ - Remove completed move-ins from the state. - - Move-in is considered "completed" (i.e. not included in the filtering logic) - once we see any transaction that is after the end of the move-in snapshot. - - Filtering generally is applied only to transactions that are already visible - in the snapshot, and those can only be with `xid < xmax`. - """ - @spec remove_completed(t(), Transaction.t()) :: t() - def remove_completed(%__MODULE__{} = state, %Transaction{xid: xid}) do - state.filtering_move_ins - |> Enum.reject(fn {snapshot, _} -> Xid.after_snapshot?(xid, snapshot) end) - |> then(&%{state | filtering_move_ins: &1}) - end - - @doc """ - Check if a change is already visible in one of the completed move-ins. - - A visible change means it needs to be skipped to avoid duplicates. - """ - @spec change_already_visible?(t(), Xid.anyxid(), Changes.change()) :: boolean() - def change_already_visible?(_state, _xid, %Changes.DeletedRecord{}), do: false - def change_already_visible?(%__MODULE__{filtering_move_ins: []}, _, _), do: false - - def change_already_visible?(%__MODULE__{filtering_move_ins: filters}, xid, %{key: key}) do - Enum.any?(filters, fn {snapshot, key_set} -> - Transaction.visible_in_snapshot?(xid, snapshot) and MapSet.member?(key_set, key) - end) - end - - def change_visible_in_unresolved_move_ins_for_values?( - %__MODULE__{waiting_move_ins: waiting_move_ins}, - referenced_values, - xid - ) do - Enum.any?(Map.values(waiting_move_ins), fn {snapshot, {path, moved_values}} -> - case Map.fetch(referenced_values, path) do - {:ok, value} -> - (is_nil(snapshot) or Transaction.visible_in_snapshot?(xid, snapshot)) and - MapSet.member?(moved_values, value) - - :error -> - false - end - end) - end - - @doc """ - Track a touch for a non-delete change. - Returns updated touch_tracker. - """ - @spec track_touch(t(), pos_integer(), Changes.change()) :: t() - - def track_touch(%__MODULE__{} = state, _xid, %Changes.DeletedRecord{}), - do: state - - def track_touch(%__MODULE__{touch_tracker: touch_tracker} = state, xid, %{key: key}) do - %{state | touch_tracker: Map.put(touch_tracker, key, xid)} - end - - @doc """ - Garbage collect touches that are visible in all pending snapshots. - A touch is visible if its xid is before the minimum xmin of all waiting snapshots. - """ - @spec gc_touch_tracker(t()) :: t() - def gc_touch_tracker( - %__MODULE__{ - move_in_buffering_snapshot: nil, - waiting_move_ins: waiting_move_ins - } = state - ) do - # If there are waiting move-ins but buffering_snapshot is nil (all snapshots unknown), - # keep all touches. Otherwise (no waiting move-ins), clear all touches. - case waiting_move_ins do - empty when empty == %{} -> %{state | touch_tracker: %{}} - _ -> state - end - end - - def gc_touch_tracker( - %__MODULE__{ - touch_tracker: touch_tracker, - move_in_buffering_snapshot: {xmin, _xmax, _xip_list} - } = - state - ) do - # Remove touches that are before the minimum xmin (visible in all snapshots) - %{ - state - | touch_tracker: - Map.reject(touch_tracker, fn {_key, touch_xid} -> - touch_xid < xmin - end) - } - end - - @doc """ - Check if a query result row should be skipped because a fresher version exists in the stream. - Skip if: touch exists AND touch xid is NOT visible in query snapshot. - """ - @spec should_skip_query_row?(%{String.t() => pos_integer()}, pg_snapshot(), String.t()) :: - boolean() - def should_skip_query_row?(touch_tracker, _snapshot, key) - when not is_map_key(touch_tracker, key) do - false - end - - def should_skip_query_row?(touch_tracker, snapshot, key) do - touch_xid = Map.fetch!(touch_tracker, key) - # Skip if touch is NOT visible in snapshot (means we have fresher data in stream) - not Transaction.visible_in_snapshot?(touch_xid, snapshot) - end - - @spec max_snapshot(pg_snapshot() | nil, pg_snapshot() | nil) :: pg_snapshot() - defp max_snapshot(nil, value), do: value - defp max_snapshot(value, nil), do: value - - defp max_snapshot(snapshot1, snapshot2) do - case Xid.compare_snapshots(snapshot1, snapshot2) do - :lt -> snapshot2 - _ -> snapshot1 - end - end - - @spec min_snapshot(pg_snapshot(), pg_snapshot()) :: pg_snapshot() - defp min_snapshot(nil, value), do: value - defp min_snapshot(value, nil), do: value - - defp min_snapshot(snapshot1, snapshot2) do - case Xid.compare_snapshots(snapshot1, snapshot2) do - :lt -> snapshot1 - _ -> snapshot2 - end - end - - @doc """ - Check if the given snapshot is the minimum among all concurrent waiting move-ins - (excluding the current one being resolved, and only considering those with known snapshots). - """ - @spec is_minimum_snapshot?(t(), pg_snapshot()) :: boolean() - def is_minimum_snapshot?( - %__MODULE__{minimum_unresolved_snapshot: minimum_unresolved_snapshot}, - snapshot - ) do - Xid.compare_snapshots(snapshot, minimum_unresolved_snapshot) == :eq - end - - @doc """ - Store or update the maximum resolved snapshot. - If there's already a stored snapshot, keep the maximum of the two. - """ - @spec store_maximum_resolved_snapshot(t(), pg_snapshot()) :: t() - def store_maximum_resolved_snapshot( - %__MODULE__{maximum_resolved_snapshot: nil} = state, - snapshot - ) do - %{state | maximum_resolved_snapshot: snapshot} - end - - def store_maximum_resolved_snapshot( - %__MODULE__{maximum_resolved_snapshot: stored} = state, - snapshot - ) do - %{state | maximum_resolved_snapshot: max_snapshot(stored, snapshot)} - end - - @doc """ - Get the stored maximum resolved snapshot and clear it, or return nil if none is stored. - Returns {snapshot | nil, updated_state}. - """ - @spec get_and_clear_maximum_resolved_snapshot(t()) :: {pg_snapshot() | nil, t()} - def get_and_clear_maximum_resolved_snapshot(%__MODULE__{} = state) do - {state.maximum_resolved_snapshot, %{state | maximum_resolved_snapshot: nil}} - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/state.ex b/packages/sync-service/lib/electric/shapes/consumer/state.ex index b32204d2a9..cca4e83a11 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/state.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/state.ex @@ -1,10 +1,7 @@ defmodule Electric.Shapes.Consumer.State do @moduledoc false - alias Electric.Shapes.Consumer.MoveIns alias Electric.Shapes.Consumer.InitialSnapshot alias Electric.Shapes.Shape - alias Electric.Replication.Eval.Parser - alias Electric.Replication.Eval.Walker alias Electric.Replication.TransactionBuilder alias Electric.Postgres.SnapshotQuery alias Electric.Replication.LogOffset @@ -25,15 +22,13 @@ defmodule Electric.Shapes.Consumer.State do :storage, :writer, initial_snapshot_state: InitialSnapshot.new(nil), - move_handling_state: MoveIns.new(), + subquery_state: nil, transaction_builder: TransactionBuilder.new(), buffer: [], txn_offset_mapping: [], materializer_subscribed?: false, terminating?: false, buffering?: false, - or_with_subquery?: false, - not_with_subquery?: false, # Based on the write unit value, consumer will either buffer txn fragments in memory until # it sees a commit (write_unit=txn) or it will write each received txn fragment to storage # immediately (write_unit=txn_fragment). @@ -46,31 +41,7 @@ defmodule Electric.Shapes.Consumer.State do ] @type pg_snapshot() :: SnapshotQuery.pg_snapshot() - @type move_in_name() :: String.t() - @type uninitialized_t() :: term() - # @type uninitialized_t() :: %__MODULE__{ - # stack_id: Electric.stack_id(), - # shape_handle: Shape.handle(), - # shape: Shape.t(), - # awaiting_snapshot_start: list(GenServer.from()), - # buffer: list(Transaction.t()), - # monitors: list({pid(), reference()}), - # txn_offset_mapping: list({LogOffset.t(), LogOffset.t()}), - # snapshot_started?: boolean(), - # materializer_subscribed?: boolean(), - # terminating?: boolean(), - # buffering?: boolean(), - # initial_snapshot_filtering?: boolean(), - # waiting_move_ins: %{move_in_name() => pg_snapshot()}, - # filtering_move_ins: list(Shape.handle()), - # move_in_buffering_snapshot: nil | pg_snapshot(), - # hibernate_after: non_neg_integer(), - # latest_offset: nil, - # initial_pg_snapshot: nil, - # storage: nil, - # writer: nil - # } @typedoc """ State of the consumer process. @@ -85,53 +56,14 @@ defmodule Electric.Shapes.Consumer.State do last relevant one to last one generally in the transaction and use that to map back the flushed offset to the transaction boundary. - ## Move-in handling - - There are 3 fields in the state relating to the move-in handling: - `waiting_move_ins`, `filtering_move_ins`, and `move_in_buffering_snapshot`. - - Once a move-in is necessary, we immeidately query the DB for the snapshot, - and store it in `waiting_move_ins` until we know the affected key set for this - move-in (possible only when entire query resolves). If a transaction is not a - part of any of these "waiting" move-in snapshots, we cannot apply it yet - and so we start buffering. In order to avoid walking the `waiting_move_ins` - map every time, we instead construct a "buffering snapshot" which is a union - of all the "waiting" move-in snapshots. This is stored in `move_in_buffering_snapshot` - and is updated when anything is added to or removed from `waiting_move_ins`. - - Once we have the affected key set, we can move the move-in to `filtering_move_ins`. - Filtering logic is described elsewhere. - ## Buffering Consumer will be buffering transactions in 2 cases: when we're waiting for initial - snapshot information, or when we can't reason about the change in context of a move-in. + snapshot information, or when an active subquery move-in is being spliced into the log. Buffer is stored in reverse order. """ @type t() :: term() - # @type t() :: %__MODULE__{ - # stack_id: Electric.stack_id(), - # shape_handle: Shape.handle(), - # shape: Shape.t(), - # awaiting_snapshot_start: list(GenServer.from()), - # buffer: list(Transaction.t()), - # monitors: list({pid(), reference()}), - # txn_offset_mapping: list({LogOffset.t(), LogOffset.t()}), - # snapshot_started?: boolean(), - # materializer_subscribed?: boolean(), - # terminating?: boolean(), - # buffering?: boolean(), - # initial_snapshot_filtering?: boolean(), - # waiting_move_ins: %{move_in_name() => pg_snapshot()}, - # filtering_move_ins: list(Shape.handle()), - # move_in_buffering_snapshot: nil | pg_snapshot(), - # hibernate_after: non_neg_integer(), - # latest_offset: LogOffset.t(), - # initial_pg_snapshot: nil | pg_snapshot(), - # storage: Storage.shape_storage(), - # writer: Storage.writer_state() - # } defguard is_snapshot_started(state) when is_struct(state.initial_snapshot_state, InitialSnapshot) and @@ -171,8 +103,6 @@ defmodule Electric.Shapes.Consumer.State do %{ state | shape: shape, - or_with_subquery?: has_or_with_subquery?(shape), - not_with_subquery?: has_not_with_subquery?(shape), # Enable direct fragment-to-storage streaming for shapes without subquery dependencies # and if the current shape itself isn't an inner shape of a shape with subqueries. write_unit: @@ -185,62 +115,6 @@ defmodule Electric.Shapes.Consumer.State do } end - defp has_or_with_subquery?(%Shape{shape_dependencies: []}), do: false - defp has_or_with_subquery?(%Shape{where: nil}), do: false - - defp has_or_with_subquery?(%Shape{where: where}) do - Walker.reduce!( - where.eval, - fn - %Parser.Func{name: "or"} = or_node, acc, _ctx -> - if subtree_has_sublink?(or_node) do - {:ok, true} - else - {:ok, acc} - end - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - - defp subtree_has_sublink?(tree) do - Walker.reduce!( - tree, - fn - %Parser.Ref{path: ["$sublink", _]}, _acc, _ctx -> - {:ok, true} - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - - defp has_not_with_subquery?(%Shape{shape_dependencies: []}), do: false - defp has_not_with_subquery?(%Shape{where: nil}), do: false - - defp has_not_with_subquery?(%Shape{where: where}) do - Walker.reduce!( - where.eval, - fn - %Parser.Func{name: "not"} = not_node, acc, _ctx -> - if subtree_has_sublink?(not_node) do - {:ok, true} - else - {:ok, acc} - end - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - @doc """ After the storage is ready, initialize the state with info from storage and writer state. """ @@ -358,32 +232,6 @@ defmodule Electric.Shapes.Consumer.State do def initial_snapshot_xmin(%__MODULE__{}), do: nil - @doc """ - Track a change in the touch tracker. - """ - @spec track_change(t(), pos_integer(), Electric.Replication.Changes.change()) :: t() - def track_change(%__MODULE__{move_handling_state: move_handling_state} = state, xid, change) do - %{state | move_handling_state: MoveIns.track_touch(move_handling_state, xid, change)} - end - - @doc """ - Garbage collect touches that are visible in all pending snapshots. - """ - @spec gc_touch_tracker(t()) :: t() - def gc_touch_tracker(%__MODULE__{move_handling_state: move_handling_state} = state) do - %{ - state - | move_handling_state: MoveIns.gc_touch_tracker(move_handling_state) - } - end - - def remove_completed_move_ins( - %__MODULE__{move_handling_state: move_handling_state} = state, - xid - ) do - %{state | move_handling_state: MoveIns.remove_completed(move_handling_state, xid)} - end - def telemetry_attrs(%__MODULE__{stack_id: stack_id, shape_handle: shape_handle, shape: shape}) do [ "shape.handle": shape_handle, diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex new file mode 100644 index 0000000000..28b97b0b88 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -0,0 +1,441 @@ +defmodule Electric.Shapes.Consumer.Subqueries do + @moduledoc false + + alias Electric.Connection.Manager + alias Electric.Postgres.Lsn + alias Electric.Postgres.SnapshotQuery + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Replication.Eval + alias Electric.Replication.Eval.Walker + alias Electric.Shapes.Consumer.Subqueries.Buffering + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.QueryRow + alias Electric.Shapes.Consumer.Subqueries.Steady + alias Electric.Shapes.Consumer.Subqueries.StateMachine + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Querying + alias Electric.Shapes.Shape + + @value_prefix "v:" + @null_sentinel "NULL" + + @type move_value() :: {term(), term()} + @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} + @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} + @type output() :: Changes.change() | move_out_control() | QueryRow.t() + + def value_prefix, do: @value_prefix + def null_sentinel, do: @null_sentinel + + @spec new(keyword() | map()) :: Steady.t() + def new(opts) when is_list(opts) or is_map(opts) do + opts = Map.new(opts) + + %Steady{ + shape: fetch_opt!(opts, :shape), + stack_id: fetch_opt!(opts, :stack_id), + shape_handle: fetch_opt!(opts, :shape_handle), + dnf_plan: fetch_opt!(opts, :dnf_plan), + views: Map.get(opts, :views, %{}), + dependency_handle_to_ref: Map.get(opts, :dependency_handle_to_ref, %{}), + latest_seen_lsn: Map.get(opts, :latest_seen_lsn), + queue: MoveQueue.new() + } + end + + @spec handle_event(Steady.t() | Buffering.t(), term()) :: + {[output()], Steady.t() | Buffering.t()} + def handle_event(state, event), do: StateMachine.handle_event(state, event) + + @spec normalize_global_lsn(Electric.Postgres.Lsn.t() | non_neg_integer()) :: + Electric.Postgres.Lsn.t() + def normalize_global_lsn(%Lsn{} = lsn), do: lsn + def normalize_global_lsn(lsn) when is_integer(lsn), do: Lsn.from_integer(lsn) + + @spec query_move_in_async(pid() | atom(), map(), Buffering.t(), pid()) :: :ok + def query_move_in_async( + supervisor, + consumer_state, + %Buffering{} = buffering_state, + consumer_pid + ) do + {where, params} = + DnfPlan.move_in_where_clause( + buffering_state.dnf_plan, + buffering_state.trigger_dep_index, + Enum.map(buffering_state.move_in_values, &elem(&1, 0)), + buffering_state.views_before_move, + consumer_state.shape.where.used_refs + ) + + pool = Manager.pool_name(consumer_state.stack_id, :snapshot) + stack_id = consumer_state.stack_id + shape = consumer_state.shape + shape_handle = consumer_state.shape_handle + + :telemetry.execute([:electric, :subqueries, :move_in_triggered], %{count: 1}, %{ + stack_id: stack_id + }) + + Task.Supervisor.start_child(supervisor, fn -> + try do + SnapshotQuery.execute_for_shape(pool, shape_handle, shape, + stack_id: stack_id, + query_reason: "move_in_query", + snapshot_info_fn: fn _, pg_snapshot, _lsn -> + send(consumer_pid, {:pg_snapshot_known, pg_snapshot}) + end, + query_fn: fn conn, _pg_snapshot, lsn -> + rows = + Querying.query_move_in(conn, stack_id, shape_handle, shape, {where, params}, + dnf_plan: buffering_state.dnf_plan, + views: buffering_state.views_after_move + ) + |> Enum.map(fn [key, _tags, json] -> %QueryRow{key: key, json: json} end) + + send(consumer_pid, {:query_move_in_complete, rows, lsn}) + end + ) + rescue + error -> + send(consumer_pid, {:query_move_in_error, error, __STACKTRACE__}) + end + end) + + :ok + end + + @spec move_in_tag_structure(Shape.t()) :: + {list(list(String.t() | {:hash_together, [String.t(), ...]})), map()} + def move_in_tag_structure(%Shape{} = shape) + when is_nil(shape.where) + when shape.shape_dependencies == [], + do: {[], %{}} + + def move_in_tag_structure(shape) do + {:ok, {tag_structure, comparison_expressions}} = + Walker.reduce( + shape.where.eval, + fn + %Eval.Parser.Func{name: "sublink_membership_check", args: [testexpr, sublink_ref]}, + {[current_tag | others], comparison_expressions}, + _ -> + tags = + case testexpr do + %Eval.Parser.Ref{path: [column_name]} -> + [[column_name | current_tag] | others] + + %Eval.Parser.RowExpr{elements: elements} -> + elements = + Enum.map(elements, fn %Eval.Parser.Ref{path: [column_name]} -> + column_name + end) + + [[{:hash_together, elements} | current_tag] | others] + end + + {:ok, {tags, Map.put(comparison_expressions, sublink_ref.path, testexpr)}} + + _, acc, _ -> + {:ok, acc} + end, + {[[]], %{}} + ) + + comparison_expressions + |> Map.new(fn {path, expr} -> {path, Eval.Expr.wrap_parser_part(expr)} end) + |> then(&{tag_structure, &1}) + end + + @spec drain_queue(Steady.t(), [output()]) :: {[output()], Steady.t() | Buffering.t()} + def drain_queue(%Steady{} = state, outputs \\ []) do + case MoveQueue.pop_next(state.queue) do + nil -> + {outputs, state} + + {{:move_out, dep_index, move_out_values}, queue} -> + subquery_ref = dep_ref_for_index(state, dep_index) + effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_out) + + case effect do + :move_out -> + next_state = %{ + state + | queue: queue, + views: + Map.update!(state.views, subquery_ref, &remove_move_values(&1, move_out_values)) + } + + broadcast = + DnfPlan.make_move_out_broadcast( + state.dnf_plan, + dep_index, + move_out_values, + state.stack_id, + state.shape_handle + ) + + drain_queue(next_state, outputs ++ [broadcast]) + + :move_in -> + {outputs, + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_out_values, + queue, + :move_out + )} + end + + {{:move_in, dep_index, move_in_values}, queue} -> + subquery_ref = dep_ref_for_index(state, dep_index) + effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_in) + + case effect do + :move_in -> + {outputs, + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_in_values, + queue, + :move_in + )} + + :move_out -> + next_state = %{ + state + | queue: queue, + views: + Map.update!(state.views, subquery_ref, &add_move_values(&1, move_in_values)) + } + + broadcast = + DnfPlan.make_move_out_broadcast( + state.dnf_plan, + dep_index, + move_in_values, + state.stack_id, + state.shape_handle + ) + + drain_queue(next_state, outputs ++ [broadcast]) + end + end + end + + @spec maybe_splice(Buffering.t()) :: {[output()], Steady.t() | Buffering.t()} + def maybe_splice(%Buffering{} = state) do + if ready_to_splice?(state) do + {pre_txns, post_txns} = Enum.split(state.buffered_txns, state.boundary_txn_count) + + move_in_broadcast = + DnfPlan.make_move_in_broadcast( + state.dnf_plan, + state.trigger_dep_index, + state.move_in_values, + state.stack_id, + state.shape_handle + ) + + outputs = + Enum.flat_map( + pre_txns, + &convert_transaction(&1, state, state.views_before_move) + ) ++ + [move_in_broadcast] ++ + state.move_in_rows ++ + Enum.flat_map( + post_txns, + &convert_transaction(&1, state, state.views_after_move) + ) + + state + |> to_steady_state() + |> drain_queue(outputs) + else + {[], state} + end + end + + @spec convert_transaction(Transaction.t(), Steady.t() | Buffering.t(), map()) :: [ + Changes.change() + ] + def convert_transaction(%Transaction{changes: changes}, %{shape: shape} = state, views) do + changes + |> Enum.flat_map(fn change -> + Shape.convert_change(shape, change, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + extra_refs: {views, views}, + dnf_plan: state.dnf_plan + ) + end) + |> mark_last_change() + end + + @spec maybe_buffer_boundary_from_txn(Buffering.t(), Transaction.t()) :: Buffering.t() + def maybe_buffer_boundary_from_txn(%Buffering{boundary_txn_count: boundary} = state, _txn) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_txn(%Buffering{snapshot: nil} = state, _txn), do: state + + def maybe_buffer_boundary_from_txn(%Buffering{} = state, %Transaction{} = txn) do + if Transaction.visible_in_snapshot?(txn, state.snapshot) do + state + else + %{state | boundary_txn_count: length(state.buffered_txns)} + end + end + + @spec maybe_buffer_boundary_from_snapshot(Buffering.t()) :: Buffering.t() + def maybe_buffer_boundary_from_snapshot(%Buffering{boundary_txn_count: boundary} = state) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_snapshot(%Buffering{snapshot: nil} = state), do: state + + def maybe_buffer_boundary_from_snapshot(%Buffering{} = state) do + case Enum.find_index( + state.buffered_txns, + &(not Transaction.visible_in_snapshot?(&1, state.snapshot)) + ) do + nil -> state + index -> %{state | boundary_txn_count: index} + end + end + + @spec maybe_buffer_boundary_from_lsn(Buffering.t(), Lsn.t()) :: Buffering.t() + def maybe_buffer_boundary_from_lsn(%Buffering{boundary_txn_count: boundary} = state, _lsn) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_lsn(%Buffering{move_in_lsn: nil} = state, _lsn), do: state + + def maybe_buffer_boundary_from_lsn(%Buffering{} = state, %Lsn{} = lsn) do + case Lsn.compare(lsn, state.move_in_lsn) do + :lt -> state + _ -> %{state | boundary_txn_count: length(state.buffered_txns)} + end + end + + @spec maybe_buffer_boundary_from_seen_lsn(Buffering.t()) :: Buffering.t() + def maybe_buffer_boundary_from_seen_lsn(%Buffering{latest_seen_lsn: nil} = state), do: state + + def maybe_buffer_boundary_from_seen_lsn(%Buffering{} = state) do + maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) + end + + @spec validate_dependency_handle!(Steady.t() | Buffering.t(), term()) :: :ok + def validate_dependency_handle!(%{dependency_handle_to_ref: mapping}, dep_handle) do + unless Map.has_key?(mapping, dep_handle) do + raise ArgumentError, + "unexpected dependency handle #{inspect(dep_handle)}, " <> + "known: #{inspect(Map.keys(mapping))}" + end + + :ok + end + + @spec make_move_out_control_message(Steady.t() | Buffering.t(), non_neg_integer(), [ + move_value() + ]) :: + move_out_control() + def make_move_out_control_message( + %{dnf_plan: dnf_plan, stack_id: stack_id, shape_handle: shape_handle}, + dep_index, + values + ) do + DnfPlan.make_move_out_broadcast(dnf_plan, dep_index, values, stack_id, shape_handle) + end + + @spec should_skip_query_row?( + %{String.t() => pos_integer()}, + SnapshotQuery.pg_snapshot(), + String.t() + ) :: + boolean() + def should_skip_query_row?(touch_tracker, _snapshot, key) + when not is_map_key(touch_tracker, key), + do: false + + def should_skip_query_row?(touch_tracker, snapshot, key) do + touch_xid = Map.fetch!(touch_tracker, key) + not Transaction.visible_in_snapshot?(touch_xid, snapshot) + end + + @spec namespace_value(nil | binary()) :: binary() + def namespace_value(nil), do: @null_sentinel + def namespace_value(value), do: @value_prefix <> value + + @spec make_value_hash(binary(), binary(), nil | binary()) :: binary() + def make_value_hash(stack_id, shape_handle, value) do + make_value_hash_raw(stack_id, shape_handle, namespace_value(value)) + end + + @spec make_value_hash_raw(binary(), binary(), binary()) :: binary() + def make_value_hash_raw(stack_id, shape_handle, namespaced_value) do + :crypto.hash(:md5, "#{stack_id}#{shape_handle}#{namespaced_value}") + |> Base.encode16(case: :lower) + end + + @doc """ + Returns the subquery ref path for a given dependency index, looking it up + via the dependency_handle_to_ref mapping. + """ + def dep_ref_for_index(%{dependency_handle_to_ref: mapping}, dep_index) do + case Enum.find(mapping, fn {_handle, {idx, _ref}} -> idx == dep_index end) do + {_handle, {_idx, ref}} -> ref + nil -> raise ArgumentError, "no dependency found for index #{dep_index}" + end + end + + defp to_steady_state(%Buffering{} = state) do + %Steady{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: state.dnf_plan, + views: state.views_after_move, + dependency_handle_to_ref: state.dependency_handle_to_ref, + latest_seen_lsn: state.latest_seen_lsn, + queue: state.queue + } + end + + defp ready_to_splice?(%Buffering{} = state) do + not is_nil(state.snapshot) and not is_nil(state.move_in_rows) and + not is_nil(state.boundary_txn_count) + end + + defp mark_last_change([]), do: [] + + defp mark_last_change(changes) do + {last, rest} = List.pop_at(changes, -1) + rest ++ [%{last | last?: true}] + end + + defp remove_move_values(subquery_view, move_values) do + Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> + MapSet.delete(view, value) + end) + end + + defp add_move_values(subquery_view, move_values) do + Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> + MapSet.put(view, value) + end) + end + + defp fetch_opt!(opts, key) do + case Map.fetch(opts, key) do + {:ok, value} -> value + :error -> raise ArgumentError, "missing required option #{inspect(key)}" + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex new file mode 100644 index 0000000000..c3394bb948 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -0,0 +1,179 @@ +defmodule Electric.Shapes.Consumer.Subqueries.Buffering do + @moduledoc false + + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.Steady + + @enforce_keys [ + :shape, + :stack_id, + :shape_handle, + :dnf_plan, + :trigger_dep_index, + :move_in_values, + :views_before_move, + :views_after_move, + :latest_seen_lsn + ] + defstruct [ + :shape, + :stack_id, + :shape_handle, + :dnf_plan, + :trigger_dep_index, + :move_in_values, + :views_before_move, + :views_after_move, + dependency_handle_to_ref: %{}, + snapshot: nil, + move_in_rows: nil, + move_in_lsn: nil, + latest_seen_lsn: nil, + boundary_txn_count: nil, + buffered_txns: [], + queue: MoveQueue.new(), + query_started?: false + ] + + @type t() :: %__MODULE__{ + shape: Electric.Shapes.Shape.t(), + stack_id: String.t(), + shape_handle: String.t(), + dnf_plan: Electric.Shapes.DnfPlan.t(), + trigger_dep_index: non_neg_integer(), + move_in_values: [Electric.Shapes.Consumer.Subqueries.move_value()], + views_before_move: %{[String.t()] => MapSet.t()}, + views_after_move: %{[String.t()] => MapSet.t()}, + dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, + snapshot: {term(), term(), [term()]} | nil, + move_in_rows: [term()] | nil, + move_in_lsn: Electric.Postgres.Lsn.t() | nil, + latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, + boundary_txn_count: non_neg_integer() | nil, + buffered_txns: [Electric.Replication.Changes.Transaction.t()], + queue: MoveQueue.t(), + query_started?: boolean() + } + + @spec from_steady( + Steady.t(), + non_neg_integer(), + [String.t()], + [Electric.Shapes.Consumer.Subqueries.move_value()], + MoveQueue.t(), + :move_in | :move_out + ) :: t() + def from_steady( + %Steady{} = state, + dep_index, + subquery_ref, + move_in_values, + queue, + dependency_move_kind + ) do + views_after = + Map.update!( + state.views, + subquery_ref, + &apply_dependency_move(&1, move_in_values, dependency_move_kind) + ) + + %__MODULE__{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: state.dnf_plan, + trigger_dep_index: dep_index, + move_in_values: move_in_values, + views_before_move: state.views, + views_after_move: views_after, + dependency_handle_to_ref: state.dependency_handle_to_ref, + latest_seen_lsn: state.latest_seen_lsn, + queue: queue + } + end + + defp apply_dependency_move(subquery_view, move_in_values, :move_in) do + add_move_in_values(subquery_view, move_in_values) + end + + defp apply_dependency_move(subquery_view, move_in_values, :move_out) do + remove_move_values(subquery_view, move_in_values) + end + + defp add_move_in_values(subquery_view, move_in_values) do + Enum.reduce(move_in_values, subquery_view, fn {value, _original_value}, view -> + MapSet.put(view, value) + end) + end + + defp remove_move_values(subquery_view, move_values) do + Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> + MapSet.delete(view, value) + end) + end +end + +defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, + for: Electric.Shapes.Consumer.Subqueries.Buffering do + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + + def handle_event(state, %Transaction{} = txn) do + next_state = + state + |> Subqueries.maybe_buffer_boundary_from_txn(txn) + |> Map.update!(:buffered_txns, &(&1 ++ [txn])) + + Subqueries.maybe_splice(next_state) + end + + def handle_event(state, {:global_last_seen_lsn, lsn}) do + lsn = Subqueries.normalize_global_lsn(lsn) + + state + |> Map.put(:latest_seen_lsn, lsn) + |> Subqueries.maybe_buffer_boundary_from_lsn(lsn) + |> Subqueries.maybe_splice() + end + + def handle_event(state, {:materializer_changes, dep_handle, payload}) do + :ok = Subqueries.validate_dependency_handle!(state, dep_handle) + {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) + dep_view = Map.get(state.views_after_move, subquery_ref, MapSet.new()) + + {[], + Map.update!( + state, + :queue, + &MoveQueue.enqueue(&1, dep_index, payload, dep_view) + )} + end + + def handle_event(%{snapshot: snapshot}, {:pg_snapshot_known, _new_snapshot}) + when not is_nil(snapshot) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} more than once for one move-in" + end + + def handle_event(state, {:pg_snapshot_known, snapshot}) do + state + |> Map.put(:snapshot, snapshot) + |> Subqueries.maybe_buffer_boundary_from_snapshot() + |> Subqueries.maybe_splice() + end + + def handle_event(%{move_in_rows: rows}, {:query_move_in_complete, _new_rows, _move_in_lsn}) + when not is_nil(rows) do + raise ArgumentError, + "received {:query_move_in_complete, rows, move_in_lsn} more than once for one move-in" + end + + def handle_event(state, {:query_move_in_complete, rows, move_in_lsn}) do + state + |> Map.put(:move_in_rows, rows) + |> Map.put(:move_in_lsn, move_in_lsn) + |> Subqueries.maybe_buffer_boundary_from_seen_lsn() + |> Subqueries.maybe_splice() + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex new file mode 100644 index 0000000000..f4572c6e85 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex @@ -0,0 +1,109 @@ +defmodule Electric.Shapes.Consumer.Subqueries.MoveQueue do + @moduledoc """ + Multi-dependency move queue. Tracks move_in/move_out operations per dependency index, + with deduplication and redundancy elimination scoped per dependency. + + Move-outs from any dependency are drained before move-ins from any dependency. + """ + + alias Electric.Shapes.Consumer.Subqueries + + # move_out/move_in are maps from dep_index to [move_value] + defstruct move_out: %{}, move_in: %{} + + @type t() :: %__MODULE__{ + move_out: %{non_neg_integer() => [Subqueries.move_value()]}, + move_in: %{non_neg_integer() => [Subqueries.move_value()]} + } + + @type batch_kind() :: :move_out | :move_in + @type batch() :: {batch_kind(), non_neg_integer(), [Subqueries.move_value()]} + + @spec new() :: t() + def new, do: %__MODULE__{} + + @spec length(t()) :: non_neg_integer() + def length(%__MODULE__{move_out: move_out, move_in: move_in}) do + count_values(move_out) + count_values(move_in) + end + + defp count_values(map) do + Enum.reduce(map, 0, fn {_, vs}, acc -> acc + Kernel.length(vs) end) + end + + @doc """ + Enqueue a materializer payload for a specific dependency. + `dep_view` is the current view for this dependency, used for redundancy elimination. + """ + @spec enqueue(t(), non_neg_integer(), map() | keyword(), MapSet.t()) :: t() + def enqueue(%__MODULE__{} = queue, dep_index, payload, %MapSet{} = dep_view) + when is_map(payload) or is_list(payload) do + payload = Map.new(payload) + + existing_outs = Map.get(queue.move_out, dep_index, []) + existing_ins = Map.get(queue.move_in, dep_index, []) + + ops = + Enum.map(existing_outs, &{:move_out, &1}) ++ + Enum.map(existing_ins, &{:move_in, &1}) ++ + payload_to_ops(payload) + + {new_outs, new_ins} = reduce(ops, dep_view) + + %__MODULE__{ + move_out: put_or_delete(queue.move_out, dep_index, new_outs), + move_in: put_or_delete(queue.move_in, dep_index, new_ins) + } + end + + @doc """ + Pop the next batch of operations. Returns move-out batches (any dep) before move-in batches. + Returns `{batch, updated_queue}` or `nil` if the queue is empty. + """ + @spec pop_next(t()) :: {batch(), t()} | nil + def pop_next(%__MODULE__{move_out: move_out} = queue) when move_out != %{} do + {dep_index, values} = Enum.min_by(move_out, &elem(&1, 0)) + {{:move_out, dep_index, values}, %{queue | move_out: Map.delete(move_out, dep_index)}} + end + + def pop_next(%__MODULE__{move_out: move_out, move_in: move_in} = queue) + when move_out == %{} and move_in != %{} do + {dep_index, values} = Enum.min_by(move_in, &elem(&1, 0)) + {{:move_in, dep_index, values}, %{queue | move_in: Map.delete(move_in, dep_index)}} + end + + def pop_next(%__MODULE__{}), do: nil + + defp payload_to_ops(payload) do + Enum.map(Map.get(payload, :move_out, []), &{:move_out, &1}) ++ + Enum.map(Map.get(payload, :move_in, []), &{:move_in, &1}) + end + + defp reduce(ops, base_view) do + terminal_ops = + ops + |> Enum.with_index() + |> Enum.reduce(%{}, fn {{kind, move_value}, index}, acc -> + Map.put(acc, elem(move_value, 0), %{kind: kind, move_value: move_value, index: index}) + end) + |> Map.values() + |> Enum.reject(&redundant?(&1, base_view)) + |> Enum.sort_by(& &1.index) + + { + for(%{kind: :move_out, move_value: move_value} <- terminal_ops, do: move_value), + for(%{kind: :move_in, move_value: move_value} <- terminal_ops, do: move_value) + } + end + + defp redundant?(%{kind: :move_in, move_value: {value, _}}, base_view) do + MapSet.member?(base_view, value) + end + + defp redundant?(%{kind: :move_out, move_value: {value, _}}, base_view) do + not MapSet.member?(base_view, value) + end + + defp put_or_delete(map, key, []), do: Map.delete(map, key) + defp put_or_delete(map, key, values), do: Map.put(map, key, values) +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/query_row.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/query_row.ex new file mode 100644 index 0000000000..2997036f1f --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/query_row.ex @@ -0,0 +1,11 @@ +defmodule Electric.Shapes.Consumer.Subqueries.QueryRow do + @moduledoc false + + @enforce_keys [:key, :json] + defstruct [:key, :json] + + @type t() :: %__MODULE__{ + key: String.t(), + json: iodata() + } +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex new file mode 100644 index 0000000000..360ccdc7a9 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex @@ -0,0 +1,4 @@ +defprotocol Electric.Shapes.Consumer.Subqueries.StateMachine do + @spec handle_event(t(), term()) :: {list(term()), t()} + def handle_event(state, event) +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex new file mode 100644 index 0000000000..6a0eaa0c77 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex @@ -0,0 +1,62 @@ +defmodule Electric.Shapes.Consumer.Subqueries.Steady do + @moduledoc false + + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + + @enforce_keys [:shape, :stack_id, :shape_handle, :dnf_plan, :dependency_handle_to_ref] + defstruct [ + :shape, + :stack_id, + :shape_handle, + :dnf_plan, + views: %{}, + dependency_handle_to_ref: %{}, + latest_seen_lsn: nil, + queue: MoveQueue.new() + ] + + @type t() :: %__MODULE__{ + shape: Electric.Shapes.Shape.t(), + stack_id: String.t(), + shape_handle: String.t(), + dnf_plan: Electric.Shapes.DnfPlan.t(), + views: %{[String.t()] => MapSet.t()}, + dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, + latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, + queue: MoveQueue.t() + } +end + +defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, + for: Electric.Shapes.Consumer.Subqueries.Steady do + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + + def handle_event(state, %Transaction{} = txn) do + {Subqueries.convert_transaction(txn, state, state.views), state} + end + + def handle_event(state, {:global_last_seen_lsn, lsn}) do + {[], %{state | latest_seen_lsn: Subqueries.normalize_global_lsn(lsn)}} + end + + def handle_event(state, {:materializer_changes, dep_handle, payload}) do + :ok = Subqueries.validate_dependency_handle!(state, dep_handle) + {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) + dep_view = Map.get(state.views, subquery_ref, MapSet.new()) + + state + |> Map.update!(:queue, &MoveQueue.enqueue(&1, dep_index, payload, dep_view)) + |> Subqueries.drain_queue() + end + + def handle_event(_state, {:pg_snapshot_known, _snapshot}) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} while no move-in is buffering" + end + + def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do + raise ArgumentError, + "received {:query_move_in_complete, rows, move_in_lsn} while no move-in is buffering" + end +end diff --git a/packages/sync-service/lib/electric/shapes/dnf_plan.ex b/packages/sync-service/lib/electric/shapes/dnf_plan.ex new file mode 100644 index 0000000000..cdfc5636c8 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/dnf_plan.ex @@ -0,0 +1,661 @@ +defmodule Electric.Shapes.DnfPlan do + @moduledoc """ + A DNF sidecar plan compiled from a shape's WHERE clause. + + Decomposes the WHERE clause into Disjunctive Normal Form and enriches each + position with subquery dependency metadata, tag generation info, and SQL for + active_conditions evaluation. + + Not stored on the Shape struct itself — compiled at runtime when needed + (e.g. in consumer state). + """ + + alias Electric.Replication.Eval.Decomposer + alias Electric.Replication.Eval.Expr + alias Electric.Replication.Eval.Parser.{Func, Ref, RowExpr} + alias Electric.Replication.Eval.Runner + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Utils + + defstruct [ + :disjuncts, + :disjuncts_positions, + :position_count, + :positions, + :dependency_positions, + :dependency_disjuncts, + :dependency_polarities, + :has_negated_subquery + ] + + @type tag_columns :: [String.t()] | {:hash_together, [String.t()]} + + @type position_info :: %{ + ast: term(), + sql: String.t(), + is_subquery: boolean(), + negated: boolean(), + dependency_index: non_neg_integer() | nil, + subquery_ref: [String.t()] | nil, + tag_columns: tag_columns() | nil + } + + @type t :: %__MODULE__{ + disjuncts: Decomposer.dnf(), + disjuncts_positions: [[Decomposer.position()]], + position_count: non_neg_integer(), + positions: %{Decomposer.position() => position_info()}, + dependency_positions: %{non_neg_integer() => [Decomposer.position()]}, + dependency_disjuncts: %{non_neg_integer() => [non_neg_integer()]}, + dependency_polarities: %{non_neg_integer() => :positive | :negated}, + has_negated_subquery: boolean() + } + + @doc """ + Compile a DNF plan from a shape. + + Returns `{:ok, plan}` for shapes with subquery dependencies, + `:no_subqueries` for shapes without, or `{:error, reason}` if + decomposition fails. + """ + @spec compile(Electric.Shapes.Shape.t()) :: {:ok, t()} | :no_subqueries | {:error, term()} + def compile(shape) do + if is_nil(shape.where) or shape.shape_dependencies == [] do + :no_subqueries + else + do_compile(shape) + end + end + + @doc """ + Get row metadata from a DNF plan. + + Given a record, subquery views, and the shape's WHERE clause expression, + computes whether the row is included, tags for each disjunct, and + active_conditions for each position. + + `views` should be keyed by subquery ref path, e.g. `%{["$sublink", "0"] => MapSet}`. + """ + @spec get_row_metadata(t(), map(), map(), Expr.t(), String.t(), String.t()) :: + {:ok, boolean(), [String.t()], [boolean()]} | :error + def get_row_metadata(plan, record, views, where_expr, stack_id, shape_handle) do + with {:ok, ref_values} <- Runner.record_to_ref_values(where_expr.used_refs, record) do + refs = Map.merge(ref_values, views) + active_conditions = compute_active_conditions(plan, refs) + tags = compute_tags(plan, record, stack_id, shape_handle) + included? = compute_inclusion(plan, active_conditions) + {:ok, included?, tags, active_conditions} + end + end + + defp compute_active_conditions(plan, refs) do + Enum.map(0..(plan.position_count - 1), fn pos -> + info = plan.positions[pos] + pos_expr = Expr.wrap_parser_part(info.ast) + + base_result = + case Runner.execute(pos_expr, refs) do + {:ok, value} when value not in [nil, false] -> true + _ -> false + end + + if info.negated, do: not base_result, else: base_result + end) + end + + defp compute_tags(plan, record, stack_id, shape_handle) do + Enum.map(plan.disjuncts, fn conj -> + positions_in_disjunct = MapSet.new(conj, &elem(&1, 0)) + + Enum.map(0..(plan.position_count - 1), fn pos -> + if MapSet.member?(positions_in_disjunct, pos) do + compute_tag_slot(plan.positions[pos], record, stack_id, shape_handle) + else + "" + end + end) + |> Enum.join("/") + end) + end + + defp compute_tag_slot(%{is_subquery: true, tag_columns: [col]}, record, stack_id, shape_handle) do + Subqueries.make_value_hash(stack_id, shape_handle, Map.get(record, col)) + end + + defp compute_tag_slot( + %{is_subquery: true, tag_columns: {:hash_together, cols}}, + record, + stack_id, + shape_handle + ) do + parts = + Enum.map(cols, fn col -> + col <> ":" <> Subqueries.namespace_value(Map.get(record, col)) + end) + + Subqueries.make_value_hash_raw(stack_id, shape_handle, Enum.join(parts)) + end + + defp compute_tag_slot(%{is_subquery: false}, _record, _stack_id, _shape_handle) do + "1" + end + + @doc """ + Build the WHERE clause and params for a move-in query from the DNF plan. + + Given the triggering dependency index, the move-in values (delta), the + current views for all dependencies, and the WHERE clause's `used_refs` + type map, generates a parameterized WHERE clause. + + The candidate predicate selects rows matching the impacted disjuncts with + move_in_values substituted for the triggering dependency. + + The exclusion predicate filters out rows already present via unaffected + disjuncts. + """ + @spec move_in_where_clause(t(), non_neg_integer(), [term()], map(), map()) :: + {String.t(), [list()]} + def move_in_where_clause(plan, dep_index, move_in_values, views, used_refs) do + impacted = Map.get(plan.dependency_disjuncts, dep_index, []) + all_idxs = Enum.to_list(0..(length(plan.disjuncts) - 1)) + unaffected = all_idxs -- impacted + + {candidate_sql, candidate_params, next_param} = + build_disjuncts_sql( + plan, + impacted, + dep_index, + move_in_values, + views, + used_refs, + 1, + ignore_trigger_polarity?: true + ) + + {exclusion_sql, exclusion_params, _} = + build_disjuncts_sql( + plan, + unaffected, + nil, + nil, + views, + used_refs, + next_param, + ignore_trigger_polarity?: false + ) + + where = + case exclusion_sql do + nil -> candidate_sql + excl -> "(#{candidate_sql}) AND NOT (#{excl})" + end + + {where, candidate_params ++ exclusion_params} + end + + @doc """ + Maps a dependency-view move event to the corresponding outer-shape effect. + + Materializer `move_in` / `move_out` events describe whether the dependency + view added or removed values. Negated subquery positions invert that effect + for the outer shape. + """ + @spec effect_for_dependency_move(t(), non_neg_integer(), :move_in | :move_out) :: + :move_in | :move_out + def effect_for_dependency_move(plan, dep_index, move_kind) do + case {Map.fetch!(plan.dependency_polarities, dep_index), move_kind} do + {:positive, effect} -> + effect + + {:negated, :move_in} -> + :move_out + + {:negated, :move_out} -> + :move_in + end + end + + @doc """ + Build a move-in control message for the given dependency and values. + + The message contains position-aware patterns so clients can update + `active_conditions` for rows already present via another disjunct. + """ + def make_move_in_broadcast(plan, dep_index, values, stack_id, shape_handle) do + positions = Map.get(plan.dependency_positions, dep_index, []) + + patterns = + Enum.flat_map(positions, fn pos -> + info = plan.positions[pos] + + Enum.map(values, fn {_typed_value, original_value} -> + %{pos: pos, value: make_broadcast_hash(info, stack_id, shape_handle, original_value)} + end) + end) + + %{headers: %{event: "move-in", patterns: patterns}} + end + + @doc """ + Build a move-out control message for the given dependency and values. + """ + def make_move_out_broadcast(plan, dep_index, values, stack_id, shape_handle) do + positions = Map.get(plan.dependency_positions, dep_index, []) + + patterns = + Enum.flat_map(positions, fn pos -> + info = plan.positions[pos] + + Enum.map(values, fn {_typed_value, original_value} -> + %{pos: pos, value: make_broadcast_hash(info, stack_id, shape_handle, original_value)} + end) + end) + + %{headers: %{event: "move-out", patterns: patterns}} + end + + @doc """ + Generate SQL expressions for computing per-position active_conditions + in a SELECT clause. Returns a list of SQL boolean expressions, one per + position. + + For row predicates, the SQL is the predicate itself cast to boolean. + For subquery predicates, the SQL evaluates the subquery membership. + """ + def active_conditions_sql(plan) do + Enum.map(0..(plan.position_count - 1), fn pos -> + info = plan.positions[pos] + base_sql = info.sql + + if info.negated do + "(NOT COALESCE((#{base_sql})::boolean, false))::boolean" + else + "COALESCE((#{base_sql})::boolean, false)" + end + end) + end + + @doc """ + Generate per-position active_conditions SQL against a concrete subquery view map. + + Used for move-in query rows, where subquery-backed positions must be evaluated + against `views_after_move` instead of live subqueries. + """ + @spec active_conditions_sql_for_views(t(), map(), map(), pos_integer()) :: + {[String.t()], [list()], pos_integer()} + def active_conditions_sql_for_views(plan, views, used_refs, start_param_idx \\ 1) do + {sqls, params, next_param_idx} = + Enum.reduce(0..(plan.position_count - 1), {[], [], start_param_idx}, fn pos, + {sqls, params, + param_idx} -> + info = Map.fetch!(plan.positions, pos) + + {base_sql, sql_params, next_param_idx} = + position_to_sql(info, nil, nil, views, used_refs, param_idx) + + sql = + if info.negated do + "(NOT COALESCE((#{base_sql})::boolean, false))::boolean" + else + "COALESCE((#{base_sql})::boolean, false)" + end + + {[sql | sqls], params ++ sql_params, next_param_idx} + end) + + {Enum.reverse(sqls), params, next_param_idx} + end + + @doc """ + Generate SQL expressions for computing per-disjunct tags in a SELECT clause. + + Each disjunct produces one tag string with `position_count` slots joined by "/". + Subquery positions get md5 hashes, row predicates get "1", and positions not + in the disjunct get empty strings. + """ + def tags_sql(plan, stack_id, shape_handle) do + Enum.map(plan.disjuncts, fn conj -> + positions_in_disjunct = MapSet.new(conj, &elem(&1, 0)) + + slot_sqls = + Enum.map(0..(plan.position_count - 1), fn pos -> + if MapSet.member?(positions_in_disjunct, pos) do + tag_slot_sql(plan.positions[pos], stack_id, shape_handle) + else + "''" + end + end) + + Enum.join(slot_sqls, " || '/' || ") + end) + end + + # -- Private: SQL generation helpers -- + + defp build_disjuncts_sql( + _plan, + [], + _trigger_dep, + _trigger_vals, + _views, + _used_refs, + pidx, + _opts + ) do + {nil, [], pidx} + end + + defp build_disjuncts_sql( + plan, + disjunct_idxs, + trigger_dep, + trigger_vals, + views, + used_refs, + pidx, + opts + ) do + {sqls, params, next_pidx} = + Enum.reduce(disjunct_idxs, {[], [], pidx}, fn didx, {sqls, params, pi} -> + conj = Enum.at(plan.disjuncts, didx) + + {conj_sql, conj_params, next_pi} = + build_conjunction_sql( + plan, + conj, + trigger_dep, + trigger_vals, + views, + used_refs, + pi, + opts + ) + + {[conj_sql | sqls], params ++ conj_params, next_pi} + end) + + sql = + case Enum.reverse(sqls) do + [single] -> single + multiple -> Enum.map_join(multiple, " OR ", &"(#{&1})") + end + + {sql, params, next_pidx} + end + + defp build_conjunction_sql( + plan, + conj, + trigger_dep, + trigger_vals, + views, + used_refs, + pidx, + opts + ) do + {parts, params, next_pi} = + Enum.reduce(conj, {[], [], pidx}, fn {pos, polarity}, {parts, params, pi} -> + info = plan.positions[pos] + + {sql, ps, next_pi} = + position_to_sql(info, trigger_dep, trigger_vals, views, used_refs, pi) + + sql = + if polarity == :negated and not ignore_polarity_for_trigger?(info, trigger_dep, opts) do + "NOT (#{sql})" + else + sql + end + + {[sql | parts], params ++ ps, next_pi} + end) + + sql = parts |> Enum.reverse() |> Enum.join(" AND ") + {sql, params, next_pi} + end + + defp position_to_sql(%{is_subquery: false} = info, _, _, _, _, pidx) do + {info.sql, [], pidx} + end + + defp position_to_sql( + %{is_subquery: true, dependency_index: dep_idx} = info, + trigger_dep, + trigger_vals, + views, + used_refs, + pidx + ) do + lhs_sql = lhs_sql_from_ast(info.ast) + ref_type = Map.get(used_refs, info.subquery_ref) + + values = + if dep_idx == trigger_dep and trigger_vals != nil do + trigger_vals + else + Map.get(views, info.subquery_ref, MapSet.new()) |> MapSet.to_list() + end + + case ref_type do + {:array, {:row, col_types}} -> + casts = Enum.map(col_types, &Electric.Replication.Eval.type_to_pg_cast/1) + + params = + case values do + [] -> Enum.map(casts, fn _ -> [] end) + _ -> values |> Electric.Utils.unzip_any() |> Tuple.to_list() + end + + sql = + casts + |> Enum.with_index(pidx) + |> Enum.map_join(", ", fn {col, index} -> "$#{index}::#{col}[]" end) + |> then(&"#{lhs_sql} IN (SELECT * FROM unnest(#{&1}))") + + {sql, params, pidx + length(casts)} + + {:array, element_type} -> + type_cast = Electric.Replication.Eval.type_to_pg_cast(element_type) + sql = "#{lhs_sql} = ANY ($#{pidx}::#{type_cast}[])" + {sql, [values], pidx + 1} + end + end + + defp lhs_sql_from_ast(%Func{name: "sublink_membership_check", args: [testexpr, _]}) do + SqlGenerator.to_sql(testexpr) + end + + defp make_broadcast_hash(%{tag_columns: [_col]}, stack_id, shape_handle, value) do + Subqueries.make_value_hash(stack_id, shape_handle, value) + end + + defp make_broadcast_hash( + %{tag_columns: {:hash_together, cols}}, + stack_id, + shape_handle, + original_value + ) do + parts = + original_value + |> Tuple.to_list() + |> Enum.zip_with(cols, fn value, column -> + column <> ":" <> Subqueries.namespace_value(value) + end) + + Subqueries.make_value_hash_raw(stack_id, shape_handle, Enum.join(parts)) + end + + defp tag_slot_sql(%{is_subquery: true, tag_columns: [col]}, stack_id, shape_handle) do + col_sql = ~s["#{col}"::text] + namespaced = pg_namespace_value_sql(col_sql) + ~s[md5('#{stack_id}#{shape_handle}' || #{namespaced})] + end + + defp tag_slot_sql( + %{is_subquery: true, tag_columns: {:hash_together, cols}}, + stack_id, + shape_handle + ) do + column_parts = + Enum.map(cols, fn col_name -> + col = ~s["#{col_name}"::text] + ~s['#{col_name}:' || #{pg_namespace_value_sql(col)}] + end) + + ~s[md5('#{stack_id}#{shape_handle}' || #{Enum.join(column_parts, " || ")})] + end + + defp tag_slot_sql(%{is_subquery: false}, _stack_id, _shape_handle) do + "'1'" + end + + defp pg_namespace_value_sql(col_sql) do + ~s[CASE WHEN #{col_sql} IS NULL THEN '#{Subqueries.null_sentinel()}' ELSE '#{Subqueries.value_prefix()}' || #{col_sql} END] + end + + defp compute_inclusion(plan, active_conditions) do + Enum.any?(plan.disjuncts, fn conj -> + Enum.all?(conj, fn {pos, _polarity} -> + Enum.at(active_conditions, pos) + end) + end) + end + + defp do_compile(shape) do + with {:ok, decomposition} <- Decomposer.decompose(shape.where.eval) do + positions = enrich_positions(decomposition.subexpressions, shape) + + {:ok, + %__MODULE__{ + disjuncts: decomposition.disjuncts, + disjuncts_positions: decomposition.disjuncts_positions, + position_count: decomposition.position_count, + positions: positions, + dependency_positions: build_dependency_positions(positions), + dependency_disjuncts: build_dependency_disjuncts(decomposition.disjuncts, positions), + dependency_polarities: build_dependency_polarities(positions), + has_negated_subquery: has_negated_subquery?(positions) + }} + end + end + + defp enrich_positions(subexpressions, shape) do + Map.new(subexpressions, fn {pos, subexpr} -> + {dep_index, subquery_ref, tag_columns} = + if subexpr.is_subquery do + extract_subquery_info(subexpr.ast) + else + {nil, nil, nil} + end + + {pos, + %{ + ast: subexpr.ast, + sql: position_sql(subexpr.ast, subexpr.is_subquery, shape), + is_subquery: subexpr.is_subquery, + negated: subexpr.negated, + dependency_index: dep_index, + subquery_ref: subquery_ref, + tag_columns: tag_columns + }} + end) + end + + defp extract_subquery_info(%Func{ + name: "sublink_membership_check", + args: [testexpr, %Ref{path: path}] + }) do + dep_index = path |> List.last() |> String.to_integer() + + tag_columns = + case testexpr do + %Ref{path: [column_name]} -> + [column_name] + + %RowExpr{elements: elements} -> + {:hash_together, Enum.map(elements, fn %Ref{path: [col]} -> col end)} + end + + {dep_index, path, tag_columns} + end + + defp build_dependency_positions(positions) do + positions + |> Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Enum.group_by(fn {_pos, info} -> info.dependency_index end, fn {pos, _} -> pos end) + |> Map.new(fn {idx, poses} -> {idx, Enum.sort(poses)} end) + end + + defp build_dependency_disjuncts(disjuncts, positions) do + disjuncts + |> Enum.with_index() + |> Enum.reduce(%{}, fn {conj, disjunct_idx}, acc -> + Enum.reduce(conj, acc, fn {pos, _polarity}, acc -> + case Map.get(positions, pos) do + %{is_subquery: true, dependency_index: idx} when not is_nil(idx) -> + Map.update(acc, idx, MapSet.new([disjunct_idx]), &MapSet.put(&1, disjunct_idx)) + + _ -> + acc + end + end) + end) + |> Map.new(fn {idx, set} -> {idx, set |> MapSet.to_list() |> Enum.sort()} end) + end + + defp build_dependency_polarities(positions) do + positions + |> Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Enum.group_by( + fn {_pos, info} -> info.dependency_index end, + fn {_pos, info} -> info.negated end + ) + |> Map.new(fn {dep_index, negated_flags} -> + case Enum.uniq(negated_flags) do + [false] -> + {dep_index, :positive} + + [true] -> + {dep_index, :negated} + + mixed -> + raise ArgumentError, + "dependency #{dep_index} has inconsistent polarity across positions: #{inspect(mixed)}" + end + end) + end + + defp has_negated_subquery?(positions) do + Enum.any?(positions, fn {_pos, info} -> info.is_subquery and info.negated end) + end + + defp ignore_polarity_for_trigger?(info, trigger_dep, opts) do + Keyword.get(opts, :ignore_trigger_polarity?, false) and info.is_subquery and + info.dependency_index == trigger_dep + end + + defp position_sql(ast, false, _shape), do: SqlGenerator.to_sql(ast) + + defp position_sql( + %Func{name: "sublink_membership_check", args: [testexpr, %Ref{path: path}]}, + true, + shape + ) do + dep_index = path |> List.last() |> String.to_integer() + dependency = Enum.fetch!(shape.shape_dependencies, dep_index) + + selected_columns = + dependency.explicitly_selected_columns + |> Enum.map_join(", ", &Utils.quote_name/1) + + dependency_sql = + "SELECT " <> + selected_columns <> + " FROM " <> + Utils.relation_to_sql(dependency.root_table) <> + if(dependency.where, do: " WHERE " <> dependency.where.query, else: "") + + SqlGenerator.to_sql(testexpr) <> " IN (" <> dependency_sql <> ")" + end +end diff --git a/packages/sync-service/lib/electric/shapes/querying.ex b/packages/sync-service/lib/electric/shapes/querying.ex index ffd9220603..52b121cc42 100644 --- a/packages/sync-service/lib/electric/shapes/querying.ex +++ b/packages/sync-service/lib/electric/shapes/querying.ex @@ -2,20 +2,29 @@ defmodule Electric.Shapes.Querying do alias Electric.ShapeCache.LogChunker alias Electric.Utils alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.DnfPlan alias Electric.Telemetry.OpenTelemetry - @value_prefix SubqueryMoves.value_prefix() - @null_sentinel SubqueryMoves.null_sentinel() + @value_prefix Subqueries.value_prefix() + @null_sentinel Subqueries.null_sentinel() - def query_move_in(conn, stack_id, shape_handle, shape, {where, params}) do + def query_move_in(conn, stack_id, shape_handle, shape, {where, params}, opts \\ []) do table = Utils.relation_to_sql(shape.root_table) - {json_like_select, _} = - json_like_select(shape, %{"is_move_in" => true}, stack_id, shape_handle) + metadata = + metadata_sql( + shape, + stack_id, + shape_handle, + opts |> Keyword.put(:start_param_idx, length(params) + 1) + ) + + {json_like_select, metadata_params} = + json_like_select(shape, %{"is_move_in" => true}, stack_id, shape_handle, metadata) key_select = key_select(shape) - tag_select = make_tags(shape, stack_id, shape_handle) |> Enum.join(", ") + tag_select = Enum.join(metadata.tags_sqls, ", ") query = Postgrex.prepare!( @@ -24,7 +33,7 @@ defmodule Electric.Shapes.Querying do ~s|SELECT #{key_select}, ARRAY[#{tag_select}]::text[], #{json_like_select} FROM #{table} WHERE #{where}| ) - Postgrex.stream(conn, query, params) + Postgrex.stream(conn, query, params ++ metadata_params) |> Stream.flat_map(& &1.rows) end @@ -51,7 +60,10 @@ defmodule Electric.Shapes.Querying do limit = if limit = subset.limit, do: " LIMIT #{limit}", else: "" offset = if offset = subset.offset, do: " OFFSET #{offset}", else: "" - {json_like_select, params} = json_like_select(shape, headers, stack_id, shape_handle) + metadata = metadata_sql(shape, stack_id, shape_handle) + + {json_like_select, params} = + json_like_select(shape, headers, stack_id, shape_handle, metadata) query = Postgrex.prepare!( @@ -120,7 +132,8 @@ defmodule Electric.Shapes.Querying do where = if not is_nil(shape.where), do: " WHERE " <> shape.where.query, else: "" - {json_like_select, params} = json_like_select(shape, [], stack_id, shape_handle) + metadata = metadata_sql(shape, stack_id, shape_handle) + {json_like_select, params} = json_like_select(shape, [], stack_id, shape_handle, metadata) query = Postgrex.prepare!(conn, table, ~s|SELECT #{json_like_select} FROM #{table} #{where}|) @@ -177,13 +190,13 @@ defmodule Electric.Shapes.Querying do selected_columns: columns } = shape, additional_headers, - stack_id, - shape_handle + _stack_id, + _shape_handle, + metadata ) do - tags = make_tags(shape, stack_id, shape_handle) key_part = build_key_part(shape) value_part = build_value_part(columns) - headers_part = build_headers_part(root_table, additional_headers, tags) + headers_part = build_headers_part(root_table, additional_headers, metadata) # We're building a JSON string that looks like this: # @@ -200,13 +213,13 @@ defmodule Electric.Shapes.Querying do query = ~s['{' || #{key_part} || ',' || #{value_part} || ',' || #{headers_part} || '}'] - {query, []} + {query, metadata.params} end - defp build_headers_part(rel, headers, tags) when is_list(headers), - do: build_headers_part(rel, Map.new(headers), tags) + defp build_headers_part(rel, headers, metadata) when is_list(headers), + do: build_headers_part(rel, Map.new(headers), metadata) - defp build_headers_part({relation, table}, additional_headers, tags) do + defp build_headers_part({relation, table}, additional_headers, metadata) do headers = %{operation: "insert", relation: [relation, table]} headers = @@ -216,12 +229,11 @@ defmodule Electric.Shapes.Querying do |> Utils.escape_quotes(?') headers = - if tags != [] do + if metadata.tags_sqls != [] do "{" <> json = headers - active_conditions = List.duplicate(true, length(tags)) |> Jason.encode!() - tags = Enum.join(tags, ~s[ || '","' || ]) - ~s/{"active_conditions":#{active_conditions},"tags":["' || #{tags} || '"],/ <> json + ~s/{"active_conditions":#{active_conditions_json_expr(metadata)},"tags":#{tags_json_expr(metadata.tags_sqls)},/ <> + json else headers end @@ -279,8 +291,66 @@ defmodule Electric.Shapes.Querying do defp pg_escape_string_for_json(str), do: ~s[to_json(#{str})::text] defp pg_coalesce_json_string(str), do: ~s[coalesce(#{str} , 'null')] + defp metadata_sql(shape, stack_id, shape_handle, opts \\ []) do + case dnf_plan_for_metadata(shape, opts) do + %DnfPlan{} = plan -> + tags_sqls = DnfPlan.tags_sql(plan, stack_id, shape_handle) + + {active_conditions_sqls, params} = + case Keyword.get(opts, :views) do + nil -> + {DnfPlan.active_conditions_sql(plan), []} + + views -> + {sqls, params, _next_idx} = + DnfPlan.active_conditions_sql_for_views( + plan, + views, + shape.where.used_refs, + Keyword.get(opts, :start_param_idx, 1) + ) + + {sqls, params} + end + + %{tags_sqls: tags_sqls, active_conditions_sqls: active_conditions_sqls, params: params} + + nil -> + %{ + tags_sqls: make_tags(shape, stack_id, shape_handle), + active_conditions_sqls: nil, + params: [] + } + end + end + + defp dnf_plan_for_metadata(shape, opts) do + case Keyword.get(opts, :dnf_plan) do + %DnfPlan{} = plan -> + plan + + nil -> + case DnfPlan.compile(shape) do + {:ok, %DnfPlan{} = plan} -> plan + _ -> nil + end + end + end + + defp active_conditions_json_expr(%{active_conditions_sqls: nil, tags_sqls: tags_sqls}) do + List.duplicate(true, length(tags_sqls)) |> Jason.encode!() + end + + defp active_conditions_json_expr(%{active_conditions_sqls: sqls}) do + "' || to_json(ARRAY[" <> Enum.join(sqls, ", ") <> "]::boolean[])::text || '" + end + + defp tags_json_expr(tags_sqls) do + "' || to_json(ARRAY[" <> Enum.join(tags_sqls, ", ") <> "]::text[])::text || '" + end + # Generates SQL to namespace a value for tag hashing. - # This MUST produce identical output to SubqueryMoves.namespace_value/1 for + # This MUST produce identical output to Subqueries.namespace_value/1 for # the same input values, or Elixir-side and SQL-side tag computation will diverge. defp pg_namespace_value_sql(col_sql) do ~s[CASE WHEN #{col_sql} IS NULL THEN '#{@null_sentinel}' ELSE '#{@value_prefix}' || #{col_sql} END] diff --git a/packages/sync-service/lib/electric/shapes/shape.ex b/packages/sync-service/lib/electric/shapes/shape.ex index e55305738d..aefbed88e0 100644 --- a/packages/sync-service/lib/electric/shapes/shape.ex +++ b/packages/sync-service/lib/electric/shapes/shape.ex @@ -2,11 +2,13 @@ defmodule Electric.Shapes.Shape do @moduledoc """ Struct describing the requested shape """ - alias Electric.Shapes.Shape.SubqueryMoves + alias Electric.Shapes.Consumer.Subqueries alias Electric.Replication.Eval.Expr alias Electric.Postgres.Inspector alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.Walker alias Electric.Replication.Changes + alias Electric.Shapes.DnfPlan alias Electric.Shapes.WhereClause alias Electric.Utils alias Electric.Shapes.Shape.Validators @@ -265,7 +267,7 @@ defmodule Electric.Shapes.Shape do end defp fill_tag_structure(shape) do - {tag_structure, comparison_expressions} = SubqueryMoves.move_in_tag_structure(shape) + {tag_structure, comparison_expressions} = Subqueries.move_in_tag_structure(shape) %{ shape @@ -280,16 +282,20 @@ defmodule Electric.Shapes.Shape do with {:ok, where} <- Parser.parse_query(where), {:ok, subqueries} <- Parser.extract_subqueries(where), :ok <- check_feature_flag(subqueries, opts), - {:ok, shape_dependencies} <- build_shape_dependencies(subqueries, opts), + {:ok, shape_dependencies, sublink_dependency_indexes} <- + build_shape_dependencies(subqueries, opts), {:ok, dependency_refs} <- build_dependency_refs(shape_dependencies, inspector), - all_refs = Map.merge(refs, dependency_refs), + all_refs = + Map.merge(refs, expand_dependency_refs(dependency_refs, sublink_dependency_indexes)), :ok <- Validators.validate_parameters(opts[:params]), {:ok, where} <- Parser.validate_where_ast(where, params: opts[:params], refs: all_refs, - sublink_queries: extract_sublink_queries(shape_dependencies) + sublink_queries: + expand_sublink_queries(shape_dependencies, sublink_dependency_indexes) ), + {:ok, where} <- canonicalize_where_sublink_refs(where, sublink_dependency_indexes), {:ok, where} <- Validators.validate_where_return_type(where) do {:ok, where, shape_dependencies} else @@ -317,13 +323,42 @@ defmodule Electric.Shapes.Shape do defp build_shape_dependencies(subqueries, opts) do shared_opts = Map.drop(opts, [:where, :columns, :relation]) - Utils.map_while_ok(subqueries, fn subquery -> - shared_opts - |> Map.put(:select, subquery) - |> Map.put(:autofill_pk_select?, true) - |> Map.put(:log_mode, :full) - |> new() + subqueries + |> Enum.with_index() + |> Utils.reduce_while_ok({[], %{}, %{}}, fn {subquery, occurrence_idx}, + {shape_dependencies, dependency_index_by_shape, + occurrence_to_dependency} -> + with {:ok, shape_dependency} <- + shared_opts + |> Map.put(:select, subquery) + |> Map.put(:autofill_pk_select?, true) + |> Map.put(:log_mode, :full) + |> new() do + comparable_shape = comparable(shape_dependency) + + case dependency_index_by_shape do + %{^comparable_shape => dependency_idx} -> + {:ok, + {shape_dependencies, dependency_index_by_shape, + Map.put(occurrence_to_dependency, occurrence_idx, dependency_idx)}} + + %{} -> + dependency_idx = length(shape_dependencies) + + {:ok, + {shape_dependencies ++ [shape_dependency], + Map.put(dependency_index_by_shape, comparable_shape, dependency_idx), + Map.put(occurrence_to_dependency, occurrence_idx, dependency_idx)}} + end + end end) + |> case do + {:ok, {shape_dependencies, _dependency_index_by_shape, occurrence_to_dependency}} -> + {:ok, shape_dependencies, occurrence_to_dependency} + + {:error, reason} -> + {:error, reason} + end end defp build_dependency_refs(shape_dependencies, inspector) do @@ -351,6 +386,46 @@ defmodule Electric.Shapes.Shape do end) end + defp expand_dependency_refs(dependency_refs, sublink_dependency_indexes) do + Map.new(sublink_dependency_indexes, fn {occurrence_idx, dependency_idx} -> + {["$sublink", "#{occurrence_idx}"], + Map.fetch!(dependency_refs, ["$sublink", "#{dependency_idx}"])} + end) + end + + defp expand_sublink_queries(shape_dependencies, sublink_dependency_indexes) do + canonical_queries = extract_sublink_queries(shape_dependencies) + + Map.new(sublink_dependency_indexes, fn {occurrence_idx, dependency_idx} -> + {occurrence_idx, Map.fetch!(canonical_queries, dependency_idx)} + end) + end + + defp canonicalize_where_sublink_refs(%Expr{} = where, sublink_dependency_indexes) do + with {:ok, eval} <- + Walker.fold( + where.eval, + fn + %Parser.Ref{path: ["$sublink", idx]} = ref, _children, occurrence_to_dependency -> + dependency_idx = + occurrence_to_dependency + |> Map.fetch!(String.to_integer(idx)) + |> Integer.to_string() + + {:ok, %{ref | path: ["$sublink", dependency_idx]}} + + node, children, _occurrence_to_dependency when map_size(children) == 0 -> + {:ok, node} + + node, children, _occurrence_to_dependency -> + {:ok, Map.merge(node, children)} + end, + sublink_dependency_indexes + ) do + {:ok, %{where | eval: eval, used_refs: Parser.find_refs(eval)}} + end + end + defp extract_sublink_queries(shapes) do Enum.with_index(shapes, fn %__MODULE__{} = shape, i -> columns = Enum.map_join(shape.explicitly_selected_columns, ", ", &Utils.quote_name/1) @@ -545,17 +620,20 @@ defmodule Electric.Shapes.Shape do Updates, on the other hand, may be converted to an "new record" or a "deleted record" if the previous/new version of the updated row isn't in the shape. """ - def convert_change(shape, change, opts \\ []) + def convert_change(shape, change, opts \\ []) do + opts = Map.new(opts) + do_convert_change(shape, change, opts) + end - def convert_change(%__MODULE__{root_table: table}, %{relation: relation}, _) - when table != relation, - do: [] + defp do_convert_change(%__MODULE__{root_table: table}, %{relation: relation}, _) + when table != relation, + do: [] - def convert_change( - %__MODULE__{where: nil, flags: %{selects_all_columns: true}} = shape, - change, - opts - ) do + defp do_convert_change( + %__MODULE__{where: nil, flags: %{selects_all_columns: true}} = shape, + change, + opts + ) do # If the change actually doesn't change any columns, we can skip it - this is possible on Postgres but we don't care for those. if is_struct(change, Changes.UpdatedRecord) and change.changed_columns == MapSet.new() do [] @@ -564,64 +642,129 @@ defmodule Electric.Shapes.Shape do end end - def convert_change(%__MODULE__{}, %Changes.TruncatedRelation{} = change, _), do: [change] + defp do_convert_change(%__MODULE__{}, %Changes.TruncatedRelation{} = change, _), + do: [change] - def convert_change( - %__MODULE__{where: where, selected_columns: selected_columns} = shape, - change, - opts - ) - when is_struct(change, Changes.NewRecord) - when is_struct(change, Changes.DeletedRecord) do - record = if is_struct(change, Changes.NewRecord), do: change.record, else: change.old_record + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.NewRecord{record: record} = change, + opts + ) do + {_old_refs, new_refs} = opts[:extra_refs] || {%{}, %{}} - # This is a pre-image and post-image of the value sets for subqueries. - # In case of a new record, we use the post-image, because we'll need to see the record, - # but in case of a deleted record, we use the pre-image, because we've never seen an insert - extra_refs = opts[:extra_refs] || {%{}, %{}} + case project_row_metadata(shape, record, new_refs, opts) do + {:ok, true, metadata} -> + [change |> put_row_metadata(metadata) |> filter_change_columns(selected_columns)] - used_extra_refs = - if is_struct(change, Changes.NewRecord), do: elem(extra_refs, 1), else: elem(extra_refs, 0) + {:ok, false, _metadata} -> + [] + end + end - if WhereClause.includes_record?(where, record, used_extra_refs) do - change - |> fill_move_tags(shape, opts[:stack_id], opts[:shape_handle]) - |> filter_change_columns(selected_columns) - |> List.wrap() - else - [] + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.DeletedRecord{old_record: record} = change, + opts + ) do + {old_refs, _new_refs} = opts[:extra_refs] || {%{}, %{}} + + case project_row_metadata(shape, record, old_refs, opts) do + {:ok, true, metadata} -> + [change |> put_row_metadata(metadata) |> filter_change_columns(selected_columns)] + + {:ok, false, _metadata} -> + [] end end - def convert_change( - %__MODULE__{where: where, selected_columns: selected_columns} = shape, - %Changes.UpdatedRecord{old_record: old_record, record: record} = change, - opts - ) do - {extra_refs_old, extra_refs_new} = opts[:extra_refs] || {%{}, %{}} - old_record_in_shape = WhereClause.includes_record?(where, old_record, extra_refs_old) - new_record_in_shape = WhereClause.includes_record?(where, record, extra_refs_new) + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.UpdatedRecord{old_record: old_record, record: record} = change, + opts + ) do + {old_refs, new_refs} = opts[:extra_refs] || {%{}, %{}} + + {:ok, old_included?, old_metadata} = project_row_metadata(shape, old_record, old_refs, opts) + {:ok, new_included?, new_metadata} = project_row_metadata(shape, record, new_refs, opts) converted_changes = - case {old_record_in_shape, new_record_in_shape} do - {true, true} -> [change] - {true, false} -> [Changes.convert_update(change, to: :deleted_record)] - {false, true} -> [Changes.convert_update(change, to: :new_record)] + case {old_included?, new_included?} do + {true, true} -> + [ + put_updated_metadata(change, new_metadata, + removed_move_tags: old_metadata.move_tags -- new_metadata.move_tags + ) + ] + + {true, false} -> + [ + Changes.convert_update(change, to: :deleted_record) + |> put_row_metadata(old_metadata) + ] + + {false, true} -> + [ + Changes.convert_update(change, to: :new_record) + |> put_row_metadata(new_metadata) + ] + {false, false} -> [] end converted_changes - |> Enum.map(&fill_move_tags(&1, shape, opts[:stack_id], opts[:shape_handle])) |> Enum.map(&filter_change_columns(&1, selected_columns)) |> Enum.filter(&should_keep_change?/1) end + defp project_row_metadata( + %__MODULE__{where: where}, + record, + refs, + %{dnf_plan: %DnfPlan{} = dnf_plan, stack_id: stack_id, shape_handle: shape_handle} + ) do + case DnfPlan.get_row_metadata(dnf_plan, record, refs, where, stack_id, shape_handle) do + {:ok, included?, move_tags, active_conditions} -> + {:ok, included?, %{move_tags: move_tags, active_conditions: active_conditions}} + end + end + + defp project_row_metadata( + %__MODULE__{where: where, tag_structure: tag_structure}, + record, + refs, + opts + ) do + {:ok, + WhereClause.includes_record?(where, record, refs), + %{ + move_tags: make_tags_from_pattern(tag_structure, record, opts[:stack_id], opts[:shape_handle]), + active_conditions: make_active_conditions(tag_structure) + }} + end + defp filter_change_columns(change, nil), do: change defp filter_change_columns(change, selected_columns) do Changes.filter_columns(change, selected_columns) end + defp put_row_metadata(change, %{move_tags: move_tags, active_conditions: active_conditions}) do + %{change | move_tags: move_tags, active_conditions: active_conditions} + end + + defp put_updated_metadata( + change, + %{move_tags: move_tags, active_conditions: active_conditions}, + opts + ) do + %{ + change + | move_tags: move_tags, + removed_move_tags: Keyword.get(opts, :removed_move_tags, []), + active_conditions: active_conditions + } + end + def fill_move_tags(change, %__MODULE__{tag_structure: []}, _, _), do: change def fill_move_tags(%Changes.NewRecord{move_tags: [_ | _]} = change, _, _, _), do: change @@ -680,22 +823,22 @@ defmodule Electric.Shapes.Shape do } end - defp make_active_conditions([]), do: nil + defp make_active_conditions([]), do: [] defp make_active_conditions(tag_structure), do: List.duplicate(true, length(tag_structure)) defp make_tags_from_pattern(patterns, record, stack_id, shape_handle) do Enum.map(patterns, fn pattern -> Enum.map(pattern, fn column_name when is_binary(column_name) -> - SubqueryMoves.make_value_hash(stack_id, shape_handle, Map.get(record, column_name)) + Subqueries.make_value_hash(stack_id, shape_handle, Map.get(record, column_name)) {:hash_together, columns} -> column_parts = Enum.map(columns, fn col -> - col <> ":" <> SubqueryMoves.namespace_value(Map.get(record, col)) + col <> ":" <> Subqueries.namespace_value(Map.get(record, col)) end) - SubqueryMoves.make_value_hash_raw(stack_id, shape_handle, Enum.join(column_parts)) + Subqueries.make_value_hash_raw(stack_id, shape_handle, Enum.join(column_parts)) end) |> Enum.join("/") end) @@ -704,6 +847,13 @@ defmodule Electric.Shapes.Shape do defp should_keep_change?(%Changes.UpdatedRecord{removed_move_tags: removed_move_tags}) when removed_move_tags != [], do: true + defp should_keep_change?(%Changes.UpdatedRecord{ + old_record: record, + record: record, + active_conditions: [_ | _] + }), + do: true + defp should_keep_change?(%Changes.UpdatedRecord{old_record: record, record: record}), do: false diff --git a/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex b/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex deleted file mode 100644 index cba66aecb2..0000000000 --- a/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex +++ /dev/null @@ -1,208 +0,0 @@ -defmodule Electric.Shapes.Shape.SubqueryMoves do - @moduledoc false - alias Electric.Replication.Eval - alias Electric.Replication.Eval.Walker - alias Electric.Shapes.Shape - - @value_prefix "v:" - @null_sentinel "NULL" - - def value_prefix, do: @value_prefix - def null_sentinel, do: @null_sentinel - - @doc """ - Given a shape with a where clause that contains a subquery, make a query that can use a - list of value in place of the subquery. - - When we're querying for new data, we're only querying for a subset of entire query. - To make that, we need to replace the subquery with a list of values. - - For example, if the shape has a where clause like this: - - ~S|WHERE parent_id IN (SELECT id FROM parent WHERE value = '1')| - - And we're querying for new data with a list of values like this: - - ["1", "2", "3"] - - Then the query will be transformed to: - - ~S|WHERE parent_id = ANY ($1::text[]::int8[])| - - And the parameters will be: - - [["1", "2", "3"]] - """ - def move_in_where_clause( - %Shape{ - where: %{query: query, used_refs: used_refs}, - shape_dependencies: shape_dependencies, - shape_dependencies_handles: shape_dependencies_handles - }, - shape_handle, - move_ins - ) do - index = Enum.find_index(shape_dependencies_handles, &(&1 == shape_handle)) - target_section = Enum.at(shape_dependencies, index) |> rebuild_subquery_section() - - case used_refs[["$sublink", "#{index}"]] do - {:array, {:row, cols}} -> - unnest_sections = - cols - |> Enum.map(&Electric.Replication.Eval.type_to_pg_cast/1) - |> Enum.with_index(fn col, index -> "$#{index + 1}::text[]::#{col}[]" end) - |> Enum.join(", ") - - {String.replace(query, target_section, "IN (SELECT * FROM unnest(#{unnest_sections}))"), - Electric.Utils.unzip_any(move_ins) |> Tuple.to_list()} - - col -> - type = Electric.Replication.Eval.type_to_pg_cast(col) - {String.replace(query, target_section, "= ANY ($1::text[]::#{type})"), [move_ins]} - end - end - - defp rebuild_subquery_section(shape) do - base = - ~s|IN (SELECT #{Enum.join(shape.explicitly_selected_columns, ", ")} FROM #{Electric.Utils.relation_to_sql(shape.root_table)}| - - where = if shape.where, do: " WHERE #{shape.where.query}", else: "" - base <> where <> ")" - end - - @doc """ - Generate a tag-removal control message for a shape. - - Patterns are a list of lists, where each inner list represents a pattern (and is functionally a tuple, but - JSON can't directly represent tuples). This pattern is filled with actual values that have been removed. - """ - @spec make_move_out_control_message(Shape.t(), String.t(), String.t(), [ - {dep_handle :: String.t(), gone_values :: String.t()}, - ... - ]) :: map() - # Stub guard to allow only one dependency for now. - def make_move_out_control_message(shape, stack_id, shape_handle, [_] = move_outs) do - %{ - headers: %{ - event: "move-out", - patterns: - Enum.flat_map(move_outs, &make_move_out_pattern(shape, stack_id, shape_handle, &1)) - } - } - end - - # This is a stub implementation valid only for when there is exactly one dependency. - defp make_move_out_pattern( - %{tag_structure: patterns}, - stack_id, - shape_handle, - {_dep_handle, gone_values} - ) do - # TODO: This makes the assumption of only one column per pattern. - Enum.flat_map(patterns, fn [column_or_expr] -> - case column_or_expr do - column_name when is_binary(column_name) -> - Enum.map( - gone_values, - &%{pos: 0, value: make_value_hash(stack_id, shape_handle, elem(&1, 1))} - ) - - {:hash_together, columns} -> - column_parts = - &(Enum.zip_with(&1, columns, fn value, column -> - column <> ":" <> namespace_value(value) - end) - |> Enum.join()) - - Enum.map( - gone_values, - &%{ - pos: 0, - value: - make_value_hash_raw( - stack_id, - shape_handle, - column_parts.(Tuple.to_list(elem(&1, 1))) - ) - } - ) - end - end) - end - - def make_value_hash(stack_id, shape_handle, value) do - make_value_hash_raw(stack_id, shape_handle, namespace_value(value)) - end - - @doc """ - Hash a pre-namespaced value. Use `make_value_hash/3` for single values that need namespacing. - """ - def make_value_hash_raw(stack_id, shape_handle, namespaced_value) do - :crypto.hash(:md5, "#{stack_id}#{shape_handle}#{namespaced_value}") - |> Base.encode16(case: :lower) - end - - @doc """ - Namespace a value for hashing. - - To distinguish NULL from the literal string 'NULL', values are prefixed with - 'v:' and NULL becomes 'NULL' (no prefix). This MUST match the SQL logic in - `Querying.pg_namespace_value_sql/1` - see lib/electric/shapes/querying.ex. - """ - def namespace_value(nil), do: @null_sentinel - def namespace_value(value), do: @value_prefix <> value - - @doc """ - Generate a tag structure for a shape. - - A tag structure is a list of lists, where each inner list represents a tag (and is functionally a tuple, but - JSON can't directly represent tuples). The structure is used to generate actual tags for each row, that act - as a refenence as to why this row is part of the shape. - - Tag structure then is essentially a list of column names in correct positions that will get filled in - with actual values from the row - """ - @spec move_in_tag_structure(Shape.t()) :: - list(list(String.t() | {:hash_together, [String.t(), ...]})) - def move_in_tag_structure(%Shape{} = shape) - when is_nil(shape.where) - when shape.shape_dependencies == [], - do: {[], %{}} - - def move_in_tag_structure(shape) do - # TODO: For multiple subqueries this should be a DNF form - # and this walking overrides the comparison expressions - {:ok, {tag_structure, comparison_expressions}} = - Walker.reduce( - shape.where.eval, - fn - %Eval.Parser.Func{name: "sublink_membership_check", args: [testexpr, sublink_ref]}, - {[current_tag | others], comparison_expressions}, - _ -> - tags = - case testexpr do - %Eval.Parser.Ref{path: [column_name]} -> - [[column_name | current_tag] | others] - - %Eval.Parser.RowExpr{elements: elements} -> - elements = - Enum.map(elements, fn %Eval.Parser.Ref{path: [column_name]} -> - column_name - end) - - [[{:hash_together, elements} | current_tag] | others] - end - - {:ok, {tags, Map.put(comparison_expressions, sublink_ref.path, testexpr)}} - - _, acc, _ -> - {:ok, acc} - end, - {[[]], %{}} - ) - - comparison_expressions - |> Map.new(fn {path, expr} -> {path, Eval.Expr.wrap_parser_part(expr)} end) - |> then(&{tag_structure, &1}) - end -end diff --git a/packages/sync-service/test/electric/plug/router_test.exs b/packages/sync-service/test/electric/plug/router_test.exs index 55840e328f..952ff66aeb 100644 --- a/packages/sync-service/test/electric/plug/router_test.exs +++ b/packages/sync-service/test/electric/plug/router_test.exs @@ -2315,7 +2315,13 @@ defmodule Electric.Plug.RouterTest do :crypto.hash(:md5, stack_id <> req.handle <> "v:2") |> Base.encode16(case: :lower) - assert {_, 200, [data, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl()]} = + assert {_, 200, + [ + %{"headers" => %{"event" => "move-in"}}, + data, + %{"headers" => %{"control" => "snapshot-end"}}, + up_to_date_ctl() + ]} = Task.await(task) assert %{"id" => "2", "parent_id" => "2", "value" => "20"} = data["value"] @@ -2330,9 +2336,10 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, excluded) VALUES (1, false), (2, true)", "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20)" ] - test "NOT IN subquery should return 409 on move-in to subquery", %{ + test "NOT IN subquery emits a move-out when a dependency value moves in", %{ opts: opts, - db_conn: db_conn + db_conn: db_conn, + stack_id: stack_id } do # Child rows where parent_id is NOT IN the set of excluded parents # Initially: parent 1 is not excluded, so child 1 is in the shape @@ -2354,13 +2361,61 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, opts) + tag = + :crypto.hash(:md5, stack_id <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + # Now set parent 1 to excluded = true # This causes parent 1 to move INTO the subquery result # Which should cause child 1 to move OUT of the outer shape - # Since NOT IN subquery move-out isn't implemented, we expect a 409 Postgrex.query!(db_conn, "UPDATE parent SET excluded = true WHERE id = 1", []) - assert {_req, 409, _response} = Task.await(task) + assert {_req, 200, [data, up_to_date_ctl()]} = Task.await(task) + + assert %{ + "headers" => %{ + "event" => "move-out", + "patterns" => [%{"pos" => 0, "value" => ^tag}] + } + } = data + end + + @tag with_sql: [ + "CREATE TABLE parent (id INT PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", + "CREATE TABLE child (id INT PRIMARY KEY, parent_id INT NOT NULL REFERENCES parent(id), value INT NOT NULL)", + "INSERT INTO parent (id, excluded) VALUES (1, true), (2, true)", + "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20)" + ] + test "NOT IN subquery emits a move-in query when a dependency value moves out", %{ + opts: opts, + db_conn: db_conn, + stack_id: stack_id + } do + req = + make_shape_req("child", + where: "parent_id NOT IN (SELECT id FROM parent WHERE excluded = true)" + ) + + assert {req, 200, [%{"headers" => %{"control" => "snapshot-end"}}]} = shape_req(req, opts) + + task = live_shape_req(req, opts) + + Postgrex.query!(db_conn, "UPDATE parent SET excluded = false WHERE id = 1", []) + + tag = + :crypto.hash(:md5, stack_id <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + + assert {_req, 200, + [ + %{"headers" => %{"event" => "move-in"}}, + data, + %{"headers" => %{"control" => "snapshot-end"}}, + up_to_date_ctl() + ]} = Task.await(task) + + assert %{"id" => "1", "parent_id" => "1", "value" => "10"} = data["value"] + assert %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag]} = data["headers"] end @tag with_sql: [ @@ -2413,6 +2468,7 @@ defmodule Electric.Plug.RouterTest do assert {_, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{"value" => %{"id" => "2", "other_value" => "4"}}, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl() @@ -2460,6 +2516,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "2", "value" => "20"}, "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [tag]} @@ -2491,7 +2548,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, include_parent) VALUES (1, true)", "INSERT INTO child (id, parent_id, include_child) VALUES (1, 1, true)" ] - test "subquery combined with OR should return a 409 on move-out", %{ + test "subquery combined with OR handles move-out via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2505,16 +2562,32 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - assert %{"value" => %{"id" => "1", "include_child" => "true"}} = + tag = + :crypto.hash(:md5, opts[:stack_id] <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + + assert %{ + "value" => %{"id" => "1", "include_child" => "true"}, + "headers" => %{ + "tags" => [^tag <> "/", "/1"], + "active_conditions" => [true, true] + } + } = Enum.find(response, &Map.has_key?(&1, "key")) task = live_shape_req(req, opts) - # Setting include_parent to false may cause a move out, but it doesn't in this case because include_child is still true + # Setting include_parent to false causes a move out on the subquery position, + # but the row stays because include_child is still true (second disjunct). + # With DNF runtime, this is handled as a position flip, not invalidation. Postgrex.query!(db_conn, "UPDATE parent SET include_parent = false WHERE id = 1", []) - # Rather than working out whether this is a move out or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + assert {_req, 200, response} = Task.await(task) + + assert [%{"headers" => %{"event" => "move-out", "patterns" => [%{"pos" => 0}]}}] = + Enum.filter(response, &match?(%{"headers" => %{"event" => _}}, &1)) + + refute Enum.any?(response, &Map.has_key?(&1, "key")) end @tag with_sql: [ @@ -2523,7 +2596,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, include_parent) VALUES (1, false)", "INSERT INTO child (id, parent_id, include_child) VALUES (1, 1, true)" ] - test "subquery combined with OR should return a 409 on move-in", %{ + test "subquery combined with OR handles move-in via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2537,16 +2610,34 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - assert %{"value" => %{"id" => "1", "include_child" => "true"}} = + tag = + :crypto.hash(:md5, opts[:stack_id] <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + + assert %{ + "value" => %{"id" => "1", "include_child" => "true"}, + "headers" => %{ + "tags" => [^tag <> "/", "/1"], + "active_conditions" => [false, true] + } + } = Enum.find(response, &Map.has_key?(&1, "key")) task = live_shape_req(req, opts) - # Setting include_parent to true may cause a move in, but it doesn't in this case because include_child is already true + # Setting include_parent to true causes a move in on the subquery position. + # The row is already present via include_child = true (second disjunct). + # With DNF runtime, the move-in is handled as a position flip, not invalidation. Postgrex.query!(db_conn, "UPDATE parent SET include_parent = true WHERE id = 1", []) - # Rather than working out whether this is a move in or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + assert {_req, 200, response} = Task.await(task) + + # Move-in control message with move-in query rows + move_in_events = + Enum.filter(response, &match?(%{"headers" => %{"event" => "move-in"}}, &1)) + + assert length(move_in_events) >= 1 + refute Enum.any?(response, &Map.has_key?(&1, "key")) end @tag with_sql: [ @@ -2557,7 +2648,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, grandparent_id, include_parent) VALUES (1, 1, true)", "INSERT INTO child (id, parent_id) VALUES (1, 1)" ] - test "nested subquery combined with OR should return a 409 on move-in", %{ + test "nested subquery combined with OR handles move-in via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2576,15 +2667,25 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, opts) - # Setting include_grandparent to true may cause a move in, but it doesn't in this case because include_parent is already true + # Setting include_grandparent to true causes a move-in in the inner (parent) + # shape's subquery, but parent 1 is already in the result set because + # include_parent = true. With DNF on the inner shape, this is handled as a + # position flip on the inner shape — no new rows enter or leave. + # The outer (child) shape sees no change and stays live. Postgrex.query!( db_conn, "UPDATE grandparent SET include_grandparent = true WHERE id = 1", [] ) - # Rather than working out whether this is a move in or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + # The inner shape handles the move-in via DNF. The outer shape's dependency + # doesn't change, so no move event is triggered on the outer shape. + # The live request should receive the inner shape's move-in broadcast + # (propagated via the dependency materializer). + assert {_req, 200, response} = Task.await(task) + + # Verify we got a response (move-in on inner shape, no invalidation) + assert is_list(response) end @tag with_sql: [ @@ -2628,6 +2729,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "2", "name" => "Team B"}, "headers" => %{"tags" => [^tag], "is_move_in" => true} @@ -2705,6 +2807,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"tags" => [^tag]}, "value" => %{"id" => "2", "role" => "Member"} @@ -2770,12 +2873,16 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, ctx.opts) Postgrex.query!(ctx.db_conn, "UPDATE parent SET other_value = 10 WHERE id = 2") - tag = + tag_hash = :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:20") |> Base.encode16(case: :lower) + # DNF tags: "subquery_hash/row_predicate_slot" + tag = "#{tag_hash}/1" + assert {_, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{"headers" => %{"tags" => [^tag]}, "value" => %{"id" => "3"}}, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl() @@ -2801,7 +2908,11 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - tag = :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:1") |> Base.encode16(case: :lower) + tag_hash = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:1") |> Base.encode16(case: :lower) + + # DNF tags: "subquery_hash/row_predicate_slot" + tag = "#{tag_hash}/1" assert %{ "value" => %{"id" => "1", "parentId" => "1", "Value" => "10"}, @@ -2847,11 +2958,13 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ %{"headers" => %{"event" => "move-out"}}, + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [tag2]}, "value" => %{"parent_id" => "2", "value" => "12"} }, %{"headers" => %{"control" => "snapshot-end"}}, + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag]}, "value" => %{"id" => "1", "parent_id" => "1", "value" => "13"} @@ -2886,7 +2999,8 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, value) VALUES (1, 1), (2, 2), (3, 3)", "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20), (3, 3, 30)" ] - test "move-in into move-out into move-in of the same parent results in a ", ctx do + test "move-in into move-out into move-in of the same parent collapses queued oscillations", + ctx do req = make_shape_req("child", where: "parent_id in (SELECT id FROM parent WHERE value = 1)") assert {req, 200, [data, _snapshot_end]} = shape_req(req, ctx.opts) @@ -2911,18 +3025,31 @@ defmodule Electric.Plug.RouterTest do # Hard to wait exactly what we want, so this should be OK Process.sleep(1000) - # We're essentially guaranteed, in this test environment, to see move-out before move-in resolves. - # It's safe to propagate a move-out even for stuff client hasn't seen (because of hashing in the pattern) - # as it's just a no-op. - # So we should see 2 move-outs and a move-in but only for the 3rd parent. The move-in should be filtered despite - # being triggered for 2 moved in parents initially + tag2 = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:2") |> Base.encode16(case: :lower) + + tag3 = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:3") |> Base.encode16(case: :lower) + + # The reduced move queue keeps the first move-in/move-out pair for parent 2, then drops + # the later move-in/move-out oscillation before parent 3 moves in. assert {_req, 200, [ - %{"headers" => %{"event" => "move-out", "patterns" => p1}}, - %{"headers" => %{"event" => "move-out", "patterns" => p1}}, + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag2]}, + "value" => %{"id" => "2", "parent_id" => "2", "value" => "20"} + }, %{"headers" => %{"control" => "snapshot-end"}}, %{ - "headers" => %{"operation" => "insert", "is_move_in" => true}, + "headers" => %{ + "event" => "move-out", + "patterns" => [%{"pos" => 0, "value" => ^tag2}] + } + }, + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag3]}, "value" => %{"id" => "3", "parent_id" => "3", "value" => "30"} }, %{"headers" => %{"control" => "snapshot-end"}}, @@ -2947,6 +3074,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "1", "parent_id" => "1", "value" => "10"}, "headers" => %{"operation" => "insert", "tags" => [tag]} @@ -2981,7 +3109,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO project_members (project_id, user_id) VALUES (1, 100), (3, 100)", "INSERT INTO projects (id, workspace_id, name) VALUES (1, 1, 'project 1'), (2, 1, 'project 2')" ] - test "supports two subqueries at the same level but returns 409 on move-in", %{ + test "supports two subqueries at the same level with move-in", %{ opts: opts, db_conn: db_conn } do @@ -3049,8 +3177,17 @@ defmodule Electric.Plug.RouterTest do [] ) - # Should get a 409 because multiple same-level subqueries cannot currently correctly handle move-ins - assert %{status: 409} = Task.await(task) + # With DNF runtime, multiple same-level subqueries now handle move-ins correctly + assert %{status: 200} = conn = Task.await(task) + + body = Jason.decode!(conn.resp_body) + + assert [%{"headers" => %{"event" => "move-in"}} | rest] = body + + assert Enum.any?(rest, fn + %{"value" => %{"id" => "2", "name" => "project 2"}} -> true + _ -> false + end) end end diff --git a/packages/sync-service/test/electric/replication/eval/decomposer_test.exs b/packages/sync-service/test/electric/replication/eval/decomposer_test.exs new file mode 100644 index 0000000000..223f1a7fe3 --- /dev/null +++ b/packages/sync-service/test/electric/replication/eval/decomposer_test.exs @@ -0,0 +1,487 @@ +defmodule Electric.Replication.Eval.DecomposerTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Replication.Eval.Decomposer + + @refs %{ + ["a"] => :int4, + ["b"] => :int4, + ["c"] => :int4, + ["d"] => :int4, + ["e"] => :int4, + ["f"] => :int4, + ["g"] => :int4, + ["name"] => :text + } + + describe "decompose/1" do + test "should decompose a DNF query with shared subexpressions" do + # (a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3) + # Disjunct 1: positions 0-1 + # Disjunct 2: positions 2-3 + # Disjunct 3: positions 4-5 (reuses r1 for a=1, r3 for c=3) + ~S"(a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil, nil, nil], + [nil, nil, ~s|"c" = 3|, ~s|"d" = 4|, nil, nil], + [nil, nil, nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should handle a single comparison without AND/OR" do + ~S"a = 1" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|]], + expected_subexpressions: [~s|"a" = 1|] + ) + end + + test "should handle all ANDs as a single disjunct" do + ~S"a = 1 AND b = 2 AND c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|]], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle all ORs as N disjuncts with 1 expression each" do + # a = 1 OR b = 2 OR c = 3 + # Each OR branch is its own disjunct with 1 expression + # Total positions: 3 (one per disjunct) + ~S"a = 1 OR b = 2 OR c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, nil, nil], + [nil, ~s|"b" = 2|, nil], + [nil, nil, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should distribute AND over OR with subexpression reuse" do + # a = 1 AND (b = 2 OR c = 3) => (a = 1 AND b = 2) OR (a = 1 AND c = 3) + # After distribution, we get 2 disjuncts with 2 expressions each + # The "a = 1" subexpression should be deduplicated (same reference) + ~S"a = 1 AND (b = 2 OR c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle subquery expressions as atomic subexpressions" do + ~S"a = 1 AND (b IN (SELECT id FROM test_table) OR c = 3)" + |> prepare_with_sublinks( + %{["$sublink", "0"] => {:array, :int4}}, + %{0 => "SELECT id FROM test_table"} + ) + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" IN (SELECT $sublink.0)|, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [ + ~s|"a" = 1|, + ~s|"b" IN (SELECT $sublink.0)|, + ~s|"c" = 3| + ] + ) + end + + test "should handle deeply nested distribution ((a OR b) AND (c OR d))" do + # (a OR b) AND (c OR d) => (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) + # 4 disjuncts, each with 2 expressions + ~S"(a = 1 OR b = 2) AND (c = 3 OR d = 4)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"c" = 3|, nil, nil, nil, nil, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"d" = 4|, nil, nil, nil, nil], + [nil, nil, nil, nil, ~s|"b" = 2|, ~s|"c" = 3|, nil, nil], + [nil, nil, nil, nil, nil, nil, ~s|"b" = 2|, ~s|"d" = 4|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should push NOT down to leaf expressions" do + # NOT a = 1 AND b = 2 parses as (NOT a = 1) AND b = 2 + # The NOT is already at the leaf, so it becomes {:not, ref} + ~S"NOT a = 1 AND b = 2" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[{:not, ~s|"a" = 1|}, ~s|"b" = 2|]], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should apply De Morgan's law for NOT over OR" do + # NOT (a = 1 OR b = 2) => (NOT a = 1) AND (NOT b = 2) + # Single disjunct with two negated terms + ~S"NOT (a = 1 OR b = 2)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[{:not, ~s|"a" = 1|}, {:not, ~s|"b" = 2|}]], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should apply De Morgan's law for NOT over AND" do + # NOT (a = 1 AND b = 2) => (NOT a = 1) OR (NOT b = 2) + # Two disjuncts, each with one negated term + ~S"NOT (a = 1 AND b = 2)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, nil], + [nil, {:not, ~s|"b" = 2|}] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should handle double negation" do + # NOT NOT a = 1 => a = 1 (double negation elimination) + ~S"NOT NOT a = 1" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|]], + expected_subexpressions: [~s|"a" = 1|] + ) + end + + test "should handle function calls as atomic subexpressions" do + ~S"lower(name) = 'test' OR upper(name) = 'TEST'" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|lower("name") = 'test'|, nil], + [nil, ~s|upper("name") = 'TEST'|] + ], + expected_subexpressions: [~s|lower("name") = 'test'|, ~s|upper("name") = 'TEST'|] + ) + end + + test "should handle mixed-width disjuncts (multi-term AND with single-term OR)" do + # (a = 1 AND b = 2 AND c = 3) OR d = 4 + # Disjunct 1 has 3 terms, disjunct 2 has 1 term, total width = 4 + ~S"(a = 1 AND b = 2 AND c = 3) OR d = 4" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, nil], + [nil, nil, nil, ~s|"d" = 4|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should combine De Morgan with distribution" do + # NOT (a = 1 AND b = 2) AND c = 3 + # De Morgan: NOT(AND(a,b)) => OR(NOT a, NOT b) + # Then: AND(OR(NOT a, NOT b), c) distributes to: + # (NOT a AND c) OR (NOT b AND c) + ~S"NOT (a = 1 AND b = 2) AND c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, ~s|"c" = 3|, nil, nil], + [nil, nil, {:not, ~s|"b" = 2|}, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should apply De Morgan recursively over nested AND within OR" do + # NOT ((a = 1 AND b = 2) OR c = 3) + # De Morgan over OR: AND(NOT(AND(a,b)), NOT c) + # Inner De Morgan over AND: NOT(AND(a,b)) => OR(NOT a, NOT b) + # Distribution: AND(OR(NOT a, NOT b), NOT c) => + # (NOT a AND NOT c) OR (NOT b AND NOT c) + ~S"NOT ((a = 1 AND b = 2) OR c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, {:not, ~s|"c" = 3|}, nil, nil], + [nil, nil, {:not, ~s|"b" = 2|}, {:not, ~s|"c" = 3|}] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle double cross-product with deduplication" do + # ((a AND b) OR (c AND d)) AND ((d AND e) OR (f AND g)) + # Left OR: 2 disjuncts [ab, cd] + # Right OR: 2 disjuncts [de, fg] + # Cross-product: 2 × 2 = 4 disjuncts, each with 4 terms, expanded to width 16 + # d = 4 appears in left's 2nd disjunct AND right's 1st disjunct — shared ref + {:ok, decomposition} = + ~S"((a = 1 AND b = 2) OR (c = 3 AND d = 4)) AND ((d = 4 AND e = 5) OR (f = 6 AND g = 7))" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + # ab × de + [ + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"d" = 4|, + ~s|"e" = 5|, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil + ], + # ab × fg + [ + nil, + nil, + nil, + nil, + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"f" = 6|, + ~s|"g" = 7|, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil + ], + # cd × de + [ + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"d" = 4|, + ~s|"e" = 5|, + nil, + nil, + nil, + nil + ], + # cd × fg + [ + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"f" = 6|, + ~s|"g" = 7| + ] + ], + expected_subexpressions: [ + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"e" = 5|, + ~s|"f" = 6|, + ~s|"g" = 7| + ] + ) + + # Verify d = 4 appears at 4 positions (shared across disjuncts) + d_eq_4_count = + decomposition.subexpressions + |> Enum.count(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"d" = 4| end) + + assert d_eq_4_count == 4 + end + + test "should share refs when same subexpression appears positive and negated" do + # (a = 1 AND b = 2) OR (NOT a = 1 AND c = 3) + # a = 1 appears positive in disjunct 1, negated in disjunct 2 + # The subexpressions map should have only 3 unique expressions + {:ok, decomposition} = + ~S"(a = 1 AND b = 2) OR (NOT a = 1 AND c = 3)" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil], + [nil, nil, {:not, ~s|"a" = 1|}, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + + # Verify a = 1 appears at one positive and one negated position + a_eq_1_entries = + decomposition.subexpressions + |> Enum.filter(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"a" = 1| end) + + assert length(a_eq_1_entries) == 2 + polarities = a_eq_1_entries |> Enum.map(fn {_, s} -> s.negated end) |> Enum.sort() + assert polarities == [false, true] + end + + test "should deduplicate references for identical subexpressions" do + # All three disjuncts contain `a = 1` - should use same reference + {:ok, decomposition} = + ~S"(a = 1 AND b = 2) OR (a = 1 AND c = 3) OR a = 1" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|, nil], + [nil, nil, nil, nil, ~s|"a" = 1|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + + # a = 1 should appear at 3 positions (one per disjunct) + a_eq_1_count = + decomposition.subexpressions + |> Enum.count(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"a" = 1| end) + + assert a_eq_1_count == 3 + end + + test "should return error when disjunct count exceeds limit" do + # Build a WHERE clause with >100 disjuncts: a = 1 OR a = 2 OR ... OR a = 101 + clause = Enum.map_join(1..101, " OR ", &"a = #{&1}") + + result = + clause + |> prepare() + |> Decomposer.decompose() + + assert {:error, message} = result + assert message =~ "too complex" + assert message =~ "101 disjuncts" + assert message =~ "limit of 100" + end + end + + # Helper to prepare a WHERE clause string into a Parser AST + defp prepare(where_clause) do + {:ok, pgquery} = Parser.parse_query(where_clause) + {:ok, expr} = Parser.validate_where_ast(pgquery, refs: @refs) + expr.eval + end + + # Helper for WHERE clauses containing subqueries (IN (SELECT ...)) + defp prepare_with_sublinks(where_clause, sublink_refs, sublink_queries) do + {:ok, pgquery} = Parser.parse_query(where_clause) + all_refs = Map.merge(@refs, sublink_refs) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, refs: all_refs, sublink_queries: sublink_queries) + + expr.eval + end + + # Helper to deparse an AST node back to SQL string + defp deparse(ast) do + SqlGenerator.to_sql(ast) + end + + # Assertion helper that verifies: + # 1. Position count matches expected width + # 2. Correct number of disjuncts + # 3. Subexpressions map contains exactly the expected unique expressions + # 4. Reconstructed expanded format matches expected disjuncts + defp assert_expanded_dnf({:ok, decomposition}, opts) do + expected_disjuncts = Keyword.fetch!(opts, :expected_disjuncts) + expected_subexpressions = Keyword.fetch!(opts, :expected_subexpressions) + + %{ + disjuncts: disjuncts, + subexpressions: subexpressions, + position_count: position_count + } = decomposition + + # 1. Verify position count matches expected width + expected_width = expected_disjuncts |> hd() |> length() + + assert position_count == expected_width, + "Position count (#{position_count}) must equal expected width (#{expected_width})" + + # 2. Verify correct number of disjuncts + assert length(disjuncts) == length(expected_disjuncts), + "Expected #{length(expected_disjuncts)} disjuncts, got #{length(disjuncts)}" + + # 3. Verify subexpressions map contains exactly the expected unique expressions + actual_subexprs = subexpressions |> Map.values() |> Enum.map(&deparse(&1.ast)) |> MapSet.new() + expected_subexprs = MapSet.new(expected_subexpressions) + + assert actual_subexprs == expected_subexprs, + "Subexpressions mismatch. Expected: #{inspect(expected_subexprs)}, got: #{inspect(actual_subexprs)}" + + # 4. Reconstruct expanded format from sparse disjuncts for comparison + actual_expanded = + MapSet.new(disjuncts, fn conj -> + row = List.duplicate(nil, position_count) + + Enum.reduce(conj, row, fn {pos, polarity}, row -> + subexpr = Map.fetch!(subexpressions, pos) + sql = deparse(subexpr.ast) + term = if polarity == :negated, do: {:not, sql}, else: sql + List.replace_at(row, pos, term) + end) + end) + + assert actual_expanded == MapSet.new(expected_disjuncts) + end +end diff --git a/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs b/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs new file mode 100644 index 0000000000..6d6fae6fcf --- /dev/null +++ b/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs @@ -0,0 +1,621 @@ +defmodule Electric.Replication.Eval.SqlGeneratorTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Replication.Eval.Parser.{Const, Ref, Func, Array, RowExpr} + + describe "comparison operators" do + test "equals" do + ast = %Func{name: "\"=\"", args: [%Ref{path: ["status"]}, %Const{value: "active"}]} + assert SqlGenerator.to_sql(ast) == ~s|"status" = 'active'| + end + + test "not equals" do + ast = %Func{name: "\"<>\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" <> 1| + end + + test "less than" do + ast = %Func{name: "\"<\"", args: [%Ref{path: ["age"]}, %Const{value: 30}]} + assert SqlGenerator.to_sql(ast) == ~s|"age" < 30| + end + + test "greater than" do + ast = %Func{name: "\">\"", args: [%Ref{path: ["score"]}, %Const{value: 100}]} + assert SqlGenerator.to_sql(ast) == ~s|"score" > 100| + end + + test "less than or equal" do + ast = %Func{name: "\"<=\"", args: [%Ref{path: ["x"]}, %Const{value: 5}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" <= 5| + end + + test "greater than or equal" do + ast = %Func{name: "\">=\"", args: [%Ref{path: ["y"]}, %Const{value: 10}]} + assert SqlGenerator.to_sql(ast) == ~s|"y" >= 10| + end + end + + describe "pattern matching" do + test "LIKE" do + ast = %Func{name: "\"~~\"", args: [%Ref{path: ["name"]}, %Const{value: "%foo%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" LIKE '%foo%'| + end + + test "ILIKE" do + ast = %Func{name: "\"~~*\"", args: [%Ref{path: ["name"]}, %Const{value: "%bar%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" ILIKE '%bar%'| + end + + test "NOT LIKE" do + ast = %Func{name: "\"!~~\"", args: [%Ref{path: ["name"]}, %Const{value: "%baz%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" NOT LIKE '%baz%'| + end + + test "NOT ILIKE" do + ast = %Func{name: "\"!~~*\"", args: [%Ref{path: ["name"]}, %Const{value: "%qux%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" NOT ILIKE '%qux%'| + end + end + + describe "nullability" do + test "IS NULL" do + ast = %Func{name: "is null", args: [%Ref{path: ["deleted_at"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"deleted_at" IS NULL| + end + + test "IS NOT NULL" do + ast = %Func{name: "is not null", args: [%Ref{path: ["email"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"email" IS NOT NULL| + end + end + + describe "boolean tests" do + test "IS TRUE" do + ast = %Func{name: "IS_TRUE", args: [%Ref{path: ["active"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"active" IS TRUE| + end + + test "IS NOT TRUE" do + ast = %Func{name: "IS_NOT_TRUE", args: [%Ref{path: ["active"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"active" IS NOT TRUE| + end + + test "IS FALSE" do + ast = %Func{name: "IS_FALSE", args: [%Ref{path: ["deleted"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"deleted" IS FALSE| + end + + test "IS NOT FALSE" do + ast = %Func{name: "IS_NOT_FALSE", args: [%Ref{path: ["enabled"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"enabled" IS NOT FALSE| + end + + test "IS UNKNOWN" do + ast = %Func{name: "IS_UNKNOWN", args: [%Ref{path: ["flag"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"flag" IS UNKNOWN| + end + + test "IS NOT UNKNOWN" do + ast = %Func{name: "IS_NOT_UNKNOWN", args: [%Ref{path: ["flag"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"flag" IS NOT UNKNOWN| + end + end + + describe "membership" do + test "IN with literal array" do + ast = %Func{ + name: "in", + args: [ + %Ref{path: ["status"]}, + %Array{elements: [%Const{value: "a"}, %Const{value: "b"}, %Const{value: "c"}]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"status" IN ('a', 'b', 'c')| + end + + test "IN with integer array" do + ast = %Func{ + name: "in", + args: [ + %Ref{path: ["id"]}, + %Array{elements: [%Const{value: 1}, %Const{value: 2}, %Const{value: 3}]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"id" IN (1, 2, 3)| + end + end + + describe "sublink membership check" do + test "renders sublink reference" do + ast = %Func{ + name: "sublink_membership_check", + args: [ + %Ref{path: ["parent_id"]}, + %Ref{path: ["$sublink", "0"]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"parent_id" IN (SELECT $sublink.0)| + end + end + + describe "logical operators" do + test "NOT" do + inner = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + ast = %Func{name: "not", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|NOT "x" = 1| + end + + test "AND" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + ast = %Func{name: "and", args: [a, b]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 AND "y" = 2| + end + + test "OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + ast = %Func{name: "or", args: [a, b]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 OR "y" = 2| + end + + test "nested AND within OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + c = %Func{name: "\"=\"", args: [%Ref{path: ["z"]}, %Const{value: 3}]} + ast = %Func{name: "or", args: [%Func{name: "and", args: [a, b]}, c]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 AND "y" = 2 OR "z" = 3| + end + + test "nested OR within AND" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + c = %Func{name: "\"=\"", args: [%Ref{path: ["z"]}, %Const{value: 3}]} + ast = %Func{name: "and", args: [%Func{name: "or", args: [a, b]}, c]} + assert SqlGenerator.to_sql(ast) == ~s|("x" = 1 OR "y" = 2) AND "z" = 3| + end + + test "deeply nested logical expression" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["a"]}, %Const{value: 1}]} + b = %Func{name: "\">\"", args: [%Ref{path: ["b"]}, %Const{value: 2}]} + c = %Func{name: "\"<\"", args: [%Ref{path: ["c"]}, %Const{value: 3}]} + d = %Func{name: "is null", args: [%Ref{path: ["d"]}]} + + ast = + %Func{ + name: "or", + args: [ + %Func{name: "and", args: [a, b]}, + %Func{name: "and", args: [c, %Func{name: "not", args: [d]}]} + ] + } + + assert SqlGenerator.to_sql(ast) == + ~s|"a" = 1 AND "b" > 2 OR "c" < 3 AND NOT "d" IS NULL| + end + end + + describe "DISTINCT / NOT DISTINCT" do + test "IS DISTINCT FROM" do + left = %Ref{path: ["x"]} + right = %Const{value: 1} + comparison = %Func{name: "\"<>\"", args: [left, right]} + ast = %Func{name: "values_distinct?", args: [left, right, comparison]} + assert SqlGenerator.to_sql(ast) == ~s|"x" IS DISTINCT FROM 1| + end + + test "IS NOT DISTINCT FROM" do + left = %Ref{path: ["x"]} + right = %Const{value: nil} + comparison = %Func{name: "\"<>\"", args: [left, right]} + ast = %Func{name: "values_not_distinct?", args: [left, right, comparison]} + assert SqlGenerator.to_sql(ast) == ~s|"x" IS NOT DISTINCT FROM NULL| + end + end + + describe "ANY / ALL" do + test "ANY with equals" do + inner = %Func{ + name: "\"=\"", + args: [%Ref{path: ["x"]}, %Ref{path: ["arr"]}], + map_over_array_in_pos: 1 + } + + ast = %Func{name: "any", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = ANY("arr")| + end + + test "ALL with less than" do + inner = %Func{ + name: "\"<\"", + args: [%Ref{path: ["x"]}, %Ref{path: ["arr"]}], + map_over_array_in_pos: 1 + } + + ast = %Func{name: "all", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|"x" < ALL("arr")| + end + end + + describe "arithmetic operators" do + test "addition" do + ast = %Func{name: "\"+\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" + 1| + end + + test "subtraction" do + ast = %Func{name: "\"-\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" - 1| + end + + test "multiplication" do + ast = %Func{name: "\"*\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" * 2| + end + + test "division" do + ast = %Func{name: "\"/\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" / 2| + end + + test "exponentiation" do + ast = %Func{name: "\"^\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" ^ 2| + end + + test "unary plus" do + ast = %Func{name: "\"+\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|+ "x"| + end + + test "unary minus" do + ast = %Func{name: "\"-\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|- "x"| + end + + test "square root" do + ast = %Func{name: "\"|/\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s(\|/ "x") + end + + test "absolute value" do + ast = %Func{name: "\"@\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|@ "x"| + end + end + + describe "bitwise operators" do + test "bitwise AND" do + ast = %Func{name: "\"&\"", args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" & 3| + end + + test "bitwise OR" do + ast = %Func{name: "\"|\"", args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s("x" | 3) + end + + test "bitwise XOR" do + ast = %Func{name: "\"#\"", args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" # 3| + end + + test "bitwise NOT" do + ast = %Func{name: "\"~\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|~ "x"| + end + end + + describe "string concatenation" do + test "||" do + ast = %Func{name: "\"||\"", args: [%Ref{path: ["first"]}, %Ref{path: ["last"]}]} + assert SqlGenerator.to_sql(ast) == ~s("first" || "last") + end + end + + describe "array operators" do + test "contains (@>)" do + ast = %Func{name: "\"@>\"", args: [%Ref{path: ["tags"]}, %Ref{path: ["required"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"tags" @> "required"| + end + + test "contained by (<@)" do + ast = %Func{name: "\"<@\"", args: [%Ref{path: ["tags"]}, %Ref{path: ["allowed"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"tags" <@ "allowed"| + end + + test "overlap (&&)" do + ast = %Func{name: "\"&&\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" && "b"| + end + end + + describe "named functions" do + test "lower" do + ast = %Func{name: "lower", args: [%Ref{path: ["name"]}]} + assert SqlGenerator.to_sql(ast) == ~s|lower("name")| + end + + test "upper" do + ast = %Func{name: "upper", args: [%Ref{path: ["name"]}]} + assert SqlGenerator.to_sql(ast) == ~s|upper("name")| + end + + test "array_ndims" do + ast = %Func{name: "array_ndims", args: [%Ref{path: ["arr"]}]} + assert SqlGenerator.to_sql(ast) == ~s|array_ndims("arr")| + end + end + + describe "type casts" do + test "cast with _to_ naming convention" do + ast = %Func{name: "int4_to_bool", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"x"::bool| + end + + test "another cast" do + ast = %Func{name: "text_to_int4", args: [%Ref{path: ["val"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"val"::int4| + end + end + + describe "column references" do + test "simple column" do + assert SqlGenerator.to_sql(%Ref{path: ["status"]}) == ~s|"status"| + end + + test "schema-qualified column" do + assert SqlGenerator.to_sql(%Ref{path: ["public", "users", "id"]}) == + ~s|"public"."users"."id"| + end + end + + describe "constants" do + test "NULL" do + assert SqlGenerator.to_sql(%Const{value: nil}) == "NULL" + end + + test "true" do + assert SqlGenerator.to_sql(%Const{value: true}) == "true" + end + + test "false" do + assert SqlGenerator.to_sql(%Const{value: false}) == "false" + end + + test "string" do + assert SqlGenerator.to_sql(%Const{value: "hello"}) == "'hello'" + end + + test "string with single quote escaping" do + assert SqlGenerator.to_sql(%Const{value: "it's"}) == "'it''s'" + end + + test "integer" do + assert SqlGenerator.to_sql(%Const{value: 42}) == "42" + end + + test "float" do + assert SqlGenerator.to_sql(%Const{value: 3.14}) == "3.14" + end + + test "negative integer" do + assert SqlGenerator.to_sql(%Const{value: -1}) == "-1" + end + end + + describe "array literals" do + test "simple array" do + ast = %Array{elements: [%Const{value: 1}, %Const{value: 2}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == "ARRAY[1, 2, 3]" + end + + test "string array" do + ast = %Array{elements: [%Const{value: "a"}, %Const{value: "b"}]} + assert SqlGenerator.to_sql(ast) == "ARRAY['a', 'b']" + end + + test "empty array" do + ast = %Array{elements: []} + assert SqlGenerator.to_sql(ast) == "ARRAY[]" + end + end + + describe "row expressions" do + test "simple row" do + ast = %RowExpr{elements: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + assert SqlGenerator.to_sql(ast) == ~s|ROW("a", "b")| + end + + test "row in sublink membership check" do + row = %RowExpr{elements: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + + ast = %Func{ + name: "sublink_membership_check", + args: [row, %Ref{path: ["$sublink", "0"]}] + } + + assert SqlGenerator.to_sql(ast) == ~s|ROW("a", "b") IN (SELECT $sublink.0)| + end + end + + describe "date/time/interval constants" do + test "date" do + ast = %Const{value: ~D[2024-01-15]} + assert SqlGenerator.to_sql(ast) == "'2024-01-15'::date" + end + + test "time" do + ast = %Const{value: ~T[13:45:00]} + assert SqlGenerator.to_sql(ast) == "'13:45:00'::time" + end + + test "timestamp (NaiveDateTime)" do + ast = %Const{value: ~N[2024-01-15 13:45:00]} + assert SqlGenerator.to_sql(ast) == "'2024-01-15T13:45:00'::timestamp" + end + + test "timestamptz (DateTime)" do + ast = %Const{value: DateTime.from_naive!(~N[2024-01-15 13:45:00], "Etc/UTC")} + assert SqlGenerator.to_sql(ast) == "'2024-01-15T13:45:00Z'::timestamptz" + end + + test "interval" do + ast = %Const{value: PgInterop.Interval.parse!("1 year 2 months 3 days")} + result = SqlGenerator.to_sql(ast) + assert result =~ ~r/^'.*'::interval$/ + end + end + + describe "error handling" do + test "raises ArgumentError for unsupported AST node" do + assert_raise ArgumentError, ~r/unsupported AST node/, fn -> + SqlGenerator.to_sql(%{unexpected: :node}) + end + end + + test "raises ArgumentError for unknown function name" do + assert_raise ArgumentError, ~r/unsupported AST node/, fn -> + SqlGenerator.to_sql(%Func{name: "totally_unknown_func", args: [%Const{value: 1}]}) + end + end + end + + describe "complex nested expressions" do + test "WHERE clause with AND, OR, comparisons and NULL check" do + status_check = %Func{ + name: "\"=\"", + args: [%Ref{path: ["status"]}, %Const{value: "active"}] + } + + age_check = %Func{name: "\">=\"", args: [%Ref{path: ["age"]}, %Const{value: 18}]} + email_check = %Func{name: "is not null", args: [%Ref{path: ["email"]}]} + + ast = + %Func{ + name: "and", + args: [ + %Func{name: "or", args: [status_check, age_check]}, + email_check + ] + } + + assert SqlGenerator.to_sql(ast) == + ~s|("status" = 'active' OR "age" >= 18) AND "email" IS NOT NULL| + end + + test "NOT with nested OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + + ast = %Func{name: "not", args: [%Func{name: "or", args: [a, b]}]} + + assert SqlGenerator.to_sql(ast) == ~s|NOT ("x" = 1 OR "y" = 2)| + end + + test "comparison with string concatenation" do + concat = %Func{name: "\"||\"", args: [%Ref{path: ["first"]}, %Ref{path: ["last"]}]} + ast = %Func{name: "\"=\"", args: [concat, %Const{value: "JohnDoe"}]} + assert SqlGenerator.to_sql(ast) == ~s("first" || "last" = 'JohnDoe') + end + + test "precedence: multiplication inside addition" do + # (a * b) + c — no parens needed since * binds tighter + mul = %Func{name: "\"*\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"+\"", args: [mul, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" * "b" + "c"| + end + + test "precedence: addition inside multiplication" do + # a * (b + c) — parens needed since + binds looser + add = %Func{name: "\"+\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"*\"", args: [%Ref{path: ["a"]}, add]} + assert SqlGenerator.to_sql(ast) == ~s|"a" * ("b" + "c")| + end + + test "precedence: left-associative subtraction" do + # a - (b - c) — parens needed on right child + inner = %Func{name: "\"-\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"-\"", args: [%Ref{path: ["a"]}, inner]} + assert SqlGenerator.to_sql(ast) == ~s|"a" - ("b" - "c")| + end + + test "precedence: left-associative subtraction, left child" do + # (a - b) - c — no parens needed (left-associative) + inner = %Func{name: "\"-\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"-\"", args: [inner, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" - "b" - "c"| + end + + test "precedence: right-associative exponentiation" do + # a ^ (b ^ c) — no parens needed (right-associative) + inner = %Func{name: "\"^\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"^\"", args: [%Ref{path: ["a"]}, inner]} + assert SqlGenerator.to_sql(ast) == ~s|"a" ^ "b" ^ "c"| + end + + test "precedence: right-associative exponentiation, left child" do + # (a ^ b) ^ c — parens needed on left child + inner = %Func{name: "\"^\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"^\"", args: [inner, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|("a" ^ "b") ^ "c"| + end + end + + describe "to_sql is the inverse of parse" do + use ExUnitProperties + + alias Electric.Replication.Eval.Parser + alias Support.PgExpressionGenerator + + property "to_sql output is parseable for any parseable WHERE clause" do + check all( + {sql, refs} <- PgExpressionGenerator.where_clause_generator(), + max_runs: 1_000, + max_run_time: 10_000 + ) do + assert_to_sql_inverts_parse(sql, refs) + end + end + + defp assert_to_sql_inverts_parse(sql, refs) do + # The parser may raise on some generated expressions (pre-existing parser + # limitations). We rescue those and skip — we only care that successfully + # parsed expressions produce valid SQL via to_sql. + parsed = + try do + Parser.parse_and_validate_expression(sql, refs: refs) + rescue + _ -> :skip + end + + case parsed do + {:ok, %{eval: ast}} -> + regenerated = SqlGenerator.to_sql(ast) + + reparsed = + try do + Parser.parse_and_validate_expression(regenerated, refs: refs) + rescue + e -> + flunk( + "to_sql output raised #{inspect(e)} when re-parsing: #{regenerated} (from: #{sql})" + ) + end + + assert {:ok, _} = reparsed, + "to_sql output is not valid SQL: #{regenerated} (from: #{sql})" + + {:error, _reason} -> + :ok + + :skip -> + :ok + end + end + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs b/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs deleted file mode 100644 index 990a46c2d5..0000000000 --- a/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs +++ /dev/null @@ -1,419 +0,0 @@ -defmodule Electric.Shapes.Consumer.ChangeHandlingTest do - use ExUnit.Case, async: true - - alias Electric.Replication.Changes.NewRecord - alias Electric.Replication.Changes.UpdatedRecord - alias Electric.Replication.LogOffset - alias Electric.Shapes.Consumer.ChangeHandling - alias Electric.Shapes.Consumer.MoveIns - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.Shape - - import Support.ComponentSetup - - @moduletag :tmp_dir - - @inspector Support.StubInspector.new( - tables: ["users"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, - %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}} - ] - ) - - describe "process_changes/3 with move-ins" do - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - # Create a shape with dependencies (subquery) - shape = - Shape.new!("users", where: "parent_id IN (SELECT id FROM users)", inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "skips change when value is in unresolved move-in with nil snapshot", %{state: state} do - # Set up move-in state with a waiting move-in that has nil snapshot - # This simulates a move-in that was triggered but query hasn't started yet - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "a96441e4-59bd-426d-aefe-66c7fef4ddd2", - {["$sublink", "0"], MapSet.new([1])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"child\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - # The change should be skipped because: - # 1. Its parent_id=1 matches the in-flight moved value - # 2. The move-in has nil snapshot, meaning we don't know when it will be visible yet - # 3. Therefore we should skip to avoid duplicates when move-in results arrive - result = ChangeHandling.process_changes([change], state, ctx) - - # Should return empty changes since it should be skipped - {filtered_changes, _state, count, _offset} = result - - assert filtered_changes == [] - - assert count == 0 - end - - test "skips change when value is in unresolved move-in with known snapshot and xid is visible", - %{state: state} do - # Set up move-in state with a waiting move-in that has a known snapshot - # xid 962 should be visible in snapshot {963, 963, []} (since 962 < 963) - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "ab234061-eb07-4ef7-97c5-301ad2056280", - {["$sublink", "0"], MapSet.new([1])} - ) - |> MoveIns.set_snapshot("ab234061-eb07-4ef7-97c5-301ad2056280", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - # xid 962 is visible in snapshot {963, 963, []} - ctx = %{xid: 962, extra_refs: %{}} - - result = ChangeHandling.process_changes([change], state, ctx) - - {filtered_changes, _state, count, _offset} = result - - assert filtered_changes == [], - "Change should be skipped when value is in unresolved move-in and xid is visible" - - assert count == 0 - end - - test "keeps change but converts it to an insert if it covers the snapshot, and adds it to touched keys", - %{state: state} do - # Set up move-in state with a waiting move-in that has a known snapshot - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "ab234061-eb07-4ef7-97c5-301ad2056280", - {["$sublink", "0"], MapSet.new([1])} - ) - |> MoveIns.set_snapshot("ab234061-eb07-4ef7-97c5-301ad2056280", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - # xid 970 covers the snapshot - ctx = %{ - xid: 970, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - - assert {[change], state, 1, _offset} = result - - assert %NewRecord{record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, key: key} = - change - - assert state.move_handling_state.touch_tracker == %{key => 970} - end - end - - describe "process_changes/3 with subquery combined with other conditions" do - # Tests for shapes that have a subquery ANDed with other non-subquery conditions. - # The bug occurred when a change's sublink value was in a pending move-in, but - # the record didn't match other parts of the WHERE clause. The old code would - # incorrectly skip the change, assuming the move-in would cover it. - # - # Example: "parent_id IN (SELECT id FROM parents WHERE active) AND status = 'published'" - # If parent becomes active (triggers move-in), but record has status='draft', - # the change should NOT be skipped because the move-in won't return this row. - - @parents_inspector Support.StubInspector.new( - tables: ["parents", "children"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, - %{name: "status", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "active", type: "bool", pk_position: nil, type_id: {16, 1}} - ] - ) - - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - # Create a shape with a subquery AND a simple equality condition: - # parent must be active AND child must be published - shape = - Shape.new!( - "children", - where: - "parent_id IN (SELECT id FROM parents WHERE active = true) AND status = 'published'", - inspector: @parents_inspector - ) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "processes change when sublink is in move-in but record fails other WHERE conditions", %{ - state: state - } do - # This tests the fix: parent_id=3 enters a move-in (parent became active), - # but the child has status='draft', so the change should NOT be skipped. - # The move-in query uses the full WHERE clause, so it won't return this row. - - # Set up move-in state: parent_id=3 just became active (triggers move-in) - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-3", - {["$sublink", "0"], MapSet.new([3])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # A record moving FROM parent_id=1 (in shape) TO parent_id=3 (active but status=draft) - # Old record: parent_id=1 active, status=published -> in shape - # New record: parent_id=3 active, status=draft -> NOT in shape (fails status check) - # This should result in a DELETE, not be skipped - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "100", "parent_id" => "1", "status" => "published"}, - record: %{"id" => "100", "parent_id" => "3", "status" => "draft"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"children\"/\"100\"", - changed_columns: MapSet.new(["parent_id", "status"]) - } - - # extra_refs: old has parent 1 active, new has parent 3 active - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1, 3])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # The change should NOT be skipped - it should be processed as a delete - # because the new record doesn't match status = 'published' - assert count == 1 - assert length(filtered_changes) == 1 - - [processed_change] = filtered_changes - # Should be converted to a delete since old was in shape, new is not - assert %Electric.Replication.Changes.DeletedRecord{} = processed_change - assert processed_change.old_record["id"] == "100" - end - - test "skips change when value is in move-in AND matches full WHERE clause", %{state: state} do - # When parent_id=2 enters a move-in AND the record has status='published', - # the change should be skipped (covered by move-in query) - - # Set up move-in state: parent_id=2 just became active (triggers move-in) - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-2", - {["$sublink", "0"], MapSet.new([2])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # A record with parent_id=2 and status=published being updated - # Both subquery (parent active) and status condition are satisfied - # This change should be skipped because the move-in will handle it - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "100", "parent_id" => "2", "status" => "published"}, - record: %{"id" => "100", "parent_id" => "2", "status" => "published"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"children\"/\"100\"", - changed_columns: MapSet.new([]) - } - - # extra_refs: parent 2 is now active (in new refs) - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1, 2])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # The change should be skipped because: - # 1. parent_id=2 is in the pending move-in - # 2. status='published' satisfies the other WHERE condition - # 3. The move-in query will return this row - assert filtered_changes == [] - assert count == 0 - end - - test "processes delete when record fails non-subquery condition even with active move-in", %{ - state: state - } do - # When a record changes from status='published' to status='draft', - # even if the parent is in a pending move-in, we should delete - # because the status condition fails. - - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-1", - {["$sublink", "0"], MapSet.new([1])} - ) - |> MoveIns.set_snapshot("move-in-for-parent-1", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Record changes status from published (in shape) to draft (not in shape) - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "200", "parent_id" => "1", "status" => "published"}, - record: %{"id" => "200", "parent_id" => "1", "status" => "draft"}, - log_offset: LogOffset.new(12346, 0), - key: "\"public\".\"children\"/\"200\"", - changed_columns: MapSet.new(["status"]) - } - - # xid 962 is visible in snapshot {963, 963, []} - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # Should produce a delete, not be skipped - assert count == 1 - assert [%Electric.Replication.Changes.DeletedRecord{} = delete] = filtered_changes - assert delete.old_record["id"] == "200" - end - end - - describe "process_changes/3 with sublink value changes during move-in" do - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - shape = - Shape.new!("users", where: "parent_id IN (SELECT id FROM users)", inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "processes UpdatedRecord with changed sublink value even when new value is in a pending move-in", - %{state: state} do - # parent_id changes from 2 to 3, and there's a pending move-in for parent_id=3. - # The change must still be processed so removed_move_tags are emitted for the - # old sublink value — the move-in query only returns INSERTs with the new tag. - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-3", - {["$sublink", "0"], MapSet.new([3])} - ) - - state = %{state | move_handling_state: move_handling_state} - - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "10", "parent_id" => "2", "value" => "hello"}, - record: %{"id" => "10", "parent_id" => "3", "value" => "hello"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"10\"", - changed_columns: MapSet.new(["parent_id"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([2])}, %{["$sublink", "0"] => MapSet.new([2, 3])}} - } - - {filtered_changes, _state, count, _offset} = - ChangeHandling.process_changes([change], state, ctx) - - assert count == 1 - assert [%UpdatedRecord{} = processed] = filtered_changes - assert processed.record["parent_id"] == "3" - assert processed.old_record["parent_id"] == "2" - assert processed.move_tags != [] - assert processed.removed_move_tags != [] - end - - test "skips UpdatedRecord when sublink value is unchanged and in a pending move-in", - %{state: state} do - # Only a non-sublink field changes — the move-in will return the row with - # identical tags, so the WAL change can safely be skipped. - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-2", - {["$sublink", "0"], MapSet.new([2])} - ) - - state = %{state | move_handling_state: move_handling_state} - - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "10", "parent_id" => "2", "value" => "old"}, - record: %{"id" => "10", "parent_id" => "2", "value" => "new"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"10\"", - changed_columns: MapSet.new(["value"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([2])}} - } - - {filtered_changes, _state, count, _offset} = - ChangeHandling.process_changes([change], state, ctx) - - assert filtered_changes == [] - assert count == 0 - end - end -end diff --git a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs index 480c75515d..f96182266a 100644 --- a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs @@ -1018,6 +1018,221 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do end end + describe "DNF: multiple tags per row with active_conditions" do + test "insert with active_conditions where row is not initially included", ctx do + ctx = with_materializer(ctx) + + # Row has two disjunct tags but active_conditions says position 0 is false + # Tag "hash_a/" participates in position 0, tag "/hash_b" participates in position 1 + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [false, false] + } + ]) + + # Row is not included because no disjunct has all positions active + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + end + + test "insert with active_conditions where one disjunct is satisfied", ctx do + ctx = with_materializer(ctx) + + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + } + ]) + + # First disjunct "hash_a/" has position 0 active → included + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "move-in broadcast activates a previously excluded row", ctx do + ctx = with_materializer(ctx) + + # Insert with position 0 inactive + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [false, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + + # Move-in at position 0 with value "hash_a" + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + # Now position 0 is true, first disjunct "hash_a/" is satisfied + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "move-out does not remove row when another disjunct still holds", ctx do + ctx = with_materializer(ctx) + + # Insert with both positions active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-out at position 0 - but position 1 still holds via second disjunct + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + # Row should still be included because disjunct "/hash_b" at position 1 is still true + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + refute_received {:materializer_changes, _, _} + end + + test "move-out removes row when last active disjunct becomes false", ctx do + ctx = with_materializer(ctx) + + # Insert with only position 1 active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [false, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-out at position 1 - now no disjunct holds + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 1, value: "hash_b"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + assert_receive {:materializer_changes, _, %{move_out: [{10, "10"}]}} + end + + test "move-in on already-present row is a no-op for value counts", ctx do + ctx = with_materializer(ctx) + + # Insert with position 0 active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-in at position 1 - row was already included via position 0 + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 1, value: "hash_b"}]}} + ]) + + # No value count change + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + refute_received {:materializer_changes, _, _} + end + + test "multi-position disjunct requires all positions active", ctx do + ctx = with_materializer(ctx) + + # Tag "hash_a/1" means positions 0 AND 1 must be active for this disjunct + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/1"], + active_conditions: [true, false] + } + ]) + + # Position 1 is false, so the disjunct is not satisfied + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + end + + test "multi-position disjunct becomes satisfied when all positions active", ctx do + ctx = with_materializer(ctx) + + # Tag "hash_a/1" needs both positions active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/1"], + active_conditions: [false, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + + # Move-in at position 0 makes both positions active + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "composite-key tag indexing works for position lookup", ctx do + ctx = with_materializer(ctx) + + # Two rows with different position-0 hashes + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_x/"], + active_conditions: [true, false] + }, + %Changes.NewRecord{ + key: "2", + record: %{"value" => "20"}, + move_tags: ["hash_y/"], + active_conditions: [true, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10, 20]) + assert_receive {:materializer_changes, _, %{move_in: _}} + + # Move-out only for hash_x at position 0 + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 0, value: "hash_x"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([20]) + assert_receive {:materializer_changes, _, %{move_out: [{10, "10"}]}} + end + end + defp respond_to_call(request, response) do receive do {:"$gen_call", {from, ref}, {^request, _arg}} -> diff --git a/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs b/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs deleted file mode 100644 index 424bc4455d..0000000000 --- a/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs +++ /dev/null @@ -1,629 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveInsTest do - use ExUnit.Case, async: true - - alias Electric.Shapes.Consumer.MoveIns - alias Electric.Replication.Changes.Transaction - alias Electric.Replication.Changes - - describe "new/0" do - test "creates empty state" do - state = MoveIns.new() - - assert state.waiting_move_ins == %{} - assert state.filtering_move_ins == [] - end - end - - describe "add_waiting/4" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "adds a single move-in with nil snapshot", %{state: state} do - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - - assert Map.has_key?(state.waiting_move_ins, "move1") - assert state.waiting_move_ins["move1"] == {nil, moved_values} - end - - @tag :move_in - test "adds multiple move-ins", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.add_waiting("move2", moved_values2) - - assert map_size(state.waiting_move_ins) == 2 - assert state.waiting_move_ins["move1"] == {nil, moved_values1} - assert state.waiting_move_ins["move2"] == {nil, moved_values2} - end - end - - describe "set_snapshot/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "sets snapshot for waiting move-in", %{state: state} do - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - snapshot = {100, 200, [150]} - state = MoveIns.set_snapshot(state, "move1", snapshot) - - assert state.waiting_move_ins["move1"] == {snapshot, moved_values} - end - - @tag :move_in - test "raises on non-existent move-in", %{state: state} do - snapshot = {100, 200, [150]} - - assert_raise KeyError, fn -> - MoveIns.set_snapshot(state, "nonexistent", snapshot) - end - end - end - - describe "change_to_filtering/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "moves from waiting to filtering and returns visibility boundary", %{state: state} do - snapshot = {100, 200, []} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - key_set = MapSet.new(["key1", "key2"]) - {visibility_boundary, state} = MoveIns.change_to_filtering(state, "move1", key_set) - - assert state.waiting_move_ins == %{} - assert [{^snapshot, ^key_set}] = state.filtering_move_ins - # Single move-in returns its snapshot as visibility boundary - assert visibility_boundary == snapshot - end - - @tag :move_in - test "keeps other waiting move-ins", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.set_snapshot("move1", {100, 200, []}) - |> MoveIns.add_waiting("move2", moved_values2) - |> MoveIns.set_snapshot("move2", {150, 250, []}) - - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - assert Map.has_key?(state.waiting_move_ins, "move2") - refute Map.has_key?(state.waiting_move_ins, "move1") - end - - @tag :move_in - test "raises on unknown move-in name", %{state: state} do - assert_raise KeyError, fn -> - MoveIns.change_to_filtering(state, "nonexistent", MapSet.new([])) - end - end - - @tag :move_in - test "returns snapshot when resolving minimum with no other waiting", %{state: state} do - snapshot = {100, 200, []} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot - end - - @tag :move_in - test "returns snapshot when resolving minimum among concurrent move-ins", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move1 (minimum) - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot1 - end - - @tag :move_in - test "returns nil when resolving non-minimum", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) - should return nil and store snapshot2 - {boundary, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary == nil - assert state.maximum_resolved_snapshot == snapshot2 - end - - @tag :move_in - test "returns stored maximum when last move-in resolves", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) first - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - - # Resolve move1 (last one) - should return stored maximum (snapshot2) - {boundary2, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary2 == snapshot2 - assert state.maximum_resolved_snapshot == nil - end - end - - describe "remove_completed/2" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "removes move-ins where xid >= xmax", %{state: state} do - # Move-in with xmax=200 - moved_values = {[], MapSet.new()} - - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - # Transaction with xid=200 (at xmax boundary - should complete) - txn = %Transaction{xid: 200, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert state.filtering_move_ins == [] - end - - @tag :move_in - test "keeps move-ins where xid < xmax", %{state: state} do - moved_values = {[], MapSet.new()} - - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - txn = %Transaction{xid: 150, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert length(state.filtering_move_ins) == 1 - end - - @tag :move_in - test "removes only completed move-ins from multiple", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.set_snapshot("move1", {100, 200, []}) - |> MoveIns.add_waiting("move2", moved_values2) - |> MoveIns.set_snapshot("move2", {100, 300, []}) - - {_boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - {_boundary2, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new(["key2"])) - - # xid=250 completes move1 (xmax=200) but not move2 (xmax=300) - txn = %Transaction{xid: 250, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert length(state.filtering_move_ins) == 1 - [{snapshot, key_set}] = state.filtering_move_ins - assert snapshot == {100, 300, []} - assert key_set == MapSet.new(["key2"]) - end - end - - describe "track_touch/3" do - @tag :move_in - test "tracks INSERT operations" do - state = MoveIns.new() - change = %Changes.NewRecord{key: "key1", record: %{}} - - state = MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{"key1" => 100} - end - - @tag :move_in - test "tracks UPDATE operations" do - state = MoveIns.new() - change = %Changes.UpdatedRecord{key: "key1", record: %{}, old_record: %{}} - - state = MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{"key1" => 100} - end - - @tag :move_in - test "does NOT track DELETE operations" do - state = MoveIns.new() - change = %Changes.DeletedRecord{key: "key1", old_record: %{}} - - state = MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{} - end - - @tag :move_in - test "updates existing key with newer xid" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100}} - change = %Changes.NewRecord{key: "key1", record: %{}} - - state = MoveIns.track_touch(state, 150, change) - - assert state.touch_tracker == %{"key1" => 150} - end - end - - describe "gc_touch_tracker/1" do - @tag :move_in - test "clears all when no pending queries" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100, "key2" => 150}} - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{} - end - - @tag :move_in - test "keeps all touches when no snapshots known yet" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100, "key2" => 150}} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{"key1" => 100, "key2" => 150} - end - - @tag :move_in - test "removes touches < min_xmin" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{"key2" => 100, "key3" => 150} - end - - @tag :move_in - test "keeps touches >= min_xmin across multiple snapshots" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values1) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - state = MoveIns.add_waiting(state, "move2", moved_values2) - state = MoveIns.set_snapshot(state, "move2", {120, 250, []}) - - state = MoveIns.gc_touch_tracker(state) - - # min_xmin = 100, so keeps keys with xid >= 100 - assert state.touch_tracker == %{"key2" => 100, "key3" => 150} - end - - @tag :move_in - test "handles mix of nil and real snapshots" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values1) - state = MoveIns.add_waiting(state, "move2", moved_values2) - state = MoveIns.set_snapshot(state, "move2", {120, 250, []}) - - state = MoveIns.gc_touch_tracker(state) - - # min_xmin = 120, so only keeps key3 - assert state.touch_tracker == %{"key3" => 150} - end - end - - describe "should_skip_query_row?/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "returns false when key not in tracker", %{state: state} do - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - assert result == false - end - - @tag :move_in - test "returns false when touch is visible in snapshot", %{state: state} do - state = %{state | touch_tracker: %{"key1" => 50}} - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=50 < xmin=100, so visible - assert result == false - end - - @tag :move_in - test "returns true when touch xid >= xmax", %{state: state} do - state = %{state | touch_tracker: %{"key1" => 250}} - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=250 >= xmax=200, so not visible (happened after snapshot) - assert result == true - end - - @tag :move_in - test "returns true when touch xid in xip_list", %{state: state} do - state = %{state | touch_tracker: %{"key1" => 150}} - snapshot = {100, 200, [150]} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=150 is in xip_list, so not visible (not committed at snapshot time) - assert result == true - end - end - - describe "visibility boundary scenarios (integration)" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "single move-in: returns its own snapshot", %{state: state} do - snapshot = {100, 200, [150]} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot - end - - @tag :move_in - test "two move-ins resolving in order (both minimum): both return their snapshots", %{ - state: state - } do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move1 (minimum) first - returns snapshot1 - {boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary1 == snapshot1 - - # Resolve move2 (last one) - returns snapshot2 - {boundary2, _state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == snapshot2 - end - - @tag :move_in - test "two move-ins resolving out of order: stores max, returns it on last", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) first - returns nil, stores snapshot2 - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move1 (last one) - returns stored maximum (snapshot2) - {boundary2, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary2 == snapshot2 - assert state.maximum_resolved_snapshot == nil - end - - @tag :move_in - test "three move-ins resolving: 2nd, 3rd, then 1st", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - snapshot3 = {120, 250, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - |> MoveIns.add_waiting("move3", moved_values) - |> MoveIns.set_snapshot("move3", snapshot3) - - # Resolve move2 (largest, not minimum) - stores snapshot2 - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move3 (middle, not minimum) - keeps maximum as snapshot2 - {boundary2, state} = MoveIns.change_to_filtering(state, "move3", MapSet.new([])) - assert boundary2 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move1 (last one) - returns stored maximum (snapshot2) - {boundary3, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary3 == snapshot2 - assert state.maximum_resolved_snapshot == nil - end - - @tag :move_in - test "equal snapshots: both treated as minimum, both return snapshot", %{state: state} do - snapshot = {100, 200, [150]} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot) - - # Resolve move1 - returns snapshot - {boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary1 == snapshot - - # Resolve move2 (last one) - also returns snapshot - {boundary2, _state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == snapshot - end - - @tag :move_in - test "complex: 4 move-ins resolving in order 4→2→3→1", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - snapshot3 = {120, 250, []} - snapshot4 = {200, 400, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - |> MoveIns.add_waiting("move3", moved_values) - |> MoveIns.set_snapshot("move3", snapshot3) - |> MoveIns.add_waiting("move4", moved_values) - |> MoveIns.set_snapshot("move4", snapshot4) - - # Resolve move4 (largest, not minimum) - stores snapshot4 - {boundary1, state} = MoveIns.change_to_filtering(state, "move4", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move2 (second largest, not minimum) - keeps snapshot4 - {boundary2, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move3 (second smallest, not minimum) - keeps snapshot4 - {boundary3, state} = MoveIns.change_to_filtering(state, "move3", MapSet.new([])) - assert boundary3 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move1 (last one) - returns stored maximum (snapshot4) - {boundary4, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary4 == snapshot4 - assert state.maximum_resolved_snapshot == nil - end - end - - describe "change_visible_in_unresolved_move_ins_for_values?/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - test "returns true when value is in unresolved move-in with nil snapshot", %{state: state} do - state = MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - - assert MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 100 - ) - end - - test "returns true when value is in unresolved move-in with known snapshot and xid is visible", - %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - |> MoveIns.set_snapshot("move1", {150, 200, []}) - - assert MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 100 - ) - end - - test "returns false when value is in unresolved move-in with known snapshot and xid is not visible", - %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - |> MoveIns.set_snapshot("move1", {150, 200, []}) - - refute MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 300 - ) - end - - test "returns false when value is not in unresolved move-in", %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - - refute MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 2}, - 100 - ) - end - end -end diff --git a/packages/sync-service/test/electric/shapes/consumer/state_test.exs b/packages/sync-service/test/electric/shapes/consumer/state_test.exs index 288c8fdaa0..c00bfdc199 100644 --- a/packages/sync-service/test/electric/shapes/consumer/state_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/state_test.exs @@ -9,37 +9,6 @@ defmodule Electric.Shapes.Consumer.StateTest do @moduletag :tmp_dir - @inspector Support.StubInspector.new( - tables: [ - {1, {"public", "items"}}, - {2, {"public", "parent"}}, - {2, {"public", "grandparent"}} - ], - columns: [ - %{ - name: "id", - type: "int8", - pk_position: 0, - type_id: {20, 1}, - is_generated: false - }, - %{ - name: "parent_id", - type: "int8", - pk_position: nil, - type_id: {20, 1}, - is_generated: false - }, - %{ - name: "flag", - type: "bool", - pk_position: nil, - type_id: {16, 1}, - is_generated: false - } - ] - ) - describe "new/3" do setup [:with_stack_id_from_test] @@ -168,142 +137,4 @@ defmodule Electric.Shapes.Consumer.StateTest do assert log =~ "Falling back to full-transaction buffering" end end - - describe "or_with_subquery? field in new/3" do - setup [:with_stack_id_from_test] - - for {where, expected} <- [ - # No WHERE clause - {nil, false}, - - # WHERE clause without subquery - {"id = 1", false}, - {"id = 1 AND flag = true", false}, - {"id = 1 OR flag = true", false}, - - # Subquery without OR - {"id IN (SELECT id FROM parent)", false}, - {"id = 1 AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND id = 1", false}, - {"parent_id IN (SELECT id FROM parent) AND flag = true AND id = 1", false}, - - # OR directly with subquery - {"parent_id IN (SELECT id FROM parent) OR flag = true", true}, - {"flag = true OR parent_id IN (SELECT id FROM parent)", true}, - {"(parent_id IN (SELECT id FROM parent)) OR (flag = true)", true}, - - # OR that is ANDed with subquery (OR not directly containing subquery) - {"(id = 1 OR flag = true) AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND (id = 1 OR flag = true)", false}, - - # Nested cases - OR with subquery in one branch - {"id = 1 OR parent_id IN (SELECT id FROM parent)", true}, - {"id = 1 OR (flag = true AND parent_id IN (SELECT id FROM parent))", true}, - {"(id = 1 AND parent_id IN (SELECT id FROM parent)) OR flag = true", true}, - - # Subquery has OR inside - {"id IN (SELECT id FROM parent WHERE flag = true OR id = 2)", false}, - - # Subquery has OR with nested subquery - {"id IN (SELECT id FROM parent WHERE id = 2 OR id IN (SELECT id FROM grandparent))", - false}, - - # NOT should not change result - {"NOT (parent_id IN (SELECT id FROM parent) OR flag = true)", true}, - {"parent_id NOT IN (SELECT id FROM parent) OR flag = true", true}, - {"parent_id NOT IN (SELECT id FROM parent)", false}, - {"NOT(parent_id IN (SELECT id FROM parent))", false} - ] do - @tag where: where, expected: expected - test "#{inspect(where)} -> or_with_subquery?=#{expected}", %{ - stack_id: stack_id, - where: where, - expected: expected - } do - shape = Shape.new!("items", where: where, inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - - assert state.or_with_subquery? == expected - end - end - end - - describe "not_with_subquery? field in new/3" do - setup [:with_stack_id_from_test] - - for {where, expected} <- [ - # No WHERE clause - {nil, false}, - - # WHERE clause without subquery (NOT doesn't matter without subquery) - {"id = 1", false}, - {"NOT (id = 1)", false}, - {"NOT (id = 1 AND flag = true)", false}, - {"id = 1 AND NOT flag = true", false}, - - # Subquery without NOT - {"id IN (SELECT id FROM parent)", false}, - {"id = 1 AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND id = 1", false}, - {"parent_id IN (SELECT id FROM parent) OR flag = true", false}, - - # x NOT IN (subquery) - the most common case - {"parent_id NOT IN (SELECT id FROM parent)", true}, - {"parent_id NOT IN (SELECT id FROM parent) AND id = 1", true}, - {"id = 1 AND parent_id NOT IN (SELECT id FROM parent)", true}, - - # NOT(x IN subquery) - equivalent to NOT IN - {"NOT(parent_id IN (SELECT id FROM parent))", true}, - {"NOT (parent_id IN (SELECT id FROM parent))", true}, - - # NOT(condition AND x IN subquery) - NOT wrapping expression with subquery - {"NOT(flag = true AND parent_id IN (SELECT id FROM parent))", true}, - {"NOT(parent_id IN (SELECT id FROM parent) AND flag = true)", true}, - - # NOT(condition OR x IN subquery) - NOT wrapping OR with subquery - {"NOT(flag = true OR parent_id IN (SELECT id FROM parent))", true}, - {"NOT(parent_id IN (SELECT id FROM parent) OR flag = true)", true}, - - # Nested NOT with subquery - {"NOT(id = 1 AND (flag = true OR parent_id IN (SELECT id FROM parent)))", true}, - {"NOT((parent_id IN (SELECT id FROM parent)) AND id = 1)", true}, - - # NOT inside subquery (shouldn't affect outer query) - {"id IN (SELECT id FROM parent WHERE NOT flag = true)", false}, - {"id IN (SELECT id FROM parent WHERE id NOT IN (SELECT id FROM grandparent))", false}, - - # NOT combined with AND/OR at outer level - {"parent_id NOT IN (SELECT id FROM parent) OR flag = true", true}, - {"parent_id NOT IN (SELECT id FROM parent) AND flag = true", true}, - {"flag = true OR parent_id NOT IN (SELECT id FROM parent)", true}, - {"flag = true AND parent_id NOT IN (SELECT id FROM parent)", true}, - - # Multiple subqueries with NOT - {"parent_id NOT IN (SELECT id FROM parent) AND id IN (SELECT id FROM grandparent)", - true}, - {"parent_id IN (SELECT id FROM parent) AND id NOT IN (SELECT id FROM grandparent)", - true}, - - # Double NOT (cancels out, but still has NOT wrapping subquery in AST) - {"NOT(NOT(parent_id IN (SELECT id FROM parent)))", true}, - - # NOT on non-subquery part, subquery without NOT - {"NOT(flag = true) AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND NOT(flag = true)", false} - ] do - @tag where: where, expected: expected - test "#{inspect(where)} -> not_with_subquery?=#{expected}", %{ - stack_id: stack_id, - where: where, - expected: expected - } do - shape = Shape.new!("items", where: where, inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - - assert state.not_with_subquery? == expected - end - end - end end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs new file mode 100644 index 0000000000..36642118d9 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs @@ -0,0 +1,110 @@ +defmodule Electric.Shapes.Consumer.Subqueries.MoveQueueTest do + use ExUnit.Case, async: true + + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + + @dep 0 + + test "drops redundant move outs for values absent from the base view" do + queue = MoveQueue.enqueue(MoveQueue.new(), @dep, %{move_out: [{1, "1"}]}, MapSet.new()) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "drops redundant move ins for values already present in the base view" do + queue = MoveQueue.enqueue(MoveQueue.new(), @dep, %{move_in: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "cancels a pending move in with a later move out for the same value" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}]}, MapSet.new()) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new()) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "cancels a pending move out with a later move in for the same value" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "merges repeated move ins and keeps the terminal tuple" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "01"}]}, MapSet.new()) + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}], move_out: []}, MapSet.new()) + + assert %MoveQueue{move_in: %{0 => [{1, "1"}]}, move_out: empty_out} = queue + assert empty_out == %{} + end + + test "merges repeated move outs and keeps the terminal tuple" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "01"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}], move_in: []}, MapSet.new([1])) + + assert %MoveQueue{move_out: %{0 => [{1, "1"}]}, move_in: empty_in} = queue + assert empty_in == %{} + end + + test "orders surviving move outs before move ins" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: %{0 => [{1, "1"}]}, move_in: %{0 => [{2, "2"}]}} = queue + end + + test "uses the provided base view when reducing buffering follow-up moves" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{2, "2"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "pop_next returns the whole move out batch before the move in batch" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{3, "3"}]}, MapSet.new([1])) + + assert {{:move_out, 0, [{1, "1"}]}, queue} = MoveQueue.pop_next(queue) + assert queue.move_out == %{} + assert queue.move_in == %{0 => [{2, "2"}, {3, "3"}]} + + assert {{:move_in, 0, [{2, "2"}, {3, "3"}]}, queue} = MoveQueue.pop_next(queue) + assert queue.move_out == %{} + assert queue.move_in == %{} + assert nil == MoveQueue.pop_next(queue) + end + + test "length counts queued values across both batches" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{3, "3"}]}, MapSet.new([1])) + + assert 3 == MoveQueue.length(queue) + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs new file mode 100644 index 0000000000..2951049088 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -0,0 +1,756 @@ +defmodule Electric.Shapes.Consumer.SubqueriesTest do + use ExUnit.Case, async: true + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.Buffering + alias Electric.Shapes.Consumer.Subqueries.Steady + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @inspector Support.StubInspector.new( + tables: ["parent", "child"], + columns: [ + %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, + %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, + %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, + %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} + ] + ) + + test "converts steady transactions against the current subquery view" do + state = new_state(subquery_view: MapSet.new([1])) + + {changes, state} = + Subqueries.handle_event( + state, + txn(50, [child_insert("1", "1"), child_insert("2", "2")]) + ) + + assert %Steady{} = state + assert [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] = changes + end + + test "negated subquery turns dependency move-in into an outer move-out" do + state = new_negated_state() + dep_handle = dep_handle(state) + + {changes, state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert %Steady{views: %{["$sublink", "0"] => view}} = state + assert view == MapSet.new([1]) + + assert [ + %{ + headers: %{ + event: "move-out", + patterns: [%{pos: 0, value: _value}] + } + } + ] = changes + end + + test "negated subquery turns dependency move-out into a buffered outer move-in" do + state = new_negated_state(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert %Buffering{ + views_before_move: %{["$sublink", "0"] => before_view}, + views_after_move: %{["$sublink", "0"] => after_view} + } = state + + assert before_view == MapSet.new([1]) + assert after_view == MapSet.new() + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + + assert %Steady{views: %{["$sublink", "0"] => view}} = state + assert view == MapSet.new() + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}} + ] = changes + end + + test "splices buffered transactions around the snapshot visibility boundary" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert %Buffering{} = state + + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + + query_row = child_insert("99", "1") + + {changes, state} = + Subqueries.handle_event(state, {:query_move_in_complete, [query_row], lsn(10)}) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] = changes + end + + test "splices move-in query rows between emitted pre and post boundary changes" do + state = new_state(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) + + {changes, state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} + ) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1, 2]) + + assert [ + %Changes.NewRecord{record: %{"id" => "10"}}, + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] = changes + end + + test "splices updates that become a delete before the boundary and an insert after it" do + state = new_state(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + # Before the splice we still evaluate against the old view {1}, so moving + # from parent 1 to parent 2 means the row leaves the shape and becomes a delete. + {[], state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) + + # After the splice we evaluate against the new view {1, 2}, so moving from + # parent 3 to parent 2 means the row enters the shape and becomes a new record. + {[], state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + + {changes, state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} + ) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1, 2]) + + assert [ + %Changes.DeletedRecord{old_record: %{"id" => "10"}}, + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] = changes + end + + test "uses lsn updates to splice at the current buffer tail" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = + changes + end + + test "splices buffered inserts, updates, and deletes around an lsn boundary" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + txn(120, [ + child_insert("10", "1"), + child_update("20", "1"), + child_delete("30", "1") + ]) + ) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + {[], state} = + Subqueries.handle_event( + state, + txn(150, [ + child_insert("11", "1"), + child_update("21", "1"), + child_delete("31", "1") + ]) + ) + + {changes, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.UpdatedRecord{record: %{"id" => "21"}}, + %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} + ] = changes + end + + test "keeps the transaction splice boundary when a later lsn update arrives" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {[], state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + {changes, state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.NewRecord{record: %{"id" => "12"}, last?: true} + ] = changes + end + + test "keeps the lsn splice boundary when the snapshot later reveals invisible txns" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + + {changes, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "10"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] = changes + end + + test "waits for an lsn update even when the move-in query completes with an empty buffer" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert %Buffering{} = state + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = + changes + end + + test "uses an lsn update that arrived before the move-in query completed" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + {changes, state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = + changes + end + + test "uses an lsn update that was already seen before the move-in started" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + + {changes, state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = + changes + end + + test "defers queued move outs until after splice and starts the next move in" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + + assert %Buffering{ + move_in_values: [{2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } = state + + assert views_before[["$sublink", "0"]] == MapSet.new() + assert views_after[["$sublink", "0"]] == MapSet.new([2]) + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + ] = changes + end + + test "applies a queued move out for the active move-in value after splice" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new() + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + ] = changes + end + + test "batches consecutive move ins into a single active move in" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} + ) + + assert %Buffering{ + move_in_values: [{1, "1"}, {2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } = state + + assert views_before[["$sublink", "0"]] == MapSet.new() + assert views_after[["$sublink", "0"]] == MapSet.new([1, 2]) + end + + test "cancels pending inverse ops while buffering" do + state = new_state() + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new([1]) + + assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = + changes + end + + test "merges queued move outs into a single control message after splice" do + state = new_state(subquery_view: MapSet.new([2])) + dep_handle = dep_handle(state) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + {[], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + + {[], state} = + Subqueries.handle_event( + state, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + + assert %Steady{views: views} = state + view = views[["$sublink", "0"]] + assert view == MapSet.new() + + assert [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: patterns}} + ] = changes + + assert length(patterns) == 2 + end + + test "raises on dependency handle mismatch" do + assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> + new_state() + |> Subqueries.handle_event({:materializer_changes, "wrong", %{move_in: [], move_out: []}}) + end + end + + test "raises on query callbacks while steady" do + state = new_state() + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + end + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + Subqueries.handle_event(state, {:query_move_in_complete, [], lsn(1)}) + end + end + + test "builds a move-in where clause that excludes the current view" do + shape = shape() + {:ok, dnf_plan} = DnfPlan.compile(shape) + + assert {where, _params} = + DnfPlan.move_in_where_clause( + dnf_plan, + 0, + Enum.map([{1, "1"}, {2, "2"}], &elem(&1, 0)), + %{["$sublink", "0"] => MapSet.new([3])}, + shape.where.used_refs + ) + + assert is_binary(where) + end + + test "builds move-out control messages with the current hashing scheme" do + state = new_state() + + assert %{ + headers: %{ + event: "move-out", + patterns: [%{pos: 0, value: value}] + } + } = Subqueries.make_move_out_control_message(state, 0, [{1, "1"}]) + + assert value == + :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:1") + |> Base.encode16(case: :lower) + end + + test "extracts tag structure for the direct subquery predicate" do + shape = shape() + + assert {[["parent_id"]], %{["$sublink", "0"] => _comparison_expr}} = + Subqueries.move_in_tag_structure(shape) + end + + defp new_state(opts \\ []) do + shape = Keyword.get(opts, :shape, shape()) + {:ok, dnf_plan} = DnfPlan.compile(shape) + dep_handle = hd(shape.shape_dependencies_handles) + + Subqueries.new( + shape: shape, + stack_id: "stack-id", + shape_handle: "shape-handle", + dnf_plan: dnf_plan, + views: %{["$sublink", "0"] => Keyword.get(opts, :subquery_view, MapSet.new())}, + dependency_handle_to_ref: %{dep_handle => {0, ["$sublink", "0"]}} + ) + end + + defp new_negated_state(opts \\ []) do + new_state(Keyword.put(opts, :shape, negated_shape())) + end + + defp dep_handle(state) do + state.dependency_handle_to_ref |> Map.keys() |> hd() + end + + defp shape do + Shape.new!("child", + where: "parent_id IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp negated_shape do + Shape.new!("child", + where: "parent_id NOT IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp fill_handles(shape) do + filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) + handles = Enum.map(filled_deps, &Shape.generate_id/1) + %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} + end + + defp txn(xid, changes) do + %Transaction{xid: xid, changes: changes, num_changes: length(changes), lsn: lsn(xid)} + end + + defp lsn(value), do: Lsn.from_integer(value) + defp global_last_seen_lsn(value), do: {:global_last_seen_lsn, value} + + defp child_insert(id, parent_id) do + %Changes.NewRecord{ + relation: {"public", "child"}, + record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} + } + |> Changes.fill_key(["id"]) + end + + defp child_update(id, parent_id) do + child_update(id, parent_id, parent_id) + end + + defp child_update(id, old_parent_id, new_parent_id) do + Changes.UpdatedRecord.new( + relation: {"public", "child"}, + old_record: %{"id" => id, "parent_id" => old_parent_id, "name" => "child-#{id}-old"}, + record: %{"id" => id, "parent_id" => new_parent_id, "name" => "child-#{id}-new"} + ) + |> Changes.fill_key(["id"]) + end + + defp child_delete(id, parent_id) do + %Changes.DeletedRecord{ + relation: {"public", "child"}, + old_record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} + } + |> Changes.fill_key(["id"]) + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer_test.exs b/packages/sync-service/test/electric/shapes/consumer_test.exs index 6d0fe38cbe..8537d14e34 100644 --- a/packages/sync-service/test/electric/shapes/consumer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer_test.exs @@ -2,6 +2,7 @@ defmodule Electric.Shapes.ConsumerTest do use ExUnit.Case, async: true use Repatch.ExUnit, assert_expectations: true + alias Electric.LsnTracker alias Electric.Postgres.Lsn alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes @@ -1889,15 +1890,11 @@ defmodule Electric.Shapes.ConsumerTest do # Mock query_move_in_async to simulate a query without hitting the database Repatch.patch( - Electric.Shapes.PartialModes, + Electric.Shapes.Consumer.Subqueries, :query_move_in_async, [mode: :shared], - fn _task_sup, _shape_handle, _shape, _where_clause, opts -> - consumer_pid = opts[:consumer_pid] - name = opts[:move_in_name] - results_fn = opts[:results_fn] - - send(parent, {:query_requested, name, consumer_pid, results_fn}) + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) :ok end @@ -1927,10 +1924,10 @@ defmodule Electric.Shapes.ConsumerTest do ctx.stack_id ) - assert_receive {:query_requested, name, ^consumer_pid, results_fn} + assert_receive {:query_requested, ^consumer_pid} # Snapshot here is intentionally before the update to make sure the update is considered shadowing - send(consumer_pid, {:pg_snapshot_known, name, {90, 95, []}}) + send(consumer_pid, {:pg_snapshot_known, {90, 95, []}}) # Now send an UPDATE (xid = 100) before move-in query completes # This should be converted to INSERT @@ -1950,41 +1947,117 @@ defmodule Electric.Shapes.ConsumerTest do assert :ok = ShapeLogCollector.handle_event(txn, ctx.stack_id) - # Should get new_changes notification for the UPDATE-as-INSERT - assert_receive {^ref, :new_changes, _offset}, @receive_timeout - - # Now write data for the move-in query - results_fn.( - [ - [ - "\"public\".\"test_table\"/\"1\"", - ["tag_does_not_matter"], - Jason.encode!(%{"value" => %{"id" => "1", "value" => "old"}}) - ] - ], - {90, 95, []} + send( + consumer_pid, + {:query_move_in_complete, + [ + %Electric.Shapes.Consumer.Subqueries.QueryRow{ + key: ~s'"public"."test_table"/"1"', + json: + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + } + ], Lsn.from_integer(100)} ) - send(consumer_pid, {:query_move_in_complete, name, ["test_key"], {90, 95, []}}) - assert_receive {^ref, :new_changes, _offset}, @receive_timeout # Check storage for operations shape_storage = Storage.for_shape(shape_handle, ctx.storage) assert [ + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert"}, - "value" => %{"id" => "1", "value" => "updated"} + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} }, + %{"headers" => %{"control" => "snapshot-end"}}, %{ + "headers" => %{"operation" => "update"}, + "key" => ~s'"public"."test_table"/"1"' + } + ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) + end + + test "consumer splices a pending move-in on global_last_seen_lsn broadcast", ctx do + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Subqueries, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + consumer_pid = Consumer.whereis(ctx.stack_id, shape_handle) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + + ShapeLogCollector.handle_event( + complete_txn_fragment(100, Lsn.from_integer(50), [ + %Changes.NewRecord{ + relation: {"public", "other_table"}, + record: %{"id" => "1"}, + log_offset: LogOffset.new(Lsn.from_integer(50), 0) + } + ]), + ctx.stack_id + ) + + assert_receive {:query_requested, ^consumer_pid} + + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + + send( + consumer_pid, + {:query_move_in_complete, + [ + %Electric.Shapes.Consumer.Subqueries.QueryRow{ + key: ~s'"public"."test_table"/"1"', + json: + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, "headers" => %{ - "control" => "snapshot-end", - "xmin" => "90", - "xmax" => "95", - "xip_list" => [] + "operation" => "insert", + "relation" => ["public", "test_table"] } - } + }) + } + ], Lsn.from_integer(100)} + ) + + refute_receive {^ref, :new_changes, _}, 100 + + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) + assert_receive {^ref, :new_changes, _offset}, @receive_timeout + + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + + assert [ + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert"}, + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} + }, + %{"headers" => %{"control" => "snapshot-end"}} ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) end end diff --git a/packages/sync-service/test/electric/shapes/dnf_plan_test.exs b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs new file mode 100644 index 0000000000..5836f2c3dc --- /dev/null +++ b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs @@ -0,0 +1,829 @@ +defmodule Electric.Shapes.DnfPlanTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.Parser + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @refs %{ + ["id"] => :int4, + ["x"] => :int4, + ["y"] => :int4, + ["z"] => :int4, + ["status"] => :text, + ["name"] => :text, + ["a"] => :int4, + ["b"] => :int4 + } + + describe "compile/1 - no subqueries" do + test "returns :no_subqueries for shape without where clause" do + shape = make_shape(nil, []) + assert :no_subqueries = DnfPlan.compile(shape) + end + + test "returns :no_subqueries for shape without dependencies" do + where = parse_where(~S"x = 1") + shape = make_shape(where, []) + assert :no_subqueries = DnfPlan.compile(shape) + end + end + + describe "compile/1 - single subquery" do + test "single subquery shape" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep)", 1) + shape = make_shape(where, deps) + + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 1 + assert length(plan.disjuncts) == 1 + + # Single position, which is a subquery + assert map_size(plan.positions) == 1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.negated == false + assert pos0.dependency_index == 0 + assert pos0.subquery_ref == ["$sublink", "0"] + assert pos0.tag_columns == ["x"] + + assert plan.dependency_positions == %{0 => [0]} + assert plan.dependency_disjuncts == %{0 => [0]} + assert plan.has_negated_subquery == false + end + end + + describe "compile/1 - OR with subqueries" do + test "x IN sq1 OR y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 2 + assert length(plan.disjuncts) == 2 + + # Position 0: x IN sq1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.dependency_index == 0 + assert pos0.tag_columns == ["x"] + + # Position 1: y IN sq2 + pos1 = plan.positions[1] + assert pos1.is_subquery == true + assert pos1.dependency_index == 1 + assert pos1.tag_columns == ["y"] + + # Each dependency maps to its own position and disjunct + assert plan.dependency_positions == %{0 => [0], 1 => [1]} + assert plan.dependency_disjuncts == %{0 => [0], 1 => [1]} + assert plan.has_negated_subquery == false + end + + test "(x IN sq1 AND status = 'open') OR y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 3 + assert length(plan.disjuncts) == 2 + + # Find the subquery positions + subquery_positions = + plan.positions + |> Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Enum.sort_by(fn {_pos, info} -> info.dependency_index end) + + assert length(subquery_positions) == 2 + + [{sq1_pos, sq1_info}, {sq2_pos, sq2_info}] = subquery_positions + assert sq1_info.dependency_index == 0 + assert sq1_info.tag_columns == ["x"] + assert sq2_info.dependency_index == 1 + assert sq2_info.tag_columns == ["y"] + + # Find the row predicate position + row_positions = + plan.positions + |> Enum.filter(fn {_pos, info} -> not info.is_subquery end) + + assert [{row_pos, row_info}] = row_positions + assert row_info.sql =~ "status" + assert row_info.is_subquery == false + assert row_info.dependency_index == nil + + # Disjunct 0 should contain sq1 + row predicate, disjunct 1 should contain sq2 + [d0, d1] = plan.disjuncts + d0_positions = Enum.map(d0, &elem(&1, 0)) |> MapSet.new() + d1_positions = Enum.map(d1, &elem(&1, 0)) |> MapSet.new() + + assert MapSet.member?(d0_positions, sq1_pos) + assert MapSet.member?(d0_positions, row_pos) + assert MapSet.member?(d1_positions, sq2_pos) + + # dependency_disjuncts: dep 0 in disjunct 0, dep 1 in disjunct 1 + assert plan.dependency_disjuncts[0] == [0] + assert plan.dependency_disjuncts[1] == [1] + end + end + + describe "compile/1 - AND with subqueries" do + test "x IN sq1 AND y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) AND y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + # AND produces a single disjunct + assert plan.position_count == 2 + assert length(plan.disjuncts) == 1 + + [d0] = plan.disjuncts + assert length(d0) == 2 + + # Both deps are in the same (only) disjunct + assert plan.dependency_disjuncts == %{0 => [0], 1 => [0]} + end + end + + describe "compile/1 - composite key subqueries" do + test "composite key subquery position" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x, y) IN (SELECT a, b FROM dep1)", + 1, + sublink_refs: %{["$sublink", "0"] => {:array, {:row, [:int4, :int4]}}}, + dep_columns: [["a", "b"]] + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.tag_columns == {:hash_together, ["x", "y"]} + end + end + + describe "compile/1 - negated subqueries" do + test "NOT with subquery marks has_negated_subquery" do + {where, deps} = + parse_where_with_sublinks(~S"NOT x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.has_negated_subquery == true + + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.negated == true + assert plan.dependency_polarities == %{0 => :negated} + end + + test "positive subquery does not mark has_negated_subquery" do + {where, deps} = + parse_where_with_sublinks(~S"x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.has_negated_subquery == false + assert plan.dependency_polarities == %{0 => :positive} + end + end + + describe "compile/1 - nested subqueries compile per level" do + test "outer and inner shapes compile independently" do + # Outer shape: x IN sq1 (where sq1 itself has subqueries) + {outer_where, outer_deps} = + parse_where_with_sublinks(~S"x IN (SELECT id FROM dep1)", 1) + + outer_shape = make_shape(outer_where, outer_deps) + + # Inner shape: a IN sq2 (the inner subquery's own WHERE) + {inner_where, inner_deps} = + parse_where_with_sublinks(~S"a IN (SELECT id FROM dep2)", 1) + + inner_shape = make_shape(inner_where, inner_deps) + + # Each compiles independently + assert {:ok, outer_plan} = DnfPlan.compile(outer_shape) + assert {:ok, inner_plan} = DnfPlan.compile(inner_shape) + + # Each has its own positions + assert outer_plan.position_count == 1 + assert inner_plan.position_count == 1 + + # Each references its own dependency index 0 + assert outer_plan.dependency_positions == %{0 => [0]} + assert inner_plan.dependency_positions == %{0 => [0]} + end + end + + describe "compile/1 - distribution" do + test "AND distributes over OR with subqueries" do + # x IN sq1 AND (status = 'open' OR y IN sq2) + # Distributes to: (x IN sq1 AND status = 'open') OR (x IN sq1 AND y IN sq2) + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) AND (status = 'open' OR y IN (SELECT id FROM dep2))", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert length(plan.disjuncts) == 2 + + # dep 0 (sq1) should be in both disjuncts since AND distributes + assert plan.dependency_disjuncts[0] == [0, 1] + # dep 1 (sq2) should be in only the second disjunct + assert plan.dependency_disjuncts[1] == [1] + end + end + + @stack_id "test_stack" + @shape_handle "test_shape" + + describe "get_row_metadata/6 - single subquery" do + test "row included when value is in subquery view" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep)", 1) + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5])} + + assert {:ok, true, tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true] + assert length(tags) == 1 + end + + test "row excluded when value is not in subquery view" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep)", 1) + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([99])} + + assert {:ok, false, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false] + end + end + + describe "get_row_metadata/6 - OR with subqueries" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "included via first disjunct only", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true, false] + assert length(tags) == 2 + end + + test "included via second disjunct only", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false, true] + end + + test "included via both disjuncts", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true, true] + end + + test "excluded when neither disjunct satisfied", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([99]), ["$sublink", "1"] => MapSet.new([99])} + + assert {:ok, false, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false, false] + end + end + + describe "get_row_metadata/6 - mixed row predicate and subquery" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "included via first disjunct when subquery matches and row predicate true", + %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + # All 3 positions: subquery true, row predicate true, sq2 false + assert Enum.count(active_conditions, & &1) == 2 + end + + test "excluded from first disjunct when row predicate false", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, false, _tags, active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + # Row predicate position should be false + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, info} -> not info.is_subquery end) + |> elem(0) + + refute Enum.at(active_conditions, row_pred_pos) + end + + test "included via second disjunct even when first disjunct row predicate false", + %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, _active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + end + end + + describe "get_row_metadata/6 - tags" do + test "tags have correct structure with slots per position" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, tags, _active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert length(tags) == 2 + + # Tag 0 (disjunct for x IN sq1): has hash at pos 0, empty at pos 1 + [tag0, tag1] = tags + [slot0_0, slot0_1] = String.split(tag0, "/") + assert slot0_0 != "" + assert slot0_1 == "" + + # Tag 1 (disjunct for y IN sq2): empty at pos 0, has hash at pos 1 + [slot1_0, slot1_1] = String.split(tag1, "/") + assert slot1_0 == "" + assert slot1_1 != "" + end + + test "row predicate positions get sentinel value in tags" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, tags, _active_conditions} = + DnfPlan.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + # The first disjunct's tag should contain a "1" sentinel for the row predicate position + [tag0 | _] = tags + slots = String.split(tag0, "/") + + # Find the row predicate position + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, info} -> not info.is_subquery end) + |> elem(0) + + assert Enum.at(slots, row_pred_pos) == "1" + end + end + + describe "get_row_metadata/6 - update scenarios" do + test "update that changes which disjuncts are satisfied" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + # Old record: status = 'open', included via disjunct 0 + old_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + + assert {:ok, true, old_tags, old_ac} = + DnfPlan.get_row_metadata(plan, old_record, views, where, @stack_id, @shape_handle) + + # New record: status = 'closed', no longer via disjunct 0 but still via disjunct 1 + new_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + + assert {:ok, true, new_tags, new_ac} = + DnfPlan.get_row_metadata(plan, new_record, views, where, @stack_id, @shape_handle) + + # Row predicate position should have changed + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, info} -> not info.is_subquery end) + |> elem(0) + + assert Enum.at(old_ac, row_pred_pos) == true + assert Enum.at(new_ac, row_pred_pos) == false + + # removed_tags = old - new + removed_tags = old_tags -- new_tags + assert removed_tags == [] or length(removed_tags) >= 0 + end + + test "correct removed_tags when column values change" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + views = %{["$sublink", "0"] => MapSet.new([5, 99]), ["$sublink", "1"] => MapSet.new([10])} + + old_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + + {:ok, _old_incl, old_tags, _old_ac} = + DnfPlan.get_row_metadata(plan, old_record, views, where, @stack_id, @shape_handle) + + # x changes from 5 to 99 + new_record = %{"id" => "1", "x" => "99", "y" => "10", "status" => "open"} + + {:ok, _new_incl, new_tags, _new_ac} = + DnfPlan.get_row_metadata(plan, new_record, views, where, @stack_id, @shape_handle) + + # Tag hashes should differ because x changed + [old_tag0, _] = old_tags + [new_tag0, _] = new_tags + assert old_tag0 != new_tag0 + + # But tag1 (y IN sq2) should be the same since y didn't change + [_, old_tag1] = old_tags + [_, new_tag1] = new_tags + assert old_tag1 == new_tag1 + + removed_tags = old_tags -- new_tags + assert length(removed_tags) == 1 + end + end + + describe "move_in_where_clause/5 - x IN sq1 OR y IN sq2" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "move on dep 0 generates candidate for sq1 and exclusion for sq2", + %{plan: plan, where: where} do + move_in_values = [1, 2, 3] + views = %{["$sublink", "0"] => MapSet.new([10]), ["$sublink", "1"] => MapSet.new([20, 30])} + + {sql, params} = + DnfPlan.move_in_where_clause(plan, 0, move_in_values, views, where.used_refs) + + # Candidate should reference move_in_values with = ANY ($1::...) + assert sql =~ "= ANY ($1::" + # Exclusion should reference sq2's current view + assert sql =~ "AND NOT" + assert sql =~ "= ANY ($2::" + + # First param is move_in_values, second is current view for sq2 + assert length(params) == 2 + assert Enum.at(params, 0) == [1, 2, 3] + assert Enum.sort(Enum.at(params, 1)) == [20, 30] + end + + test "move on dep 1 generates candidate for sq2 and exclusion for sq1", + %{plan: plan, where: where} do + move_in_values = [100] + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + {sql, params} = + DnfPlan.move_in_where_clause(plan, 1, move_in_values, views, where.used_refs) + + assert sql =~ "AND NOT" + assert length(params) == 2 + assert Enum.at(params, 0) == [100] + assert Enum.at(params, 1) == [5] + end + end + + describe "move_in_where_clause/5 - (x IN sq1 AND status = 'open') OR y IN sq2" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "move on dep 0 includes row predicate in candidate", + %{plan: plan, where: where} do + move_in_values = [1, 2] + views = %{["$sublink", "0"] => MapSet.new([10]), ["$sublink", "1"] => MapSet.new([20])} + + {sql, params} = + DnfPlan.move_in_where_clause(plan, 0, move_in_values, views, where.used_refs) + + # Candidate should include both the subquery condition and the row predicate + assert sql =~ "= ANY ($1::" + assert sql =~ ~s|"status" = 'open'| + # Exclusion should be sq2's disjunct + assert sql =~ "AND NOT" + assert length(params) == 2 + end + end + + describe "move_in_where_clause/5 - negated subqueries" do + test "uses positive delta membership for x NOT IN sq1" do + {where, deps} = + parse_where_with_sublinks(~S"NOT x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + {sql, params} = + DnfPlan.move_in_where_clause( + plan, + 0, + [1, 2], + %{["$sublink", "0"] => MapSet.new([1, 2, 3])}, + where.used_refs + ) + + assert sql =~ ~s|"x" = ANY ($1::| + refute sql =~ ~s|NOT ("x" = ANY ($1::| + assert params == [[1, 2]] + end + + test "uses delta membership only for the triggering negated subquery position" do + {where, deps} = + parse_where_with_sublinks(~S"NOT (x = 7 OR y IN (SELECT id FROM dep1))", 1) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + {sql, params} = + DnfPlan.move_in_where_clause( + plan, + 0, + [5], + %{["$sublink", "0"] => MapSet.new([5, 6])}, + where.used_refs + ) + + assert sql =~ ~s|NOT ("x" = 7)| + assert sql =~ ~s|"y" = ANY ($1::| + refute sql =~ ~s|NOT ("y" = ANY ($1::| + assert params == [[5]] + end + end + + describe "make_move_in_broadcast/5" do + test "generates position-aware patterns" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + values = [{5, "5"}, {10, "10"}] + broadcast = DnfPlan.make_move_in_broadcast(plan, 0, values, @stack_id, @shape_handle) + + assert broadcast.headers.event == "move-in" + assert length(broadcast.headers.patterns) == 2 + + # All patterns should reference pos 0 (dep 0's position) + dep0_positions = Map.get(plan.dependency_positions, 0, []) + + Enum.each(broadcast.headers.patterns, fn pattern -> + assert pattern.pos in dep0_positions + assert is_binary(pattern.value) + end) + end + end + + describe "make_move_out_broadcast/5" do + test "generates position-aware patterns" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + values = [{5, "5"}] + broadcast = DnfPlan.make_move_out_broadcast(plan, 1, values, @stack_id, @shape_handle) + + assert broadcast.headers.event == "move-out" + assert length(broadcast.headers.patterns) == 1 + + dep1_positions = Map.get(plan.dependency_positions, 1, []) + [pattern] = broadcast.headers.patterns + assert pattern.pos in dep1_positions + end + end + + describe "active_conditions_sql/1" do + test "generates per-position boolean SQL expressions" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + sqls = DnfPlan.active_conditions_sql(plan) + + assert length(sqls) == plan.position_count + + # Each should be a boolean expression + Enum.each(sqls, fn sql -> + assert sql =~ "::boolean" + end) + end + end + + describe "tags_sql/3" do + test "generates per-disjunct tag SQL with position slots" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + sqls = DnfPlan.tags_sql(plan, @stack_id, @shape_handle) + + # One tag SQL per disjunct + assert length(sqls) == length(plan.disjuncts) + + # Each tag SQL should contain '/' separators between slots + Enum.each(sqls, fn sql -> + assert sql =~ "'/' ||" + end) + + # First disjunct should have md5 for subquery + sentinel for row predicate + [tag0_sql, _tag1_sql] = sqls + assert tag0_sql =~ "md5(" + assert tag0_sql =~ "'1'" + end + end + + # -- Helpers -- + + defp parse_where(where_clause) do + {:ok, pgquery} = Parser.parse_query(where_clause) + {:ok, expr} = Parser.validate_where_ast(pgquery, refs: @refs) + expr + end + + defp parse_where_with_sublinks(where_clause, num_deps, opts \\ []) do + sublink_refs = + Keyword.get_lazy(opts, :sublink_refs, fn -> + Map.new(0..(num_deps - 1), fn i -> + {["$sublink", "#{i}"], {:array, :int4}} + end) + end) + + dep_columns = Keyword.get(opts, :dep_columns, nil) + + sublink_queries = + Map.new(0..(num_deps - 1), fn i -> + cols = + if dep_columns do + Enum.at(dep_columns, i) |> Enum.join(", ") + else + "id" + end + + {i, "SELECT #{cols} FROM dep#{i + 1}"} + end) + + all_refs = Map.merge(@refs, sublink_refs) + {:ok, pgquery} = Parser.parse_query(where_clause) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, + refs: all_refs, + sublink_queries: sublink_queries + ) + + deps = + Enum.map(0..(num_deps - 1), fn _i -> + %Shape{ + root_table: {"public", "dep"}, + root_table_id: 100, + root_pk: ["id"], + root_column_count: 1, + where: nil, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + end) + + {expr, deps} + end + + defp make_shape(where, deps) do + %Shape{ + root_table: {"public", "test"}, + root_table_id: 1, + root_pk: ["id"], + root_column_count: 5, + where: where, + selected_columns: ["id", "x", "y", "status"], + explicitly_selected_columns: ["id", "x", "y", "status"], + shape_dependencies: deps, + shape_dependencies_handles: Enum.with_index(deps, fn _, i -> "dep_handle_#{i}" end) + } + end +end diff --git a/packages/sync-service/test/electric/shapes/querying_test.exs b/packages/sync-service/test/electric/shapes/querying_test.exs index 02b50dae03..795761fcc1 100644 --- a/packages/sync-service/test/electric/shapes/querying_test.exs +++ b/packages/sync-service/test/electric/shapes/querying_test.exs @@ -1,7 +1,7 @@ defmodule Electric.Shapes.QueryingTest do use Support.TransactionCase, async: true - alias Electric.Shapes.Shape.SubqueryMoves + alias Electric.Shapes.DnfPlan alias Electric.Postgres.Inspector.DirectInspector alias Electric.Shapes.Shape alias Electric.Shapes.Querying @@ -348,12 +348,65 @@ defmodule Electric.Shapes.QueryingTest do ) assert [ - %{value: %{value: "10", parent_id: "1"}, headers: %{tags: [^tag1]}}, - %{value: %{value: "20", parent_id: nil}, headers: %{tags: [^tag_null]}}, - %{value: %{value: "30", parent_id: "2"}, headers: %{tags: [^tag2]}} + %{ + value: %{value: "10", parent_id: "1"}, + headers: %{ + tags: [^tag1 <> "/", "/1"], + active_conditions: [true, false] + } + }, + %{ + value: %{value: "20", parent_id: nil}, + headers: %{ + tags: [^tag_null <> "/", "/1"], + active_conditions: [false, true] + } + }, + %{ + value: %{value: "30", parent_id: "2"}, + headers: %{ + tags: [^tag2 <> "/", "/1"], + active_conditions: [true, false] + } + } ] = result end + test "if shape has a negated subquery, computes DNF tags and active conditions", %{ + db_conn: conn + } do + for statement <- [ + "CREATE TABLE parent (id SERIAL PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", + "CREATE TABLE child (id SERIAL PRIMARY KEY, value INTEGER, parent_id INTEGER REFERENCES parent(id))", + "INSERT INTO parent (excluded) VALUES (false), (true)", + "INSERT INTO child (value, parent_id) VALUES (10, 1), (20, 2)" + ], + do: Postgrex.query!(conn, statement) + + shape = + Shape.new!("child", + where: "parent_id NOT IN (SELECT id FROM parent WHERE excluded = true)", + inspector: {DirectInspector, conn} + ) + + tag1 = + :crypto.hash(:md5, "dummy-stack-id" <> "dummy-shape-handle" <> "v:1") + |> Base.encode16(case: :lower) + + assert [ + %{ + value: %{value: "10", parent_id: "1"}, + headers: %{ + tags: [^tag1], + active_conditions: [true] + } + } + ] = + decode_stream( + Querying.stream_initial_data(conn, "dummy-stack-id", "dummy-shape-handle", shape) + ) + end + test "if shape has a subquery, tags the results (with composite keys)", %{db_conn: conn} do tag1 = :crypto.hash( @@ -438,7 +491,7 @@ defmodule Electric.Shapes.QueryingTest do end end - describe "query_move_in/5 with SubqueryMoves.move_in_where_clause/3" do + describe "query_move_in/5 with DnfPlan.move_in_where_clause/5" do test "builds the correct query which executes", %{db_conn: conn} do for statement <- [ "CREATE TABLE parent (id SERIAL PRIMARY KEY, value INTEGER)", @@ -455,13 +508,16 @@ defmodule Electric.Shapes.QueryingTest do ) |> fill_handles() - move_in_values = ["1", "2"] + {:ok, dnf_plan} = DnfPlan.compile(shape) + move_in_values = [1, 2] assert {where, params} = - SubqueryMoves.move_in_where_clause( - shape, - hd(shape.shape_dependencies_handles), - move_in_values + DnfPlan.move_in_where_clause( + dnf_plan, + 0, + move_in_values, + %{["$sublink", "0"] => MapSet.new()}, + shape.where.used_refs ) tag1 = @@ -503,13 +559,16 @@ defmodule Electric.Shapes.QueryingTest do ) |> fill_handles() - move_in_values = [{"1", "1"}, {"2", "2"}] + {:ok, dnf_plan} = DnfPlan.compile(shape) + move_in_values = [{1, 1}, {2, 2}] assert {where, params} = - SubqueryMoves.move_in_where_clause( - shape, - hd(shape.shape_dependencies_handles), - move_in_values + DnfPlan.move_in_where_clause( + dnf_plan, + 0, + move_in_values, + %{["$sublink", "0"] => MapSet.new()}, + shape.where.used_refs ) tag1 = diff --git a/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs b/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs deleted file mode 100644 index 46063cb284..0000000000 --- a/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs +++ /dev/null @@ -1,218 +0,0 @@ -defmodule Electric.Shapes.Shape.SubqueryMovesTest do - use ExUnit.Case, async: true - - alias Electric.Replication.Eval - alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves - - @inspector Support.StubInspector.new( - tables: ["parent", "child"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}} - ] - ) - - @composite_inspector Support.StubInspector.new( - tables: ["parent", "child"], - columns: [ - %{name: "id1", type: "int4", pk_position: 0, type_id: {23, 1}}, - %{name: "id2", type: "text", pk_position: 1, type_id: {28, 1}}, - %{name: "col1", type: "int4", pk_position: nil, type_id: {23, 1}}, - %{name: "col2", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "value", type: "int4", pk_position: nil, type_id: {23, 1}} - ] - ) - - describe "move_in_where_clause/3" do - test "generates ANY clause for single column subquery" do - # Query in the shape is normalized on casing, and we're matching that casing for this test - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM public.parent WHERE value = '1')", - inspector: @inspector - ) - |> fill_handles() - - move_ins = ["1", "2", "3"] - - {query, params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == "parent_id = ANY ($1::text[]::int8[])" - assert params == [["1", "2", "3"]] - end - - test "generates unnest clause for composite key subquery" do - shape = - Shape.new!("child", - where: "(col1, col2) IN (SELECT id1, id2 FROM public.parent WHERE value = 1)", - inspector: @composite_inspector - ) - |> fill_handles() - - # Move-ins for composite keys come as tuples - move_ins = [{"1", "a"}, {"2", "b"}] - - {query, params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == - "(col1, col2) IN (SELECT * FROM unnest($1::text[]::int4[], $2::text[]::text[]))" - - assert params == [["1", "2"], ["a", "b"]] - end - - test "handles shape without where clause in dependency" do - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM public.parent)", - inspector: @inspector - ) - |> fill_handles() - - move_ins = ["1"] - - {query, params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == "parent_id = ANY ($1::text[]::int8[])" - assert params == [["1"]] - end - end - - describe "make_move_out_control_message/2" do - test "creates control message with patterns for single values" do - shape = %Shape{ - root_table: {"public", "child"}, - root_table_id: 1, - shape_dependencies_handles: ["dep-handle-1"], - tag_structure: [["parent_id"]] - } - - move_outs = [{"dep-handle-1", [{1, "1"}, {2, "2"}, {3, "3"}]}] - - message = - SubqueryMoves.make_move_out_control_message(shape, "stack-id", "shape-handle", move_outs) - - tag1 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:1") - |> Base.encode16(case: :lower) - - tag2 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:2") - |> Base.encode16(case: :lower) - - tag3 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:3") - |> Base.encode16(case: :lower) - - assert message == %{ - headers: %{ - event: "move-out", - patterns: [ - %{pos: 0, value: tag1}, - %{pos: 0, value: tag2}, - %{pos: 0, value: tag3} - ] - } - } - end - - test "creates patterns for composite values" do - shape = %Shape{ - root_table: {"public", "child"}, - root_table_id: 1, - shape_dependencies_handles: ["dep-handle-1"], - tag_structure: [[{:hash_together, ["col1", "col2"]}]] - } - - # Composite keys are represented as lists - move_outs = [{"dep-handle-1", [{{1, "a"}, {"1", "a"}}, {{2, "b"}, {"2", "b"}}]}] - - message = - SubqueryMoves.make_move_out_control_message(shape, "stack-id", "shape-handle", move_outs) - - tag1 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "col1:v:1col2:v:a") - |> Base.encode16(case: :lower) - - tag2 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "col1:v:2col2:v:b") - |> Base.encode16(case: :lower) - - assert message == %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: tag1}, %{pos: 0, value: tag2}] - } - } - end - end - - describe "move_in_tag_structure/1" do - test "returns empty list for shape without where clause" do - shape = Shape.new!("child", inspector: @inspector) - - assert SubqueryMoves.move_in_tag_structure(shape) == {[], %{}} - end - - test "returns empty list for shape without dependencies" do - shape = Shape.new!("child", where: "parent_id > 5", inspector: @inspector) - - assert SubqueryMoves.move_in_tag_structure(shape) == {[], %{}} - end - - test "extracts single column reference from sublink" do - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM parent)", - inspector: @inspector - ) - - result = SubqueryMoves.move_in_tag_structure(shape) - - assert {[["parent_id"]], - %{["$sublink", "0"] => %Eval.Expr{eval: %Eval.Parser.Ref{path: ["parent_id"]}}}} = - result - end - - test "extracts composite key references from row expression" do - shape = - Shape.new!("child", - where: "(col1, col2) IN (SELECT id1, id2 FROM parent)", - inspector: @composite_inspector - ) - - result = SubqueryMoves.move_in_tag_structure(shape) - - assert {[[{:hash_together, ["col1", "col2"]}]], - %{ - ["$sublink", "0"] => %Eval.Expr{ - eval: %Eval.Parser.RowExpr{ - elements: [%Eval.Parser.Ref{path: ["col1"]}, %Eval.Parser.Ref{path: ["col2"]}] - } - } - }} = result - end - end - - defp fill_handles(shape) do - filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) - handles = Enum.map(filled_deps, &Shape.generate_id/1) - %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} - end -end diff --git a/packages/sync-service/test/electric/shapes/shape_test.exs b/packages/sync-service/test/electric/shapes/shape_test.exs index ee772356ee..68a3d0e8c3 100644 --- a/packages/sync-service/test/electric/shapes/shape_test.exs +++ b/packages/sync-service/test/electric/shapes/shape_test.exs @@ -4,6 +4,7 @@ defmodule Electric.Shapes.ShapeTest do alias Electric.Replication.Changes.{NewRecord, DeletedRecord, UpdatedRecord} alias Electric.Replication.Eval.Parser alias Electric.Replication.Changes + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Shape @where Parser.parse_and_validate_expression!("value ILIKE '%matches%'", @@ -296,6 +297,109 @@ defmodule Electric.Shapes.ShapeTest do ] end + test "uses DNF metadata for streamed changes when a subquery is combined with OR" do + where = + Parser.parse_and_validate_expression!( + "parent_id IN (SELECT id FROM parent WHERE include_parent = true) OR include_child = true", + refs: %{ + ["parent_id"] => :int4, + ["include_child"] => :bool, + ["$sublink", "0"] => {:array, :int4} + }, + sublink_queries: %{0 => "SELECT id FROM parent WHERE include_parent = true"} + ) + + shape = %Shape{ + root_table: {"public", "child"}, + root_table_id: @relation_id, + where: where, + selected_columns: ["id", "parent_id", "include_child"], + explicitly_selected_columns: ["id", "parent_id", "include_child"], + shape_dependencies: [ + %Shape{ + root_table: {"public", "parent"}, + root_table_id: 2, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + ] + } + + {:ok, dnf_plan} = DnfPlan.compile(shape) + + [converted] = + Shape.convert_change( + shape, + %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "1", "parent_id" => "1", "include_child" => "true"} + }, + stack_id: "test_stack", + shape_handle: "test_handle", + extra_refs: + {%{["$sublink", "0"] => MapSet.new()}, %{["$sublink", "0"] => MapSet.new()}}, + dnf_plan: dnf_plan + ) + + subquery_tag = + :crypto.hash(:md5, "test_stack" <> "test_handle" <> "v:1") + |> Base.encode16(case: :lower) + + assert converted.move_tags == [subquery_tag <> "/", "/1"] + assert converted.active_conditions == [false, true] + end + + test "keeps updates when only active_conditions change after column filtering" do + where = + Parser.parse_and_validate_expression!( + "parent_id IN (SELECT id FROM parent WHERE include_parent = true) OR include_child = true", + refs: %{ + ["parent_id"] => :int4, + ["include_child"] => :bool, + ["$sublink", "0"] => {:array, :int4} + }, + sublink_queries: %{0 => "SELECT id FROM parent WHERE include_parent = true"} + ) + + shape = %Shape{ + root_table: {"public", "child"}, + root_table_id: @relation_id, + where: where, + selected_columns: ["id"], + explicitly_selected_columns: ["id"], + shape_dependencies: [ + %Shape{ + root_table: {"public", "parent"}, + root_table_id: 2, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + ] + } + + {:ok, dnf_plan} = DnfPlan.compile(shape) + + [converted] = + Shape.convert_change( + shape, + %UpdatedRecord{ + relation: {"public", "child"}, + old_record: %{"id" => "1", "parent_id" => "1", "include_child" => "true"}, + record: %{"id" => "1", "parent_id" => "1", "include_child" => "true"} + }, + stack_id: "test_stack", + shape_handle: "test_handle", + extra_refs: + {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new()}}, + dnf_plan: dnf_plan + ) + + assert converted.old_record == %{"id" => "1"} + assert converted.record == %{"id" => "1"} + assert converted.active_conditions == [false, true] + assert converted.removed_move_tags == [] + end + test "correctly converts updates to deleted records with subqueries if the referenced set has changed" do shape = %Shape{ root_table: {"public", "table"}, @@ -610,6 +714,37 @@ defmodule Electric.Shapes.ShapeTest do ) end + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS accounts (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS users (id INT PRIMARY KEY, account_id INT REFERENCES accounts(id), active BOOLEAN NOT NULL DEFAULT false)" + ] + test "deduplicates identical subqueries onto one dependency", %{inspector: inspector} do + assert {:ok, + %Shape{ + where: where, + shape_dependencies: [ + %Shape{ + root_table: {"public", "accounts"}, + where: %{query: "id > 5"} + } + ], + subquery_comparison_expressions: comparison_expressions + }} = + Shape.new("users", + inspector: inspector, + where: + "(active = true OR account_id IN (SELECT id FROM accounts WHERE id > 5)) AND account_id IN (SELECT id FROM accounts WHERE id > 5)" + ) + + assert where.used_refs == %{ + ["active"] => :bool, + ["account_id"] => :int4, + ["$sublink", "0"] => {:array, :int4} + } + + assert Map.keys(comparison_expressions) == [["$sublink", "0"]] + end + @tag with_sql: [ "CREATE TABLE IF NOT EXISTS project (id INT PRIMARY KEY, value INT NOT NULL)", "CREATE TABLE IF NOT EXISTS item (id INT PRIMARY KEY, value INT NOT NULL)" diff --git a/packages/sync-service/test/support/pg_expression_generator.ex b/packages/sync-service/test/support/pg_expression_generator.ex index 65ba8a00e5..0c582295cd 100644 --- a/packages/sync-service/test/support/pg_expression_generator.ex +++ b/packages/sync-service/test/support/pg_expression_generator.ex @@ -259,4 +259,112 @@ defmodule Support.PgExpressionGenerator do ] |> one_of() end + + @doc """ + Generates arbitrary WHERE clause expressions by nesting datatype expressions + (including column references) with AND, OR, NOT, and boolean predicates at + multiple depth levels. + + Returns `{sql, refs}` tuples where `refs` is the map of column references + needed to parse the expression. + + This exercises the full combinatorial space — numeric, string, bool, and array + expressions mixed together with column refs and logical connectives — so that + any parseable WHERE clause can be verified for round-trip fidelity. + """ + def where_clause_generator do + # Column-ref-aware leaf expressions that mix literals with column references + refs = %{ + ["int_col"] => :int4, + ["float_col"] => :float8, + ["text_col"] => :text, + ["bool_col"] => :bool, + ["int_arr"] => {:array, :int4}, + ["text_arr"] => {:array, :text} + } + + int_ref = constant(~s|"int_col"|) + float_ref = constant(~s|"float_col"|) + text_ref = constant(~s|"text_col"|) + bool_ref = constant(~s|"bool_col"|) + int_arr_ref = constant(~s|"int_arr"|) + text_arr_ref = constant(~s|"text_arr"|) + + # Mix column refs with literals for each type + int_or_ref = one_of([int_gen(), int_ref]) + numeric_or_ref = one_of([numeric_gen(), int_ref, float_ref]) + str_or_ref = one_of([str_gen(), text_ref]) + bool_or_ref = one_of([bool_gen(), bool_ref]) + + numeric_with_refs = + one_of([ + expression_gen(nullable_type_gen(numeric_or_ref), [ + {:combine_op, numeric_op_gen()}, + {:unary_op, numeric_unary_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]), + expression_gen(nullable_type_gen(int_or_ref), [ + {:combine_op, int_op_gen()}, + {:unary_op, int_unary_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + ]) + + string_with_refs = + expression_gen(nullable_type_gen(str_or_ref), [ + {:combine_op, string_op_gen()}, + {:function_op, string_function_op_gen()}, + {:comparison_op, string_comparison_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + + bool_with_refs = + expression_gen(nullable_type_gen(bool_or_ref), [ + {:comparison_op, bool_comparison_op_gen()}, + {:unary_op, bool_unary_op_gen()}, + {:predicate_op, predicate_op_gen()} + ]) + + array_with_refs = + one_of([ + expression_gen(one_of([int_arr_ref, array_gen(int_gen(), dimension: 1)]), [ + {:comparison_op, array_comparison_op_gen()}, + {:function_op, array_function_op_gen()}, + {:membership_op, membership_op_gen()} + ]), + expression_gen(one_of([text_arr_ref, array_gen(str_gen(), dimension: 1)]), [ + {:comparison_op, array_comparison_op_gen()}, + {:function_op, array_function_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + ]) + + leaf = + one_of([ + numeric_with_refs, + string_with_refs, + bool_with_refs, + array_with_refs + ]) + + sql_gen = + nested_expression_gen( + leaf, + [ + {:combine_op, bool_comparison_op_gen()}, + {:unary_op, bool_unary_op_gen()}, + {:predicate_op, predicate_op_gen()} + ], + max_nesting: 3 + ) + + # Return {sql, refs} tuples + map(sql_gen, &{&1, refs}) + end end From 68d947c0bdb88bc7f0166599e88e1af430b23129 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 16:26:01 +0000 Subject: [PATCH 24/63] Buffer: remove ++ --- .../lib/electric/shapes/consumer/subqueries.ex | 10 +++++----- .../electric/shapes/consumer/subqueries/buffering.ex | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 28b97b0b88..423fef1cb8 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -231,7 +231,8 @@ defmodule Electric.Shapes.Consumer.Subqueries do @spec maybe_splice(Buffering.t()) :: {[output()], Steady.t() | Buffering.t()} def maybe_splice(%Buffering{} = state) do if ready_to_splice?(state) do - {pre_txns, post_txns} = Enum.split(state.buffered_txns, state.boundary_txn_count) + {pre_txns, post_txns} = + state.buffered_txns |> Enum.reverse() |> Enum.split(state.boundary_txn_count) move_in_broadcast = DnfPlan.make_move_in_broadcast( @@ -301,10 +302,9 @@ defmodule Electric.Shapes.Consumer.Subqueries do def maybe_buffer_boundary_from_snapshot(%Buffering{snapshot: nil} = state), do: state def maybe_buffer_boundary_from_snapshot(%Buffering{} = state) do - case Enum.find_index( - state.buffered_txns, - &(not Transaction.visible_in_snapshot?(&1, state.snapshot)) - ) do + case state.buffered_txns + |> Enum.reverse() + |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, state.snapshot))) do nil -> state index -> %{state | boundary_txn_count: index} end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index c3394bb948..fddb064040 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -124,7 +124,7 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, next_state = state |> Subqueries.maybe_buffer_boundary_from_txn(txn) - |> Map.update!(:buffered_txns, &(&1 ++ [txn])) + |> Map.update!(:buffered_txns, &[txn | &1]) Subqueries.maybe_splice(next_state) end From ed4414f03f315da5c78805ad20dac1fc3f956103 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 16:32:54 +0000 Subject: [PATCH 25/63] Move buffering to buffering state module --- .../electric/shapes/consumer/subqueries.ex | 105 -------------- .../shapes/consumer/subqueries/buffering.ex | 136 ++++++++++++++++-- 2 files changed, 123 insertions(+), 118 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 423fef1cb8..e30210fe0e 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -228,41 +228,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do end end - @spec maybe_splice(Buffering.t()) :: {[output()], Steady.t() | Buffering.t()} - def maybe_splice(%Buffering{} = state) do - if ready_to_splice?(state) do - {pre_txns, post_txns} = - state.buffered_txns |> Enum.reverse() |> Enum.split(state.boundary_txn_count) - - move_in_broadcast = - DnfPlan.make_move_in_broadcast( - state.dnf_plan, - state.trigger_dep_index, - state.move_in_values, - state.stack_id, - state.shape_handle - ) - - outputs = - Enum.flat_map( - pre_txns, - &convert_transaction(&1, state, state.views_before_move) - ) ++ - [move_in_broadcast] ++ - state.move_in_rows ++ - Enum.flat_map( - post_txns, - &convert_transaction(&1, state, state.views_after_move) - ) - - state - |> to_steady_state() - |> drain_queue(outputs) - else - {[], state} - end - end - @spec convert_transaction(Transaction.t(), Steady.t() | Buffering.t(), map()) :: [ Changes.change() ] @@ -279,58 +244,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> mark_last_change() end - @spec maybe_buffer_boundary_from_txn(Buffering.t(), Transaction.t()) :: Buffering.t() - def maybe_buffer_boundary_from_txn(%Buffering{boundary_txn_count: boundary} = state, _txn) - when not is_nil(boundary), - do: state - - def maybe_buffer_boundary_from_txn(%Buffering{snapshot: nil} = state, _txn), do: state - - def maybe_buffer_boundary_from_txn(%Buffering{} = state, %Transaction{} = txn) do - if Transaction.visible_in_snapshot?(txn, state.snapshot) do - state - else - %{state | boundary_txn_count: length(state.buffered_txns)} - end - end - - @spec maybe_buffer_boundary_from_snapshot(Buffering.t()) :: Buffering.t() - def maybe_buffer_boundary_from_snapshot(%Buffering{boundary_txn_count: boundary} = state) - when not is_nil(boundary), - do: state - - def maybe_buffer_boundary_from_snapshot(%Buffering{snapshot: nil} = state), do: state - - def maybe_buffer_boundary_from_snapshot(%Buffering{} = state) do - case state.buffered_txns - |> Enum.reverse() - |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, state.snapshot))) do - nil -> state - index -> %{state | boundary_txn_count: index} - end - end - - @spec maybe_buffer_boundary_from_lsn(Buffering.t(), Lsn.t()) :: Buffering.t() - def maybe_buffer_boundary_from_lsn(%Buffering{boundary_txn_count: boundary} = state, _lsn) - when not is_nil(boundary), - do: state - - def maybe_buffer_boundary_from_lsn(%Buffering{move_in_lsn: nil} = state, _lsn), do: state - - def maybe_buffer_boundary_from_lsn(%Buffering{} = state, %Lsn{} = lsn) do - case Lsn.compare(lsn, state.move_in_lsn) do - :lt -> state - _ -> %{state | boundary_txn_count: length(state.buffered_txns)} - end - end - - @spec maybe_buffer_boundary_from_seen_lsn(Buffering.t()) :: Buffering.t() - def maybe_buffer_boundary_from_seen_lsn(%Buffering{latest_seen_lsn: nil} = state), do: state - - def maybe_buffer_boundary_from_seen_lsn(%Buffering{} = state) do - maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) - end - @spec validate_dependency_handle!(Steady.t() | Buffering.t(), term()) :: :ok def validate_dependency_handle!(%{dependency_handle_to_ref: mapping}, dep_handle) do unless Map.has_key?(mapping, dep_handle) do @@ -395,24 +308,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do end end - defp to_steady_state(%Buffering{} = state) do - %Steady{ - shape: state.shape, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - dnf_plan: state.dnf_plan, - views: state.views_after_move, - dependency_handle_to_ref: state.dependency_handle_to_ref, - latest_seen_lsn: state.latest_seen_lsn, - queue: state.queue - } - end - - defp ready_to_splice?(%Buffering{} = state) do - not is_nil(state.snapshot) and not is_nil(state.move_in_rows) and - not is_nil(state.boundary_txn_count) - end - defp mark_last_change([]), do: [] defp mark_last_change(changes) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index fddb064040..192aded29e 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -1,8 +1,12 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do @moduledoc false + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Subqueries alias Electric.Shapes.Consumer.Subqueries.MoveQueue alias Electric.Shapes.Consumer.Subqueries.Steady + alias Electric.Shapes.DnfPlan @enforce_keys [ :shape, @@ -41,16 +45,16 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do shape_handle: String.t(), dnf_plan: Electric.Shapes.DnfPlan.t(), trigger_dep_index: non_neg_integer(), - move_in_values: [Electric.Shapes.Consumer.Subqueries.move_value()], + move_in_values: [Subqueries.move_value()], views_before_move: %{[String.t()] => MapSet.t()}, views_after_move: %{[String.t()] => MapSet.t()}, dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, snapshot: {term(), term(), [term()]} | nil, move_in_rows: [term()] | nil, - move_in_lsn: Electric.Postgres.Lsn.t() | nil, - latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, + move_in_lsn: Lsn.t() | nil, + latest_seen_lsn: Lsn.t() | nil, boundary_txn_count: non_neg_integer() | nil, - buffered_txns: [Electric.Replication.Changes.Transaction.t()], + buffered_txns: [Transaction.t()], queue: MoveQueue.t(), query_started?: boolean() } @@ -59,7 +63,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do Steady.t(), non_neg_integer(), [String.t()], - [Electric.Shapes.Consumer.Subqueries.move_value()], + [Subqueries.move_value()], MoveQueue.t(), :move_in | :move_out ) :: t() @@ -93,6 +97,111 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do } end + @spec maybe_buffer_boundary_from_txn(t(), Transaction.t()) :: t() + def maybe_buffer_boundary_from_txn(%__MODULE__{boundary_txn_count: boundary} = state, _txn) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_txn(%__MODULE__{snapshot: nil} = state, _txn), do: state + + def maybe_buffer_boundary_from_txn(%__MODULE__{} = state, %Transaction{} = txn) do + if Transaction.visible_in_snapshot?(txn, state.snapshot) do + state + else + %{state | boundary_txn_count: length(state.buffered_txns)} + end + end + + @spec maybe_buffer_boundary_from_snapshot(t()) :: t() + def maybe_buffer_boundary_from_snapshot(%__MODULE__{boundary_txn_count: boundary} = state) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_snapshot(%__MODULE__{snapshot: nil} = state), do: state + + def maybe_buffer_boundary_from_snapshot(%__MODULE__{} = state) do + case state.buffered_txns + |> Enum.reverse() + |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, state.snapshot))) do + nil -> state + index -> %{state | boundary_txn_count: index} + end + end + + @spec maybe_buffer_boundary_from_lsn(t(), Lsn.t()) :: t() + def maybe_buffer_boundary_from_lsn(%__MODULE__{boundary_txn_count: boundary} = state, _lsn) + when not is_nil(boundary), + do: state + + def maybe_buffer_boundary_from_lsn(%__MODULE__{move_in_lsn: nil} = state, _lsn), do: state + + def maybe_buffer_boundary_from_lsn(%__MODULE__{} = state, %Lsn{} = lsn) do + case Lsn.compare(lsn, state.move_in_lsn) do + :lt -> state + _ -> %{state | boundary_txn_count: length(state.buffered_txns)} + end + end + + @spec maybe_buffer_boundary_from_seen_lsn(t()) :: t() + def maybe_buffer_boundary_from_seen_lsn(%__MODULE__{latest_seen_lsn: nil} = state), do: state + + def maybe_buffer_boundary_from_seen_lsn(%__MODULE__{} = state) do + maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) + end + + @spec maybe_splice(t()) :: {[Subqueries.output()], Steady.t() | t()} + def maybe_splice(%__MODULE__{} = state) do + if ready_to_splice?(state) do + {pre_txns, post_txns} = + state.buffered_txns |> Enum.reverse() |> Enum.split(state.boundary_txn_count) + + move_in_broadcast = + DnfPlan.make_move_in_broadcast( + state.dnf_plan, + state.trigger_dep_index, + state.move_in_values, + state.stack_id, + state.shape_handle + ) + + outputs = + Enum.flat_map( + pre_txns, + &Subqueries.convert_transaction(&1, state, state.views_before_move) + ) ++ + [move_in_broadcast] ++ + state.move_in_rows ++ + Enum.flat_map( + post_txns, + &Subqueries.convert_transaction(&1, state, state.views_after_move) + ) + + state + |> to_steady_state() + |> Subqueries.drain_queue(outputs) + else + {[], state} + end + end + + defp ready_to_splice?(%__MODULE__{} = state) do + not is_nil(state.snapshot) and not is_nil(state.move_in_rows) and + not is_nil(state.boundary_txn_count) + end + + defp to_steady_state(%__MODULE__{} = state) do + %Steady{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: state.dnf_plan, + views: state.views_after_move, + dependency_handle_to_ref: state.dependency_handle_to_ref, + latest_seen_lsn: state.latest_seen_lsn, + queue: state.queue + } + end + defp apply_dependency_move(subquery_view, move_in_values, :move_in) do add_move_in_values(subquery_view, move_in_values) end @@ -118,15 +227,16 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, for: Electric.Shapes.Consumer.Subqueries.Buffering do alias Electric.Replication.Changes.Transaction alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.Buffering alias Electric.Shapes.Consumer.Subqueries.MoveQueue def handle_event(state, %Transaction{} = txn) do next_state = state - |> Subqueries.maybe_buffer_boundary_from_txn(txn) + |> Buffering.maybe_buffer_boundary_from_txn(txn) |> Map.update!(:buffered_txns, &[txn | &1]) - Subqueries.maybe_splice(next_state) + Buffering.maybe_splice(next_state) end def handle_event(state, {:global_last_seen_lsn, lsn}) do @@ -134,8 +244,8 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, state |> Map.put(:latest_seen_lsn, lsn) - |> Subqueries.maybe_buffer_boundary_from_lsn(lsn) - |> Subqueries.maybe_splice() + |> Buffering.maybe_buffer_boundary_from_lsn(lsn) + |> Buffering.maybe_splice() end def handle_event(state, {:materializer_changes, dep_handle, payload}) do @@ -159,8 +269,8 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, def handle_event(state, {:pg_snapshot_known, snapshot}) do state |> Map.put(:snapshot, snapshot) - |> Subqueries.maybe_buffer_boundary_from_snapshot() - |> Subqueries.maybe_splice() + |> Buffering.maybe_buffer_boundary_from_snapshot() + |> Buffering.maybe_splice() end def handle_event(%{move_in_rows: rows}, {:query_move_in_complete, _new_rows, _move_in_lsn}) @@ -173,7 +283,7 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, state |> Map.put(:move_in_rows, rows) |> Map.put(:move_in_lsn, move_in_lsn) - |> Subqueries.maybe_buffer_boundary_from_seen_lsn() - |> Subqueries.maybe_splice() + |> Buffering.maybe_buffer_boundary_from_seen_lsn() + |> Buffering.maybe_splice() end end From 2e63aa0a115f7d9d84f756dd3bfb0ad0bab494b5 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 16:43:45 +0000 Subject: [PATCH 26/63] Update @spec --- .../lib/electric/shapes/consumer/subqueries.ex | 12 ++++++------ .../electric/shapes/consumer/subqueries/buffering.ex | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index e30210fe0e..19085ee096 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -44,8 +44,8 @@ defmodule Electric.Shapes.Consumer.Subqueries do } end - @spec handle_event(Steady.t() | Buffering.t(), term()) :: - {[output()], Steady.t() | Buffering.t()} + @spec handle_event(StateMachine.t(), term()) :: + {[output()], StateMachine.t()} def handle_event(state, event), do: StateMachine.handle_event(state, event) @spec normalize_global_lsn(Electric.Postgres.Lsn.t() | non_neg_integer()) :: @@ -148,7 +148,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> then(&{tag_structure, &1}) end - @spec drain_queue(Steady.t(), [output()]) :: {[output()], Steady.t() | Buffering.t()} + @spec drain_queue(Steady.t(), [output()]) :: {[output()], StateMachine.t()} def drain_queue(%Steady{} = state, outputs \\ []) do case MoveQueue.pop_next(state.queue) do nil -> @@ -228,7 +228,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do end end - @spec convert_transaction(Transaction.t(), Steady.t() | Buffering.t(), map()) :: [ + @spec convert_transaction(Transaction.t(), StateMachine.t(), map()) :: [ Changes.change() ] def convert_transaction(%Transaction{changes: changes}, %{shape: shape} = state, views) do @@ -244,7 +244,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> mark_last_change() end - @spec validate_dependency_handle!(Steady.t() | Buffering.t(), term()) :: :ok + @spec validate_dependency_handle!(StateMachine.t(), term()) :: :ok def validate_dependency_handle!(%{dependency_handle_to_ref: mapping}, dep_handle) do unless Map.has_key?(mapping, dep_handle) do raise ArgumentError, @@ -255,7 +255,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do :ok end - @spec make_move_out_control_message(Steady.t() | Buffering.t(), non_neg_integer(), [ + @spec make_move_out_control_message(StateMachine.t(), non_neg_integer(), [ move_value() ]) :: move_out_control() diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index 192aded29e..10d0c97ba6 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -149,7 +149,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) end - @spec maybe_splice(t()) :: {[Subqueries.output()], Steady.t() | t()} + @spec maybe_splice(t()) :: {[Subqueries.output()], Subqueries.StateMachine.t()} def maybe_splice(%__MODULE__{} = state) do if ready_to_splice?(state) do {pre_txns, post_txns} = From 8ffa63648532aecd4b6fba990a983d8817eadce1 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 16:59:08 +0000 Subject: [PATCH 27/63] Move drain queue to steady --- .../electric/shapes/consumer/subqueries.ex | 103 ----------------- .../shapes/consumer/subqueries/buffering.ex | 2 +- .../shapes/consumer/subqueries/steady.ex | 106 +++++++++++++++++- 3 files changed, 106 insertions(+), 105 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 19085ee096..e0a33b56e8 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -148,86 +148,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> then(&{tag_structure, &1}) end - @spec drain_queue(Steady.t(), [output()]) :: {[output()], StateMachine.t()} - def drain_queue(%Steady{} = state, outputs \\ []) do - case MoveQueue.pop_next(state.queue) do - nil -> - {outputs, state} - - {{:move_out, dep_index, move_out_values}, queue} -> - subquery_ref = dep_ref_for_index(state, dep_index) - effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_out) - - case effect do - :move_out -> - next_state = %{ - state - | queue: queue, - views: - Map.update!(state.views, subquery_ref, &remove_move_values(&1, move_out_values)) - } - - broadcast = - DnfPlan.make_move_out_broadcast( - state.dnf_plan, - dep_index, - move_out_values, - state.stack_id, - state.shape_handle - ) - - drain_queue(next_state, outputs ++ [broadcast]) - - :move_in -> - {outputs, - Buffering.from_steady( - state, - dep_index, - subquery_ref, - move_out_values, - queue, - :move_out - )} - end - - {{:move_in, dep_index, move_in_values}, queue} -> - subquery_ref = dep_ref_for_index(state, dep_index) - effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_in) - - case effect do - :move_in -> - {outputs, - Buffering.from_steady( - state, - dep_index, - subquery_ref, - move_in_values, - queue, - :move_in - )} - - :move_out -> - next_state = %{ - state - | queue: queue, - views: - Map.update!(state.views, subquery_ref, &add_move_values(&1, move_in_values)) - } - - broadcast = - DnfPlan.make_move_out_broadcast( - state.dnf_plan, - dep_index, - move_in_values, - state.stack_id, - state.shape_handle - ) - - drain_queue(next_state, outputs ++ [broadcast]) - end - end - end - @spec convert_transaction(Transaction.t(), StateMachine.t(), map()) :: [ Changes.change() ] @@ -297,17 +217,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> Base.encode16(case: :lower) end - @doc """ - Returns the subquery ref path for a given dependency index, looking it up - via the dependency_handle_to_ref mapping. - """ - def dep_ref_for_index(%{dependency_handle_to_ref: mapping}, dep_index) do - case Enum.find(mapping, fn {_handle, {idx, _ref}} -> idx == dep_index end) do - {_handle, {_idx, ref}} -> ref - nil -> raise ArgumentError, "no dependency found for index #{dep_index}" - end - end - defp mark_last_change([]), do: [] defp mark_last_change(changes) do @@ -315,18 +224,6 @@ defmodule Electric.Shapes.Consumer.Subqueries do rest ++ [%{last | last?: true}] end - defp remove_move_values(subquery_view, move_values) do - Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> - MapSet.delete(view, value) - end) - end - - defp add_move_values(subquery_view, move_values) do - Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> - MapSet.put(view, value) - end) - end - defp fetch_opt!(opts, key) do case Map.fetch(opts, key) do {:ok, value} -> value diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index 10d0c97ba6..b30042c678 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -178,7 +178,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do state |> to_steady_state() - |> Subqueries.drain_queue(outputs) + |> Steady.drain_queue(outputs) else {[], state} end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex index 6a0eaa0c77..cd1962fb40 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex @@ -1,7 +1,11 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do @moduledoc false + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.Buffering alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.StateMachine + alias Electric.Shapes.DnfPlan @enforce_keys [:shape, :stack_id, :shape_handle, :dnf_plan, :dependency_handle_to_ref] defstruct [ @@ -25,6 +29,105 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, queue: MoveQueue.t() } + + @spec drain_queue(t(), [Subqueries.output()]) :: {[Subqueries.output()], StateMachine.t()} + def drain_queue(%__MODULE__{} = state, outputs \\ []) do + case MoveQueue.pop_next(state.queue) do + nil -> + {outputs, state} + + {{:move_out, dep_index, move_out_values}, queue} -> + subquery_ref = dep_ref_for_index(state, dep_index) + effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_out) + + case effect do + :move_out -> + next_state = %{ + state + | queue: queue, + views: + Map.update!(state.views, subquery_ref, &remove_move_values(&1, move_out_values)) + } + + broadcast = + DnfPlan.make_move_out_broadcast( + state.dnf_plan, + dep_index, + move_out_values, + state.stack_id, + state.shape_handle + ) + + drain_queue(next_state, outputs ++ [broadcast]) + + :move_in -> + {outputs, + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_out_values, + queue, + :move_out + )} + end + + {{:move_in, dep_index, move_in_values}, queue} -> + subquery_ref = dep_ref_for_index(state, dep_index) + effect = DnfPlan.effect_for_dependency_move(state.dnf_plan, dep_index, :move_in) + + case effect do + :move_in -> + {outputs, + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_in_values, + queue, + :move_in + )} + + :move_out -> + next_state = %{ + state + | queue: queue, + views: + Map.update!(state.views, subquery_ref, &add_move_values(&1, move_in_values)) + } + + broadcast = + DnfPlan.make_move_out_broadcast( + state.dnf_plan, + dep_index, + move_in_values, + state.stack_id, + state.shape_handle + ) + + drain_queue(next_state, outputs ++ [broadcast]) + end + end + end + + defp dep_ref_for_index(%{dependency_handle_to_ref: mapping}, dep_index) do + case Enum.find(mapping, fn {_handle, {idx, _ref}} -> idx == dep_index end) do + {_handle, {_idx, ref}} -> ref + nil -> raise ArgumentError, "no dependency found for index #{dep_index}" + end + end + + defp remove_move_values(subquery_view, move_values) do + Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> + MapSet.delete(view, value) + end) + end + + defp add_move_values(subquery_view, move_values) do + Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> + MapSet.put(view, value) + end) + end end defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, @@ -32,6 +135,7 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, alias Electric.Replication.Changes.Transaction alias Electric.Shapes.Consumer.Subqueries alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.Steady def handle_event(state, %Transaction{} = txn) do {Subqueries.convert_transaction(txn, state, state.views), state} @@ -48,7 +152,7 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, state |> Map.update!(:queue, &MoveQueue.enqueue(&1, dep_index, payload, dep_view)) - |> Subqueries.drain_queue() + |> Steady.drain_queue() end def handle_event(_state, {:pg_snapshot_known, _snapshot}) do From 4c5664f6c25fdf0d14ab406ad24bbf5140583f73 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 19 Mar 2026 17:24:56 +0000 Subject: [PATCH 28/63] Return actions --- .../lib/electric/shapes/consumer.ex | 44 +++-- .../electric/shapes/consumer/subqueries.ex | 10 +- .../sync-service/lib/electric/shapes/shape.ex | 9 +- .../shapes/consumer/subqueries_test.exs | 180 ++++++++++-------- 4 files changed, 141 insertions(+), 102 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 544a221b92..8e0ec9505a 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1011,18 +1011,32 @@ defmodule Electric.Shapes.Consumer do defp apply_subquery_event(state, event, opts \\ []) do previous_subquery_state = state.subquery_state - {outputs, subquery_state} = Subqueries.handle_event(state.subquery_state, event) + {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) state = %{state | subquery_state: subquery_state} state = maybe_start_move_in_query(state, previous_subquery_state) - case append_subquery_outputs(state, outputs, event, previous_subquery_state, opts) do - {state, nil, 0, 0} -> - {state, nil, 0, 0} + {state, notification, num_changes, total_size} = + Enum.reduce(actions, {state, nil, 0, 0}, fn + {:store, outputs}, {state, _notification, num_changes, total_size} -> + case append_subquery_outputs(state, outputs, opts) do + {state, nil, 0, 0} -> + {state, nil, num_changes, total_size} + + {state, range, new_changes, new_size} -> + {state, {range, state.latest_offset}, num_changes + new_changes, + total_size + new_size} + end + end) - {state, range, num_changes, total_size} -> - {state, {range, state.latest_offset}, num_changes, total_size} - end + latest_offset = + case notification do + nil -> nil + {_range, offset} -> offset + end + + state = finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) + {state, notification, num_changes, total_size} end defp maybe_start_move_in_query( @@ -1044,12 +1058,7 @@ defmodule Electric.Shapes.Consumer do defp should_start_move_in_query?(%Buffering{query_started?: false}), do: true defp should_start_move_in_query?(_), do: false - defp append_subquery_outputs(state, [], event, previous_subquery_state, _opts) do - state = finalize_subquery_flush_tracking(state, event, previous_subquery_state, nil) - {state, nil, 0, 0} - end - - defp append_subquery_outputs(state, outputs, event, previous_subquery_state, opts) do + defp append_subquery_outputs(state, outputs, opts) do outputs = insert_move_in_snapshot_end(outputs) previous_offset = state.latest_offset @@ -1087,10 +1096,11 @@ defmodule Electric.Shapes.Consumer do end end) - state = - finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) - - {state, {previous_offset, latest_offset}, num_changes, total_size} + if num_changes == 0 do + {state, nil, 0, 0} + else + {state, {previous_offset, latest_offset}, num_changes, total_size} + end end defp append_control_message_output(state, size, count, control_message) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index e0a33b56e8..665fc94acb 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -24,6 +24,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} @type output() :: Changes.change() | move_out_control() | QueryRow.t() + @type action() :: {:store, [output()]} def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel @@ -45,8 +46,13 @@ defmodule Electric.Shapes.Consumer.Subqueries do end @spec handle_event(StateMachine.t(), term()) :: - {[output()], StateMachine.t()} - def handle_event(state, event), do: StateMachine.handle_event(state, event) + {[action()], StateMachine.t()} + def handle_event(state, event) do + {outputs, state} = StateMachine.handle_event(state, event) + + actions = if outputs == [], do: [], else: [{:store, outputs}] + {actions, state} + end @spec normalize_global_lsn(Electric.Postgres.Lsn.t() | non_neg_integer()) :: Electric.Postgres.Lsn.t() diff --git a/packages/sync-service/lib/electric/shapes/shape.ex b/packages/sync-service/lib/electric/shapes/shape.ex index aefbed88e0..007d9a8789 100644 --- a/packages/sync-service/lib/electric/shapes/shape.ex +++ b/packages/sync-service/lib/electric/shapes/shape.ex @@ -708,7 +708,8 @@ defmodule Electric.Shapes.Shape do |> put_row_metadata(new_metadata) ] - {false, false} -> [] + {false, false} -> + [] end converted_changes @@ -734,10 +735,10 @@ defmodule Electric.Shapes.Shape do refs, opts ) do - {:ok, - WhereClause.includes_record?(where, record, refs), + {:ok, WhereClause.includes_record?(where, record, refs), %{ - move_tags: make_tags_from_pattern(tag_structure, record, opts[:stack_id], opts[:shape_handle]), + move_tags: + make_tags_from_pattern(tag_structure, record, opts[:stack_id], opts[:shape_handle]), active_conditions: make_active_conditions(tag_structure) }} end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index 2951049088..1e238add3b 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -23,21 +23,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do test "converts steady transactions against the current subquery view" do state = new_state(subquery_view: MapSet.new([1])) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, txn(50, [child_insert("1", "1"), child_insert("2", "2")]) ) assert %Steady{} = state - assert [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] = changes + assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}]] = actions end test "negated subquery turns dependency move-in into an outer move-out" do state = new_negated_state() dep_handle = dep_handle(state) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} @@ -47,13 +47,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: _value}] + store: [ + %{ + headers: %{ + event: "move-out", + patterns: [%{pos: 0, value: _value}] + } } - } - ] = changes + ] + ] = actions end test "negated subquery turns dependency move-out into a buffered outer move-in" do @@ -82,15 +84,17 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) assert %Steady{views: %{["$sublink", "0"] => view}} = state assert view == MapSet.new() assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}} + ] + ] = actions end test "splices buffered transactions around the snapshot visibility boundary" do @@ -111,7 +115,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do query_row = child_insert("99", "1") - {changes, state} = + {actions, state} = Subqueries.handle_event(state, {:query_move_in_complete, [query_row], lsn(10)}) assert %Steady{views: views} = state @@ -119,10 +123,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] + ] = actions end test "splices move-in query rows between emitted pre and post boundary changes" do @@ -139,7 +145,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} @@ -150,11 +156,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1, 2]) assert [ - %Changes.NewRecord{record: %{"id" => "10"}}, - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] = changes + store: [ + %Changes.NewRecord{record: %{"id" => "10"}}, + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] + ] = actions end test "splices updates that become a delete before the boundary and an insert after it" do @@ -177,7 +185,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} @@ -188,11 +196,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1, 2]) assert [ - %Changes.DeletedRecord{old_record: %{"id" => "10"}}, - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] = changes + store: [ + %Changes.DeletedRecord{old_record: %{"id" => "10"}}, + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] + ] = actions end test "uses lsn updates to splice at the current buffer tail" do @@ -214,14 +224,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = - changes + assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = + actions end test "splices buffered inserts, updates, and deletes around an lsn boundary" do @@ -262,19 +272,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do ]) ) - {changes, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new([1]) assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.UpdatedRecord{record: %{"id" => "21"}}, - %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.UpdatedRecord{record: %{"id" => "21"}}, + %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} + ] + ] = actions end test "keeps the transaction splice boundary when a later lsn update arrives" do @@ -293,7 +305,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -304,11 +316,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.NewRecord{record: %{"id" => "12"}, last?: true} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.NewRecord{record: %{"id" => "12"}, last?: true} + ] + ] = actions end test "keeps the lsn splice boundary when the snapshot later reveals invisible txns" do @@ -331,18 +345,20 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - {changes, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new([1]) assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %Changes.NewRecord{record: %{"id" => "10"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %Changes.NewRecord{record: %{"id" => "10"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ] + ] = actions end test "waits for an lsn update even when the move-in query completes with an empty buffer" do @@ -365,14 +381,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert %Buffering{} = state - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = - changes + assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = + actions end test "uses an lsn update that arrived before the move-in query completed" do @@ -388,7 +404,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -398,8 +414,8 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = - changes + assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = + actions end test "uses an lsn update that was already seen before the move-in started" do @@ -416,7 +432,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {changes, state} = + {actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -426,8 +442,8 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = - changes + assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = + actions end test "defers queued move outs until after splice and starts the next move in" do @@ -454,7 +470,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) assert %Buffering{ move_in_values: [{2, "2"}], @@ -466,10 +482,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert views_after[["$sublink", "0"]] == MapSet.new([2]) assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + ] + ] = actions end test "applies a queued move out for the active move-in value after splice" do @@ -496,17 +514,19 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new() assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + ] + ] = actions end test "batches consecutive move ins into a single active move in" do @@ -559,14 +579,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}] = - changes + assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = + actions end test "merges queued move outs into a single control message after splice" do @@ -599,17 +619,19 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} ) - {changes, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) + {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) assert %Steady{views: views} = state view = views[["$sublink", "0"]] assert view == MapSet.new() assert [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{event: "move-out", patterns: patterns}} - ] = changes + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: patterns}} + ] + ] = actions assert length(patterns) == 2 end From f2f07eb5aca4726d8ece67d458ce4dd8fd4cb0f8 Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 20 Mar 2026 08:32:42 +0000 Subject: [PATCH 29/63] Format where_clause_generator.ex --- .../test/support/oracle_harness/where_clause_generator.ex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index 8a60563a57..54527bfb81 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -326,8 +326,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # (expr OR expr) AND (expr OR expr) — each expr is a subquery or atomic defp or_branches_and_composition do bind( - {subquery_or_atomic(), subquery_or_atomic(), - subquery_or_atomic(), subquery_or_atomic()}, + {subquery_or_atomic(), subquery_or_atomic(), subquery_or_atomic(), subquery_or_atomic()}, fn {{s1, _}, {s2, _}, {s3, _}, {s4, _}} -> constant({"(#{s1} OR #{s2}) AND (#{s3} OR #{s4})", true}) end @@ -365,5 +364,4 @@ defmodule Support.OracleHarness.WhereClauseGenerator do {1, or_composition(depth - 1)} ]) end - end From bb8622126165964d716a8159da3d33fe1ca6f53f Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 20 Mar 2026 08:34:09 +0000 Subject: [PATCH 30/63] Add start_move_in_query as action --- .../lib/electric/shapes/consumer.ex | 29 ++-- .../electric/shapes/consumer/subqueries.ex | 10 +- .../shapes/consumer/subqueries_test.exs | 138 +++++++++--------- 3 files changed, 87 insertions(+), 90 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 8e0ec9505a..85c6ffcef7 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1014,19 +1014,22 @@ defmodule Electric.Shapes.Consumer do {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) state = %{state | subquery_state: subquery_state} - state = maybe_start_move_in_query(state, previous_subquery_state) {state, notification, num_changes, total_size} = Enum.reduce(actions, {state, nil, 0, 0}, fn - {:store, outputs}, {state, _notification, num_changes, total_size} -> + {:store, outputs}, {state, notification, num_changes, total_size} -> case append_subquery_outputs(state, outputs, opts) do {state, nil, 0, 0} -> - {state, nil, num_changes, total_size} + {state, notification, num_changes, total_size} {state, range, new_changes, new_size} -> {state, {range, state.latest_offset}, num_changes + new_changes, total_size + new_size} end + + :start_move_in_query, {state, notification, num_changes, total_size} -> + start_move_in_query(state) + {state, notification, num_changes, total_size} end) latest_offset = @@ -1039,25 +1042,11 @@ defmodule Electric.Shapes.Consumer do {state, notification, num_changes, total_size} end - defp maybe_start_move_in_query( - %{subquery_state: %Buffering{} = buffering_state} = state, - _previous_subquery_state - ) do - if should_start_move_in_query?(buffering_state) do - Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) - |> Subqueries.query_move_in_async(state, buffering_state, self()) - - %{state | subquery_state: %{buffering_state | query_started?: true}} - else - state - end + defp start_move_in_query(%{subquery_state: %Buffering{} = buffering_state} = state) do + Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) + |> Subqueries.query_move_in_async(state, buffering_state, self()) end - defp maybe_start_move_in_query(state, _previous_subquery_state), do: state - - defp should_start_move_in_query?(%Buffering{query_started?: false}), do: true - defp should_start_move_in_query?(_), do: false - defp append_subquery_outputs(state, outputs, opts) do outputs = insert_move_in_snapshot_end(outputs) previous_offset = state.latest_offset diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 665fc94acb..0510237bd1 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -24,7 +24,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} @type output() :: Changes.change() | move_out_control() | QueryRow.t() - @type action() :: {:store, [output()]} + @type action() :: {:store, [output()]} | :start_move_in_query def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel @@ -51,9 +51,15 @@ defmodule Electric.Shapes.Consumer.Subqueries do {outputs, state} = StateMachine.handle_event(state, event) actions = if outputs == [], do: [], else: [{:store, outputs}] - {actions, state} + maybe_start_move_in_query(actions, state) end + defp maybe_start_move_in_query(actions, %Buffering{query_started?: false} = state) do + {actions ++ [:start_move_in_query], %{state | query_started?: true}} + end + + defp maybe_start_move_in_query(actions, state), do: {actions, state} + @spec normalize_global_lsn(Electric.Postgres.Lsn.t() | non_neg_integer()) :: Electric.Postgres.Lsn.t() def normalize_global_lsn(%Lsn{} = lsn), do: lsn diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index 1e238add3b..fa4ebced6b 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -62,7 +62,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_negated_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} @@ -76,9 +76,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert before_view == MapSet.new([1]) assert after_view == MapSet.new() - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -101,7 +101,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} @@ -109,9 +109,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert %Buffering{} = state - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) query_row = child_insert("99", "1") @@ -135,15 +135,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) + {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) {actions, state} = Subqueries.handle_event( @@ -169,7 +169,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} @@ -177,13 +177,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do # Before the splice we still evaluate against the old view {1}, so moving # from parent 1 to parent 2 means the row leaves the shape and becomes a delete. - {[], state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) + {_actions, state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) # After the splice we evaluate against the new view {1, 2}, so moving from # parent 3 to parent 2 means the row enters the shape and becomes a new record. - {[], state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) + {_actions, state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) {actions, state} = Subqueries.handle_event( @@ -209,16 +209,16 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {_actions, state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -238,13 +238,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, txn(120, [ @@ -254,15 +254,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do ]) ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} ) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, txn(150, [ @@ -293,17 +293,17 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - {[], state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {_actions, state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) + {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) {actions, state} = Subqueries.handle_event( @@ -329,21 +329,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} ) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) @@ -365,15 +365,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -395,14 +395,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) {actions, state} = Subqueries.handle_event( @@ -422,15 +422,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) {actions, state} = Subqueries.handle_event( @@ -450,21 +450,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -482,11 +482,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert views_after[["$sublink", "0"]] == MapSet.new([2]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} - ] + {:store, + [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + ]}, + :start_move_in_query ] = actions end @@ -494,21 +496,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -533,7 +535,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} @@ -553,27 +555,27 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -593,27 +595,27 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([2])) dep_handle = dep_handle(state) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} ) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} ) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {[], state} = + {_actions, state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} From 4fb1feb7384e2009e606dc4a93ec211d079fed9c Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 20 Mar 2026 08:44:00 +0000 Subject: [PATCH 31/63] Update tests --- .../shapes/consumer/subqueries_test.exs | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index fa4ebced6b..ee42a712c5 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -62,7 +62,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_negated_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} @@ -76,9 +76,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert before_view == MapSet.new([1]) assert after_view == MapSet.new() - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -101,7 +101,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} @@ -109,9 +109,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert %Buffering{} = state - {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) query_row = child_insert("99", "1") @@ -135,15 +135,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) {actions, state} = Subqueries.handle_event( @@ -169,7 +169,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([1])) dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} @@ -177,13 +177,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do # Before the splice we still evaluate against the old view {1}, so moving # from parent 1 to parent 2 means the row leaves the shape and becomes a delete. - {_actions, state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) + {[], state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) # After the splice we evaluate against the new view {1, 2}, so moving from # parent 3 to parent 2 means the row enters the shape and becomes a new record. - {_actions, state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) + {[], state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) {actions, state} = Subqueries.handle_event( @@ -209,16 +209,16 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {[], state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -238,13 +238,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, txn(120, [ @@ -254,15 +254,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do ]) ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} ) - {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, txn(150, [ @@ -293,17 +293,17 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - {_actions, state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) - {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {[], state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) {actions, state} = Subqueries.handle_event( @@ -329,21 +329,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} ) - {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {_actions, state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {_actions, state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) + {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) @@ -365,15 +365,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} @@ -395,14 +395,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) {actions, state} = Subqueries.handle_event( @@ -422,15 +422,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) + {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) {actions, state} = Subqueries.handle_event( @@ -450,21 +450,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -496,21 +496,21 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -535,7 +535,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} @@ -555,27 +555,27 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state() dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} @@ -595,27 +595,27 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do state = new_state(subquery_view: MapSet.new([2])) dep_handle = dep_handle(state) - {_actions, state} = + {[:start_move_in_query], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} ) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} ) - {_actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) + {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - {_actions, state} = + {[], state} = Subqueries.handle_event( state, {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} From 9ad50558f421eaa78fee3a33485e8095717fdb6e Mon Sep 17 00:00:00 2001 From: rob Date: Sun, 22 Mar 2026 19:46:55 +0000 Subject: [PATCH 32/63] Hide Buffering from consumer --- .../lib/electric/shapes/consumer.ex | 162 +++++------------- .../electric/shapes/consumer/subqueries.ex | 38 +++- .../shapes/consumer/subqueries/buffering.ex | 5 + .../shapes/consumer/subqueries_test.exs | 78 +++++++-- 4 files changed, 145 insertions(+), 138 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 85c6ffcef7..f352df4a6d 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -5,7 +5,6 @@ defmodule Electric.Shapes.Consumer do alias Electric.Shapes.Consumer.PendingTxn alias Electric.Shapes.Consumer.State alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.Buffering alias Electric.Shapes.Consumer.Subqueries.QueryRow alias Electric.Shapes.DnfPlan @@ -930,46 +929,36 @@ defmodule Electric.Shapes.Consumer do defp handle_txn_with_subqueries(%Transaction{} = txn, state) do timestamp = System.monotonic_time() - was_buffering = match?(%Buffering{}, state.subquery_state) {state, notification, num_changes, total_size} = apply_subquery_event(state, txn, default_xid: txn.xid) - cond do - notification -> - :ok = notify_new_changes(state, notification) - - OpenTelemetry.add_span_attributes(%{ - num_bytes: total_size, - actual_num_changes: num_changes - }) - - lag = calculate_replication_lag(txn.commit_timestamp) - OpenTelemetry.add_span_attributes(replication_lag: lag) - - Electric.Telemetry.OpenTelemetry.execute( - [:electric, :storage, :transaction_stored], - %{ - duration: System.monotonic_time() - timestamp, - bytes: total_size, - count: 1, - operations: num_changes, - replication_lag: lag - }, - Map.new(State.telemetry_attrs(state)) - ) + if notification do + :ok = notify_new_changes(state, notification) - state + OpenTelemetry.add_span_attributes(%{ + num_bytes: total_size, + actual_num_changes: num_changes + }) - was_buffering or match?(%Buffering{}, state.subquery_state) -> - state + lag = calculate_replication_lag(txn.commit_timestamp) + OpenTelemetry.add_span_attributes(replication_lag: lag) - true -> - Logger.debug(fn -> - "No relevant changes found for #{inspect(state.shape)} in txn #{txn.xid}" - end) + Electric.Telemetry.OpenTelemetry.execute( + [:electric, :storage, :transaction_stored], + %{ + duration: System.monotonic_time() - timestamp, + bytes: total_size, + count: 1, + operations: num_changes, + replication_lag: lag + }, + Map.new(State.telemetry_attrs(state)) + ) - consider_flushed(state, txn.last_log_offset) + state + else + state end end @@ -1010,45 +999,50 @@ defmodule Electric.Shapes.Consumer do defp txn_conversion_extra_refs(_state), do: nil defp apply_subquery_event(state, event, opts \\ []) do - previous_subquery_state = state.subquery_state {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) state = %{state | subquery_state: subquery_state} - {state, notification, num_changes, total_size} = - Enum.reduce(actions, {state, nil, 0, 0}, fn - {:store, outputs}, {state, notification, num_changes, total_size} -> + {state, notification, num_changes, total_size, _latest_written} = + Enum.reduce(actions, {state, nil, 0, 0, nil}, fn + {:store, outputs}, {state, notification, num_changes, total_size, _latest_written} -> case append_subquery_outputs(state, outputs, opts) do {state, nil, 0, 0} -> - {state, notification, num_changes, total_size} + {state, notification, num_changes, total_size, nil} {state, range, new_changes, new_size} -> {state, {range, state.latest_offset}, num_changes + new_changes, - total_size + new_size} + total_size + new_size, state.latest_offset} end - :start_move_in_query, {state, notification, num_changes, total_size} -> + :start_move_in_query, {state, notification, num_changes, total_size, latest_written} -> start_move_in_query(state) - {state, notification, num_changes, total_size} + {state, notification, num_changes, total_size, latest_written} + + {:flush, source_offset}, {state, notification, num_changes, total_size, latest_written} -> + state = + if latest_written do + %{ + state + | txn_offset_mapping: + state.txn_offset_mapping ++ [{latest_written, source_offset}] + } + else + consider_flushed(state, source_offset) + end + + {state, notification, num_changes, total_size, latest_written} end) - latest_offset = - case notification do - nil -> nil - {_range, offset} -> offset - end - - state = finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) {state, notification, num_changes, total_size} end - defp start_move_in_query(%{subquery_state: %Buffering{} = buffering_state} = state) do - Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) - |> Subqueries.query_move_in_async(state, buffering_state, self()) + defp start_move_in_query(state) do + supervisor = Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) + Subqueries.query_move_in_async(supervisor, state, state.subquery_state, self()) end defp append_subquery_outputs(state, outputs, opts) do - outputs = insert_move_in_snapshot_end(outputs) previous_offset = state.latest_offset {state, latest_offset, total_size, num_changes} = @@ -1122,70 +1116,6 @@ defmodule Electric.Shapes.Consumer do count + 1} end - defp finalize_subquery_flush_tracking(state, event, previous_subquery_state, latest_offset) do - case {previous_subquery_state, state.subquery_state, event} do - {%Buffering{move_in_values: move_in_values}, %Buffering{move_in_values: move_in_values}, - _event} -> - state - - {%Buffering{} = buffering_state, _current_subquery_state, _event} -> - buffered_txns = buffered_txns_for_flush(buffering_state, event) - maybe_track_buffered_flush(state, buffered_txns, latest_offset) - - {_subquery_state, _current_subquery_state, %Transaction{last_log_offset: last_log_offset}} -> - maybe_track_txn_flush(state, last_log_offset, latest_offset) - - _ -> - state - end - end - - defp buffered_txns_for_flush(%Buffering{buffered_txns: buffered_txns}, %Transaction{} = txn), - do: buffered_txns ++ [txn] - - defp buffered_txns_for_flush(%Buffering{buffered_txns: buffered_txns}, _event), - do: buffered_txns - - defp maybe_track_buffered_flush(state, [], _latest_offset), do: state - - defp maybe_track_buffered_flush(state, buffered_txns, nil) do - buffered_txns - |> List.last() - |> then(&consider_flushed(state, &1.last_log_offset)) - end - - defp maybe_track_buffered_flush(state, buffered_txns, latest_offset) do - last_log_offset = buffered_txns |> List.last() |> Map.fetch!(:last_log_offset) - - %{ - state - | txn_offset_mapping: state.txn_offset_mapping ++ [{latest_offset, last_log_offset}] - } - end - - defp insert_move_in_snapshot_end(outputs) do - {before_query_rows, remaining} = Enum.split_while(outputs, &(not match?(%QueryRow{}, &1))) - - case Enum.split_while(remaining, &match?(%QueryRow{}, &1)) do - {[], _rest} -> - outputs - - {query_rows, rest} -> - before_query_rows ++ query_rows ++ [snapshot_end_control_message()] ++ rest - end - end - - defp snapshot_end_control_message do - %{headers: %{control: "snapshot-end"}} - end - - defp maybe_track_txn_flush(state, last_log_offset, nil), - do: consider_flushed(state, last_log_offset) - - defp maybe_track_txn_flush(state, last_log_offset, latest_offset) do - %{state | txn_offset_mapping: state.txn_offset_mapping ++ [{latest_offset, last_log_offset}]} - end - defp handle_txn_with_truncate(xid, state) do # TODO: This is a very naive way to handle truncations: if ANY relevant truncates are # present in the transaction, we're considering the whole transaction empty, and diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 0510237bd1..80a2d0fecd 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -24,7 +24,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} @type output() :: Changes.change() | move_out_control() | QueryRow.t() - @type action() :: {:store, [output()]} | :start_move_in_query + @type action() :: {:store, [output()]} | :start_move_in_query | {:flush, term()} def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel @@ -48,12 +48,42 @@ defmodule Electric.Shapes.Consumer.Subqueries do @spec handle_event(StateMachine.t(), term()) :: {[action()], StateMachine.t()} def handle_event(state, event) do - {outputs, state} = StateMachine.handle_event(state, event) + {outputs, new_state} = StateMachine.handle_event(state, event) actions = if outputs == [], do: [], else: [{:store, outputs}] - maybe_start_move_in_query(actions, state) + actions = actions ++ flush_actions(state, new_state, event) + maybe_start_move_in_query(actions, new_state) end + # Same Buffering state (same move_in_values before and after) → no flush needed + defp flush_actions( + %Buffering{move_in_values: move_in_values}, + %Buffering{move_in_values: move_in_values}, + _event + ), + do: [] + + # Was Buffering, now different (splice happened or new move-in) → flush based on buffered txns + defp flush_actions(%Buffering{} = prev_state, _new_state, event) do + buffered_txns = + case event do + %Transaction{} = txn -> prev_state.buffered_txns ++ [txn] + _ -> prev_state.buffered_txns + end + + case buffered_txns do + [] -> [] + txns -> [{:flush, txns |> List.last() |> Map.fetch!(:last_log_offset)}] + end + end + + # Steady + Transaction → flush with txn offset + defp flush_actions(_prev_state, _new_state, %Transaction{last_log_offset: last_log_offset}), + do: [{:flush, last_log_offset}] + + # Everything else → no flush + defp flush_actions(_prev_state, _new_state, _event), do: [] + defp maybe_start_move_in_query(actions, %Buffering{query_started?: false} = state) do {actions ++ [:start_move_in_query], %{state | query_started?: true}} end @@ -65,7 +95,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do def normalize_global_lsn(%Lsn{} = lsn), do: lsn def normalize_global_lsn(lsn) when is_integer(lsn), do: Lsn.from_integer(lsn) - @spec query_move_in_async(pid() | atom(), map(), Buffering.t(), pid()) :: :ok + @spec query_move_in_async(pid() | atom(), map(), StateMachine.t(), pid()) :: :ok def query_move_in_async( supervisor, consumer_state, diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index b30042c678..e8d46a2c23 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -171,6 +171,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do ) ++ [move_in_broadcast] ++ state.move_in_rows ++ + [snapshot_end_control_message()] ++ Enum.flat_map( post_txns, &Subqueries.convert_transaction(&1, state, state.views_after_move) @@ -202,6 +203,10 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do } end + defp snapshot_end_control_message do + %{headers: %{control: "snapshot-end"}} + end + defp apply_dependency_move(subquery_view, move_in_values, :move_in) do add_move_in_values(subquery_view, move_in_values) end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index ee42a712c5..4e12dead52 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -30,7 +30,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do ) assert %Steady{} = state - assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}]] = actions + assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}], flush: nil] = actions end test "negated subquery turns dependency move-in into an outer move-out" do @@ -92,7 +92,8 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert [ store: [ %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}} + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} ] ] = actions end @@ -126,8 +127,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -160,8 +163,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do %Changes.NewRecord{record: %{"id" => "10"}}, %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -200,8 +205,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do %Changes.DeletedRecord{old_record: %{"id" => "10"}}, %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -230,8 +237,14 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = - actions + assert [ + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} + ], + flush: nil + ] = actions end test "splices buffered inserts, updates, and deletes around an lsn boundary" do @@ -282,10 +295,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "11"}}, %Changes.UpdatedRecord{record: %{"id" => "21"}}, %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -319,9 +334,11 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "11"}}, %Changes.NewRecord{record: %{"id" => "12"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -355,9 +372,11 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %Changes.NewRecord{record: %{"id" => "10"}}, %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ] + ], + flush: nil ] = actions end @@ -387,8 +406,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = - actions + assert [ + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} + ] + ] = actions end test "uses an lsn update that arrived before the move-in query completed" do @@ -414,8 +438,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = - actions + assert [ + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} + ] + ] = actions end test "uses an lsn update that was already seen before the move-in started" do @@ -442,8 +471,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = - actions + assert [ + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} + ] + ] = actions end test "defers queued move outs until after splice and starts the next move in" do @@ -486,6 +520,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} ]}, :start_move_in_query @@ -526,6 +561,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} ] ] = actions @@ -587,8 +623,13 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do view = views[["$sublink", "0"]] assert view == MapSet.new([1]) - assert [store: [%{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}]] = - actions + assert [ + store: [ + %{headers: %{event: "move-in"}}, + %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}} + ] + ] = actions end test "merges queued move outs into a single control message after splice" do @@ -631,6 +672,7 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do store: [ %{headers: %{event: "move-in"}}, %Changes.NewRecord{record: %{"id" => "99"}}, + %{headers: %{control: "snapshot-end"}}, %{headers: %{event: "move-out", patterns: patterns}} ] ] = actions From 5d65b3b115fed418859c2f7d7434d94cdba606b9 Mon Sep 17 00:00:00 2001 From: rob Date: Sun, 22 Mar 2026 22:25:30 +0000 Subject: [PATCH 33/63] Remove duplication by adding NoSubqueries module --- .../lib/electric/shapes/consumer.ex | 205 ++++++------------ .../electric/shapes/consumer/subqueries.ex | 11 +- .../consumer/subqueries/no_subqueries.ex | 80 +++++++ .../shapes/consumer/subqueries_test.exs | 4 +- 4 files changed, 157 insertions(+), 143 deletions(-) create mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index f352df4a6d..1b029efa8c 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -5,6 +5,7 @@ defmodule Electric.Shapes.Consumer do alias Electric.Shapes.Consumer.PendingTxn alias Electric.Shapes.Consumer.State alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.NoSubqueries alias Electric.Shapes.Consumer.Subqueries.QueryRow alias Electric.Shapes.DnfPlan @@ -368,9 +369,7 @@ defmodule Electric.Shapes.Consumer do # Invalidate if subquery runtime was not initialized because the DNF plan # failed to compile. All supported subquery shapes, including negated ones, # should have an active subquery runtime. - should_invalidate? = is_nil(state.subquery_state) - - if should_invalidate? do + if match?(%NoSubqueries{}, state.subquery_state) do stop_and_clean(state) else {state, notification, _num_changes, _total_size} = @@ -533,9 +532,6 @@ defmodule Electric.Shapes.Consumer do |> mark_for_removal() end - defp handle_event({:global_last_seen_lsn, _lsn}, %{subquery_state: nil} = state), - do: state - defp handle_event({:global_last_seen_lsn, _lsn} = event, state) do {state, notification, _num_changes, _total_size} = apply_subquery_event(state, event) @@ -862,72 +858,7 @@ defmodule Electric.Shapes.Consumer do ) end - defp do_handle_txn(%Transaction{xid: xid, changes: changes} = txn, state) do - %{shape: shape, writer: writer} = state - - if is_nil(state.subquery_state) do - case convert_txn_changes(changes, xid, state) do - :includes_truncate -> - handle_txn_with_truncate(txn.xid, state) - - {[], 0} -> - Logger.debug(fn -> - "No relevant changes found for #{inspect(shape)} in txn #{txn.xid}" - end) - - consider_flushed(state, txn.last_log_offset) - - {[], 0, _} -> - Logger.debug(fn -> - "No relevant changes found for #{inspect(shape)} in txn #{txn.xid}" - end) - - consider_flushed(state, txn.last_log_offset) - - {changes, num_changes, last_log_offset} -> - timestamp = System.monotonic_time() - - {lines, total_size} = prepare_log_entries(changes, xid, shape) - writer = ShapeCache.Storage.append_to_log!(lines, writer) - - OpenTelemetry.add_span_attributes(%{ - num_bytes: total_size, - actual_num_changes: num_changes - }) - - updated_state = %{ - state - | writer: writer, - latest_offset: last_log_offset, - txn_offset_mapping: - state.txn_offset_mapping ++ [{last_log_offset, txn.last_log_offset}] - } - - :ok = notify_new_changes(updated_state, changes, last_log_offset) - - lag = calculate_replication_lag(txn.commit_timestamp) - OpenTelemetry.add_span_attributes(replication_lag: lag) - - Electric.Telemetry.OpenTelemetry.execute( - [:electric, :storage, :transaction_stored], - %{ - duration: System.monotonic_time() - timestamp, - bytes: total_size, - count: 1, - operations: num_changes, - replication_lag: lag - }, - Map.new(State.telemetry_attrs(updated_state)) - ) - - updated_state - end - else - handle_txn_with_subqueries(txn, state) - end - end - - defp handle_txn_with_subqueries(%Transaction{} = txn, state) do + defp do_handle_txn(%Transaction{} = txn, state) do timestamp = System.monotonic_time() {state, notification, num_changes, total_size} = @@ -962,42 +893,6 @@ defmodule Electric.Shapes.Consumer do end end - defp convert_txn_changes(changes, _xid, state) do - case convert_fragment_changes( - changes, - state.stack_id, - state.shape_handle, - state.shape, - txn_conversion_extra_refs(state) - ) do - :includes_truncate -> - :includes_truncate - - {[], 0} -> - {[], 0, nil} - - {reversed_changes, num_changes, last_log_offset} -> - converted_changes = - reversed_changes - |> maybe_mark_last_change(%{}) - |> Enum.reverse() - - {converted_changes, num_changes, last_log_offset} - - result -> - result - end - end - - defp txn_conversion_extra_refs(%State{shape: shape, stack_id: stack_id, subquery_state: nil}) do - if Shape.are_deps_filled(shape) do - refs = Materializer.get_all_as_refs(shape, stack_id) - {refs, refs} - end - end - - defp txn_conversion_extra_refs(_state), do: nil - defp apply_subquery_event(state, event, opts \\ []) do {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) @@ -1019,6 +914,10 @@ defmodule Electric.Shapes.Consumer do start_move_in_query(state) {state, notification, num_changes, total_size, latest_written} + {:truncate, xid}, {state, _notification, _num_changes, _total_size, _latest_written} -> + state = handle_txn_with_truncate(xid, state) + {state, nil, 0, 0, nil} + {:flush, source_offset}, {state, notification, num_changes, total_size, latest_written} -> state = if latest_written do @@ -1045,75 +944,90 @@ defmodule Electric.Shapes.Consumer do defp append_subquery_outputs(state, outputs, opts) do previous_offset = state.latest_offset - {state, latest_offset, total_size, num_changes} = - Enum.reduce(outputs, {state, previous_offset, 0, 0}, fn output, - {state, current_offset, size, count} -> + # Accumulate log items into batches, flushing when we hit a control message. + # This avoids per-change writes while still handling control messages that + # need their own storage API call. + {state, total_size, num_changes, pending_lines} = + Enum.reduce(outputs, {state, 0, 0, []}, fn output, {state, size, count, pending} -> case output do %QueryRow{key: key, json: json} -> json = IO.iodata_to_binary(json) - offset = LogOffset.increment(current_offset) - - writer = - ShapeCache.Storage.append_to_log!([{offset, key, :insert, json}], state.writer) + offset = LogOffset.increment(state.latest_offset) + line = {offset, key, :insert, json} - {%{state | writer: writer, latest_offset: offset}, offset, size + byte_size(json), - count + 1} + {%{state | latest_offset: offset}, size + byte_size(json), count + 1, + pending ++ [line]} %{headers: %{control: _}} = control_message -> - append_control_message_output(state, size, count, control_message) + state = flush_pending_lines(state, pending) + + {state, size, count} = + append_control_message_output(state, size, count, control_message) + + {state, size, count, []} %{headers: %{event: _}} = control_message -> - append_control_message_output(state, size, count, control_message) + state = flush_pending_lines(state, pending) + + {state, size, count} = + append_control_message_output(state, size, count, control_message) + + {state, size, count, []} %Changes.TruncatedRelation{} -> raise ArgumentError, "unexpected truncate emitted from subquery state machine" - %Changes.NewRecord{} = change -> - append_change_output(state, size, count, change, opts[:default_xid]) + change + when is_struct(change, Changes.NewRecord) or + is_struct(change, Changes.UpdatedRecord) or + is_struct(change, Changes.DeletedRecord) -> + {lines, line_size} = change_to_log_lines(change, opts[:default_xid], state.shape) + last_offset = lines |> List.last() |> elem(0) - %Changes.UpdatedRecord{} = change -> - append_change_output(state, size, count, change, opts[:default_xid]) - - %Changes.DeletedRecord{} = change -> - append_change_output(state, size, count, change, opts[:default_xid]) + {%{state | latest_offset: last_offset}, size + line_size, count + length(lines), + pending ++ lines} end end) + state = flush_pending_lines(state, pending_lines) + if num_changes == 0 do {state, nil, 0, 0} else - {state, {previous_offset, latest_offset}, num_changes, total_size} + {state, {previous_offset, state.latest_offset}, num_changes, total_size} end end + defp flush_pending_lines(state, []), do: state + + defp flush_pending_lines(state, lines) do + writer = ShapeCache.Storage.append_to_log!(lines, state.writer) + %{state | writer: writer} + end + defp append_control_message_output(state, size, count, control_message) do encoded = Jason.encode!(control_message) {{_, offset}, writer} = ShapeCache.Storage.append_control_message!(encoded, state.writer) - {%{state | writer: writer, latest_offset: offset}, offset, size + byte_size(encoded), - count + 1} + {%{state | writer: writer, latest_offset: offset}, size + byte_size(encoded), count + 1} end - defp append_change_output(state, size, count, change, xid) do + defp change_to_log_lines(change, xid, shape) do lines = change |> LogItems.from_change( xid, - Shape.pk(state.shape, change.relation), - state.shape.replica + Shape.pk(shape, change.relation), + shape.replica ) |> Enum.map(fn {offset, %{key: key} = log_item} -> {offset, key, log_item.headers.operation, Jason.encode!(log_item)} end) - writer = ShapeCache.Storage.append_to_log!(lines, state.writer) - last_offset = lines |> List.last() |> elem(0) - size_increase = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) - - {%{state | writer: writer, latest_offset: last_offset}, last_offset, size + size_increase, - count + 1} + size = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) + {lines, size} end defp handle_txn_with_truncate(xid, state) do @@ -1314,11 +1228,22 @@ defmodule Electric.Shapes.Consumer do _other -> # :no_subqueries or {:error, _} - no subquery runtime needed - state + initialize_no_subqueries(state) end end - defp initialize_subquery_runtime(state), do: state + defp initialize_subquery_runtime(state), do: initialize_no_subqueries(state) + + defp initialize_no_subqueries(state) do + %{ + state + | subquery_state: %NoSubqueries{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle + } + } + end defp all_materializers_alive?(state) do Enum.all?(state.shape.shape_dependencies_handles, fn shape_handle -> diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 80a2d0fecd..d1b19b5bf4 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -24,7 +24,8 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} @type output() :: Changes.change() | move_out_control() | QueryRow.t() - @type action() :: {:store, [output()]} | :start_move_in_query | {:flush, term()} + @type action() :: + {:store, [output()]} | :start_move_in_query | {:flush, term()} | {:truncate, term()} def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel @@ -50,7 +51,13 @@ defmodule Electric.Shapes.Consumer.Subqueries do def handle_event(state, event) do {outputs, new_state} = StateMachine.handle_event(state, event) - actions = if outputs == [], do: [], else: [{:store, outputs}] + actions = + case outputs do + [] -> [] + [{:truncate, _xid} = truncate] -> [truncate] + _ -> [{:store, outputs}] + end + actions = actions ++ flush_actions(state, new_state, event) maybe_start_move_in_query(actions, new_state) end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex new file mode 100644 index 0000000000..c11abadf9c --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex @@ -0,0 +1,80 @@ +defmodule Electric.Shapes.Consumer.Subqueries.NoSubqueries do + @moduledoc false + + @enforce_keys [:shape, :stack_id, :shape_handle] + defstruct [:shape, :stack_id, :shape_handle] + + @type t() :: %__MODULE__{ + shape: Electric.Shapes.Shape.t(), + stack_id: String.t(), + shape_handle: String.t() + } +end + +defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, + for: Electric.Shapes.Consumer.Subqueries.NoSubqueries do + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.Shape + + require Shape + + def handle_event(state, %Transaction{xid: xid, changes: changes}) do + extra_refs = + if Shape.are_deps_filled(state.shape) do + refs = Materializer.get_all_as_refs(state.shape, state.stack_id) + {refs, refs} + end + + result = + Enum.reduce_while(changes, [], fn change, acc -> + case change do + %Changes.TruncatedRelation{} -> + {:halt, {:truncate, xid}} + + _ -> + converted = + Shape.convert_change(state.shape, change, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + extra_refs: extra_refs + ) + + {:cont, acc ++ converted} + end + end) + + case result do + {:truncate, xid} -> + {[{:truncate, xid}], state} + + [] -> + {[], state} + + changes -> + {mark_last_change(changes), state} + end + end + + def handle_event(state, {:global_last_seen_lsn, _lsn}), do: {[], state} + + def handle_event(_state, {:pg_snapshot_known, _snapshot}) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} in NoSubqueries state" + end + + def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do + raise ArgumentError, "received {:query_move_in_complete, ...} in NoSubqueries state" + end + + def handle_event(_state, {:materializer_changes, _dep_handle, _payload}) do + raise ArgumentError, "received {:materializer_changes, ...} in NoSubqueries state" + end + + defp mark_last_change([]), do: [] + + defp mark_last_change(changes) do + {last, rest} = List.pop_at(changes, -1) + rest ++ [%{last | last?: true}] + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index 4e12dead52..56d4c68fff 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -30,7 +30,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do ) assert %Steady{} = state - assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}], flush: nil] = actions + + assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}], flush: nil] = + actions end test "negated subquery turns dependency move-in into an outer move-out" do From b5fe92990c784976020b816b3adb0d95a59bb958 Mon Sep 17 00:00:00 2001 From: rob Date: Sun, 22 Mar 2026 23:00:59 +0000 Subject: [PATCH 34/63] Don't acumulate batches --- .../lib/electric/shapes/consumer.ex | 95 ++++----- .../electric/shapes/consumer/subqueries.ex | 15 +- .../shapes/consumer/subqueries/buffering.ex | 32 +-- .../consumer/subqueries/no_subqueries.ex | 2 +- .../shapes/consumer/subqueries/steady.ex | 20 +- .../shapes/consumer/subqueries_test.exs | 186 ++++++++---------- 6 files changed, 151 insertions(+), 199 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 1b029efa8c..2c93eae169 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -897,28 +897,32 @@ defmodule Electric.Shapes.Consumer do {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) state = %{state | subquery_state: subquery_state} + previous_offset = state.latest_offset - {state, notification, num_changes, total_size, _latest_written} = - Enum.reduce(actions, {state, nil, 0, 0, nil}, fn - {:store, outputs}, {state, notification, num_changes, total_size, _latest_written} -> - case append_subquery_outputs(state, outputs, opts) do + {state, num_changes, total_size, latest_written} = + Enum.reduce(actions, {state, 0, 0, nil}, fn + {:store, items}, {state, num_changes, total_size, _latest_written} -> + case write_items_to_log(state, items, opts) do {state, nil, 0, 0} -> - {state, notification, num_changes, total_size, nil} + {state, num_changes, total_size, nil} - {state, range, new_changes, new_size} -> - {state, {range, state.latest_offset}, num_changes + new_changes, - total_size + new_size, state.latest_offset} + {state, _range, new_changes, new_size} -> + {state, num_changes + new_changes, total_size + new_size, state.latest_offset} end - :start_move_in_query, {state, notification, num_changes, total_size, latest_written} -> + {:control, message}, {state, num_changes, total_size, latest_written} -> + {state, size, count} = append_control_message_output(state, 0, 0, message) + {state, num_changes + count, total_size + size, latest_written} + + :start_move_in_query, {state, num_changes, total_size, latest_written} -> start_move_in_query(state) - {state, notification, num_changes, total_size, latest_written} + {state, num_changes, total_size, latest_written} - {:truncate, xid}, {state, _notification, _num_changes, _total_size, _latest_written} -> + {:truncate, xid}, {state, _num_changes, _total_size, _latest_written} -> state = handle_txn_with_truncate(xid, state) - {state, nil, 0, 0, nil} + {state, 0, 0, nil} - {:flush, source_offset}, {state, notification, num_changes, total_size, latest_written} -> + {:flush, source_offset}, {state, num_changes, total_size, latest_written} -> state = if latest_written do %{ @@ -930,9 +934,14 @@ defmodule Electric.Shapes.Consumer do consider_flushed(state, source_offset) end - {state, notification, num_changes, total_size, latest_written} + {state, num_changes, total_size, latest_written} end) + notification = + if state.latest_offset != previous_offset do + {{previous_offset, state.latest_offset}, state.latest_offset} + end + {state, notification, num_changes, total_size} end @@ -941,68 +950,34 @@ defmodule Electric.Shapes.Consumer do Subqueries.query_move_in_async(supervisor, state, state.subquery_state, self()) end - defp append_subquery_outputs(state, outputs, opts) do + defp write_items_to_log(state, [], _opts), do: {state, nil, 0, 0} + + defp write_items_to_log(state, items, opts) do previous_offset = state.latest_offset - # Accumulate log items into batches, flushing when we hit a control message. - # This avoids per-change writes while still handling control messages that - # need their own storage API call. - {state, total_size, num_changes, pending_lines} = - Enum.reduce(outputs, {state, 0, 0, []}, fn output, {state, size, count, pending} -> - case output do + {lines, total_size, state} = + Enum.reduce(items, {[], 0, state}, fn item, {lines, size, state} -> + case item do %QueryRow{key: key, json: json} -> json = IO.iodata_to_binary(json) offset = LogOffset.increment(state.latest_offset) line = {offset, key, :insert, json} - - {%{state | latest_offset: offset}, size + byte_size(json), count + 1, - pending ++ [line]} - - %{headers: %{control: _}} = control_message -> - state = flush_pending_lines(state, pending) - - {state, size, count} = - append_control_message_output(state, size, count, control_message) - - {state, size, count, []} - - %{headers: %{event: _}} = control_message -> - state = flush_pending_lines(state, pending) - - {state, size, count} = - append_control_message_output(state, size, count, control_message) - - {state, size, count, []} - - %Changes.TruncatedRelation{} -> - raise ArgumentError, "unexpected truncate emitted from subquery state machine" + {lines ++ [line], size + byte_size(json), %{state | latest_offset: offset}} change when is_struct(change, Changes.NewRecord) or is_struct(change, Changes.UpdatedRecord) or is_struct(change, Changes.DeletedRecord) -> - {lines, line_size} = change_to_log_lines(change, opts[:default_xid], state.shape) - last_offset = lines |> List.last() |> elem(0) + {new_lines, line_size} = change_to_log_lines(change, opts[:default_xid], state.shape) + last_offset = new_lines |> List.last() |> elem(0) - {%{state | latest_offset: last_offset}, size + line_size, count + length(lines), - pending ++ lines} + {lines ++ new_lines, size + line_size, %{state | latest_offset: last_offset}} end end) - state = flush_pending_lines(state, pending_lines) - - if num_changes == 0 do - {state, nil, 0, 0} - else - {state, {previous_offset, state.latest_offset}, num_changes, total_size} - end - end - - defp flush_pending_lines(state, []), do: state - - defp flush_pending_lines(state, lines) do writer = ShapeCache.Storage.append_to_log!(lines, state.writer) - %{state | writer: writer} + state = %{state | writer: writer} + {state, {previous_offset, state.latest_offset}, length(lines), total_size} end defp append_control_message_output(state, size, count, control_message) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index d1b19b5bf4..baa91c7c3a 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -23,9 +23,10 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type move_value() :: {term(), term()} @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} - @type output() :: Changes.change() | move_out_control() | QueryRow.t() + @type sm_action() :: + {:store, [Changes.change() | QueryRow.t()]} | {:control, map()} | {:truncate, term()} @type action() :: - {:store, [output()]} | :start_move_in_query | {:flush, term()} | {:truncate, term()} + sm_action() | :start_move_in_query | {:flush, term()} def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel @@ -49,15 +50,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do @spec handle_event(StateMachine.t(), term()) :: {[action()], StateMachine.t()} def handle_event(state, event) do - {outputs, new_state} = StateMachine.handle_event(state, event) - - actions = - case outputs do - [] -> [] - [{:truncate, _xid} = truncate] -> [truncate] - _ -> [{:store, outputs}] - end - + {actions, new_state} = StateMachine.handle_event(state, event) actions = actions ++ flush_actions(state, new_state, event) maybe_start_move_in_query(actions, new_state) end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index e8d46a2c23..0eaa5b92b2 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -149,7 +149,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) end - @spec maybe_splice(t()) :: {[Subqueries.output()], Subqueries.StateMachine.t()} + @spec maybe_splice(t()) :: {[Subqueries.sm_action()], Subqueries.StateMachine.t()} def maybe_splice(%__MODULE__{} = state) do if ready_to_splice?(state) do {pre_txns, post_txns} = @@ -164,27 +164,31 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do state.shape_handle ) - outputs = - Enum.flat_map( - pre_txns, - &Subqueries.convert_transaction(&1, state, state.views_before_move) - ) ++ - [move_in_broadcast] ++ - state.move_in_rows ++ - [snapshot_end_control_message()] ++ - Enum.flat_map( - post_txns, - &Subqueries.convert_transaction(&1, state, state.views_after_move) - ) + actions = + store_action(pre_txns, state, state.views_before_move) ++ + [{:control, move_in_broadcast}] ++ + store_rows_action(state.move_in_rows) ++ + [{:control, snapshot_end_control_message()}] ++ + store_action(post_txns, state, state.views_after_move) state |> to_steady_state() - |> Steady.drain_queue(outputs) + |> Steady.drain_queue(actions) else {[], state} end end + defp store_action(txns, state, views) do + case Enum.flat_map(txns, &Subqueries.convert_transaction(&1, state, views)) do + [] -> [] + changes -> [{:store, changes}] + end + end + + defp store_rows_action([]), do: [] + defp store_rows_action(rows), do: [{:store, rows}] + defp ready_to_splice?(%__MODULE__{} = state) do not is_nil(state.snapshot) and not is_nil(state.move_in_rows) and not is_nil(state.boundary_txn_count) diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex index c11abadf9c..65d11ee0a1 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex @@ -53,7 +53,7 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, {[], state} changes -> - {mark_last_change(changes), state} + {[{:store, mark_last_change(changes)}], state} end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex index cd1962fb40..61d8694f01 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex @@ -30,11 +30,12 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do queue: MoveQueue.t() } - @spec drain_queue(t(), [Subqueries.output()]) :: {[Subqueries.output()], StateMachine.t()} - def drain_queue(%__MODULE__{} = state, outputs \\ []) do + @spec drain_queue(t(), [Subqueries.sm_action()]) :: + {[Subqueries.sm_action()], StateMachine.t()} + def drain_queue(%__MODULE__{} = state, actions \\ []) do case MoveQueue.pop_next(state.queue) do nil -> - {outputs, state} + {actions, state} {{:move_out, dep_index, move_out_values}, queue} -> subquery_ref = dep_ref_for_index(state, dep_index) @@ -58,10 +59,10 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do state.shape_handle ) - drain_queue(next_state, outputs ++ [broadcast]) + drain_queue(next_state, actions ++ [{:control, broadcast}]) :move_in -> - {outputs, + {actions, Buffering.from_steady( state, dep_index, @@ -78,7 +79,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do case effect do :move_in -> - {outputs, + {actions, Buffering.from_steady( state, dep_index, @@ -105,7 +106,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do state.shape_handle ) - drain_queue(next_state, outputs ++ [broadcast]) + drain_queue(next_state, actions ++ [{:control, broadcast}]) end end end @@ -138,7 +139,10 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, alias Electric.Shapes.Consumer.Subqueries.Steady def handle_event(state, %Transaction{} = txn) do - {Subqueries.convert_transaction(txn, state, state.views), state} + case Subqueries.convert_transaction(txn, state, state.views) do + [] -> {[], state} + changes -> {[{:store, changes}], state} + end end def handle_event(state, {:global_last_seen_lsn, lsn}) do diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index 56d4c68fff..18ff7a0e2e 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -49,14 +49,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: _value}] - } + control: %{ + headers: %{ + event: "move-out", + patterns: [%{pos: 0, value: _value}] } - ] + } ] = actions end @@ -92,11 +90,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new() assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}} ] = actions end @@ -126,13 +122,11 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ], - flush: nil + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, + {:flush, nil} ] = actions end @@ -161,14 +155,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1, 2]) assert [ - store: [ - %Changes.NewRecord{record: %{"id" => "10"}}, - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ], - flush: nil + {:store, [%Changes.NewRecord{record: %{"id" => "10"}}]}, + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, + {:flush, nil} ] = actions end @@ -203,14 +195,12 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1, 2]) assert [ - store: [ - %Changes.DeletedRecord{old_record: %{"id" => "10"}}, - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ], - flush: nil + {:store, [%Changes.DeletedRecord{old_record: %{"id" => "10"}}]}, + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, + {:flush, nil} ] = actions end @@ -240,12 +230,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ], - flush: nil + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:flush, nil} ] = actions end @@ -294,15 +282,16 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.UpdatedRecord{record: %{"id" => "21"}}, - %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} - ], - flush: nil + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, + [ + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.UpdatedRecord{record: %{"id" => "21"}}, + %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} + ]}, + {:flush, nil} ] = actions end @@ -333,14 +322,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.NewRecord{record: %{"id" => "12"}, last?: true} - ], - flush: nil + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, + [ + %Changes.NewRecord{record: %{"id" => "11"}}, + %Changes.NewRecord{record: %{"id" => "12"}, last?: true} + ]}, + {:flush, nil} ] = actions end @@ -371,14 +361,15 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %Changes.NewRecord{record: %{"id" => "10"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ], - flush: nil + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:store, + [ + %Changes.NewRecord{record: %{"id" => "10"}}, + %Changes.NewRecord{record: %{"id" => "11"}, last?: true} + ]}, + {:flush, nil} ] = actions end @@ -409,11 +400,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}} ] = actions end @@ -441,11 +430,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}} ] = actions end @@ -474,11 +461,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}} ] = actions end @@ -518,13 +503,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert views_after[["$sublink", "0"]] == MapSet.new([2]) assert [ - {:store, - [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} - ]}, + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:control, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}}}, :start_move_in_query ] = actions end @@ -560,12 +542,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new() assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:control, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}}} ] = actions end @@ -626,11 +606,9 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new([1]) assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}} ] = actions end @@ -671,12 +649,10 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert view == MapSet.new() assert [ - store: [ - %{headers: %{event: "move-in"}}, - %Changes.NewRecord{record: %{"id" => "99"}}, - %{headers: %{control: "snapshot-end"}}, - %{headers: %{event: "move-out", patterns: patterns}} - ] + {:control, %{headers: %{event: "move-in"}}}, + {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, + {:control, %{headers: %{control: "snapshot-end"}}}, + {:control, %{headers: %{event: "move-out", patterns: patterns}}} ] = actions assert length(patterns) == 2 From 868feed9ae8aab9d63153c0ad6fc547b654ca26d Mon Sep 17 00:00:00 2001 From: rob Date: Sun, 22 Mar 2026 23:03:59 +0000 Subject: [PATCH 35/63] REMOVE: skip hacked Filter test --- packages/sync-service/test/electric/shapes/filter_test.exs | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index e1bdc3969a..b713613a0a 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -972,6 +972,7 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) end + @tag skip: true @tag with_sql: [ "CREATE TABLE IF NOT EXISTS or_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS or_child (id INT PRIMARY KEY, par_id INT REFERENCES or_parent(id), value TEXT NOT NULL)" From f233f773a666ca8f05a3d7da3352949601f86394 Mon Sep 17 00:00:00 2001 From: rob Date: Sun, 22 Mar 2026 23:04:08 +0000 Subject: [PATCH 36/63] Fix warning --- packages/sync-service/lib/electric/shapes/consumer.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 2c93eae169..ee7e6f67a0 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -899,7 +899,7 @@ defmodule Electric.Shapes.Consumer do state = %{state | subquery_state: subquery_state} previous_offset = state.latest_offset - {state, num_changes, total_size, latest_written} = + {state, num_changes, total_size, _latest_written} = Enum.reduce(actions, {state, 0, 0, nil}, fn {:store, items}, {state, num_changes, total_size, _latest_written} -> case write_items_to_log(state, items, opts) do From cf83767f8124af9f4a16e8d9f3cf091f6ae8d502 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 12:57:28 +0000 Subject: [PATCH 37/63] Docs: Add Filter plan --- .../simple-subqueries-filter-plan.md | 357 ++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 packages/sync-service/simple-subqueries-filter-plan.md diff --git a/packages/sync-service/simple-subqueries-filter-plan.md b/packages/sync-service/simple-subqueries-filter-plan.md new file mode 100644 index 0000000000..6772c37ab5 --- /dev/null +++ b/packages/sync-service/simple-subqueries-filter-plan.md @@ -0,0 +1,357 @@ +# Implementation Plan: Simple Subqueries Filter + +Related: +- `./simple-subqueries-filter.md` +- `./simple-subqueries.md` +- `./simple-subqueries-with-dnf-plan.md` + +## Goal + +Replace the current "route every subquery shape on root-table changes" behaviour +with an ETS-backed reverse index that gives `Shapes.Filter` a small candidate +set, while keeping the existing splice/buffering correctness model for +subquery moves. + +## Current Baseline + +- `Filter.add_shape/3` already adds subquery shapes into `WhereCondition`, but + `Filter.shapes_affected_by_record/3` also unions in + `subquery_shape_ids_for_table/2`, so every subquery shape for the root table + is routed for every root-table row change. +- `WhereCondition.other_shapes_affected/4` still evaluates subqueries by + calling `WhereClause.includes_record?/3` with `refs_fun.(shape)`, which means + filter-side evaluation needs access to full subquery views. +- The consumer subquery runtime already has the exact view timeline we need: + `Steady.views`, `Buffering.views_before_move`, and `Buffering.views_after_move`. +- `DnfPlan` already preserves the per-position metadata that matters for + subqueries, including repeated occurrences of the same dependency and + negation. That is a better source of filter metadata than the legacy + `shape.subquery_comparison_expressions` map. + +## Design Choice + +The reverse index should store actual subquery membership, not the complement. + +Concretely: + +- for a positive predicate, candidates come from the matching membership rows +- for a negated predicate, candidates come from the registered negated shapes + for that position minus the matching membership rows + +That is the key refinement to the proposal in +`simple-subqueries-filter.md`. It keeps the ETS data structure representable +and still yields the same safe candidate-set semantics during move buffering. + +## Runtime Shape + +We need three distinct things: + +1. static metadata for each subquery position on a root table +2. dynamic membership entries that consumers update as their dependency views + move +3. a conservative fallback for shapes whose reverse-index state is not ready + yet + +## Stage 1: Add a Stack-Scoped Reverse Index Runtime + +Create a new module, for example `Electric.Shapes.Filter.SubqueryIndex`, with a +named public ETS table per stack. + +Suggested storage shape: + +- candidate lookup entries: + - `{{root_table, position_id, typed_value}, shape_handle} -> true` +- exact membership entries for callback-based evaluation: + - `{{shape_handle, subquery_ref, typed_value}, true}` +- per-position metadata: + - `{root_table, position_id} -> %{expr: Expr.t(), dependency_index: non_neg_integer(), subquery_ref: [String.t()], polarity: :positive | :negated}` +- per-position registered handles: + - `{{root_table, position_id, :positive}, shape_handle} -> true` + - `{{root_table, position_id, :negated}, shape_handle} -> true` +- per-shape position metadata for teardown: + - `{shape_handle, :positions} -> [{root_table, position_id}, ...]` + +Notes: + +- `position_id` should come from `DnfPlan` position metadata, not from + `shape.subquery_comparison_expressions`, because repeated dependency + occurrences can share a dependency handle but still need distinct filter + lookups. +- exact membership should be keyed by canonical `subquery_ref` / dependency + index, not by `position_id`. `WhereClause` evaluation only sees the + canonicalized sublink refs in the expression AST, so the callback can answer + "is this value in this shape's current dependency view?" but it does not know + which DNF position triggered the check. +- The table should be initialized in a stack-owned bootstrap path such as + `ShapeLogCollector.init/1`, not lazily inside a consumer, so the router and + the consumers agree on the table name from startup onward. + +## Stage 2: Register Static Position Metadata in the Filter + +Touch points: + +- `lib/electric/shapes/filter.ex` +- `lib/electric/shapes/dnf_plan.ex` (read-only dependency) +- new `lib/electric/shapes/filter/subquery_index.ex` + +On `Filter.add_shape/3`: + +- detect shapes with dependencies +- compile `DnfPlan` for those shapes +- extract every subquery position that can be evaluated from the root-table + record +- register its static metadata and shape-handle membership in the + `SubqueryIndex` + +On `Filter.remove_shape/3`: + +- unregister the shape from the static position metadata +- delete all reverse-index rows for the shape + +Important fallback: + +- keep a conservative "not ready yet" set for subquery shapes +- `restore_shapes` currently adds shapes to the router before their consumer + exists +- until a consumer has seeded its dynamic membership rows, the filter must + continue routing that shape conservatively + +That means the current `subquery_shapes_table` should not disappear +immediately. It should become a temporary "fallback subquery shapes" table and +only be retired once readiness handoff exists. + +## Stage 3: Seed and Maintain Dynamic Membership from the Consumer Runtime + +Touch points: + +- `lib/electric/shapes/consumer.ex` +- `lib/electric/shapes/consumer/subqueries.ex` +- `lib/electric/shapes/consumer/subqueries/steady.ex` +- `lib/electric/shapes/consumer/subqueries/buffering.ex` + +Add reverse-index state to the subquery runtime: + +- table name or module access +- the list of subquery positions for the outer shape +- enough metadata to map dependency-index changes onto position updates + +During `initialize_subquery_runtime/1`: + +- compile the `DnfPlan` +- load the current dependency views from materializers as today +- seed reverse-index membership rows from those views before the shape is + treated as "ready" for indexed routing + +For newly created shapes: + +- seed the reverse index before `ShapeLogCollector.add_shape/4` makes the shape + active in the router, so there is no under-routing window + +For restored shapes: + +- register them conservatively during `restore_shapes` +- after the consumer seeds its membership rows, send a small readiness message + back to the `ShapeLogCollector` / `EventRouter` so the shape can leave the + conservative fallback set + +During steady-state dependency moves: + +- update the in-memory views exactly as today +- mirror the same dependency delta into the reverse index immediately + +During buffering: + +- the reverse index must track whichever view image the runtime is using as the + current candidate source +- the simplest rule is: when `Buffering.from_steady/6` creates + `views_after_move`, apply the same delta to the reverse index right there +- that keeps the ETS state aligned with the consumer's candidate view while + buffered transactions are later converted against `views_before_move` and + `views_after_move` + +Because the ETS table stores positive membership, negation needs no special +write-path logic. Negated candidate sets are derived by complement at read +time. + +On teardown: + +- remove all reverse-index rows for the shape when the shape is cleaned up or + removed from the router + +## Stage 4: Use the Reverse Index in `Filter.affected_shapes/2` + +Touch points: + +- `lib/electric/shapes/filter.ex` +- new `lib/electric/shapes/filter/subquery_index.ex` + +Replace the unconditional subquery union in +`Filter.shapes_affected_by_record/3` with: + +1. `WhereCondition.affected_shapes/3` +2. reverse-index candidates for subquery positions on the table +3. exact verification of those candidates + +Reverse-index lookup flow: + +- enumerate registered subquery positions for the root table +- evaluate the left-hand comparison expression against the incoming record +- look up matching positive-membership rows +- derive: + - positive candidates directly from the lookup + - negated candidates from `all_negated_handles(position) - matched_handles(position)` +- union all candidate handles across positions + +For the first implementation, candidate verification should be done by +evaluating the full shape predicate for each candidate shape, not by trying to +fold subquery lookups into the `WhereCondition` tree immediately. + +That is the lowest-risk slice: + +- it removes the worst over-routing +- it reuses the exact predicate semantics we already have +- it keeps the existing equality/inclusion tree untouched + +If this proves too expensive in practice, we can later add a real subquery +index operation to `WhereCondition.optimise_where/1`, but that should be a +follow-up, not part of the first slice. + +## Stage 5: Add Callback-Based Subquery Evaluation to `WhereClause` + +Touch points: + +- `lib/electric/shapes/where_clause.ex` +- `lib/electric/replication/eval/runner.ex` or `lib/pg_interop/sublink.ex` + +Do not replace the existing refs-map path used by `Shape.convert_change/3`. +That path still needs full subquery views for exact change conversion. + +Instead: + +- keep the current `includes_record?/3` behaviour for conversion code +- add a callback-based variant for filter-side exact verification + +Suggested API shape: + +- `includes_record?(where, record, extra_refs, subquery_member?)` +- or a distinct helper such as + `includes_record_with_subquery_membership?(where, record, subquery_member?)` + +The callback should look like: + +- `fn subquery_ref, typed_value -> boolean end` + +That `subquery_ref` is the canonical dependency ref from the validated WHERE +clause, for example `["$sublink", "0"]`, not a DNF `position_id`. + +That split is intentional: + +- candidate routing needs `position_id`, because repeated occurrences of the + same dependency can appear in different positions and compare against + different root-table columns +- exact `WhereClause` verification only needs canonical dependency membership, + because the AST has already been rewritten to shared dependency refs + +Implementation options: + +- extend `PgInterop.Sublink.member?/2` to accept a wrapper value that delegates + to the callback +- or special-case `sublink_membership_check` execution in `Runner` + +The important thing is that filter-side verification can ask: + +- "does this specific shape currently contain this typed value for this + canonical subquery ref / dependency?" + +using the reverse index, without loading full `MapSet` views into the filter. + +## Stage 6: Remove the Legacy Always-Route Path + +Once the reverse-index path is green: + +- retire the unconditional `subquery_shape_ids_for_table/2` union for + root-table record changes +- narrow `subquery_shapes_table` to only the conservative startup fallback, or + remove it completely if readiness is explicit +- simplify `all_shape_ids/1` and `shape_ids_for_table/2` so they do not depend + on duplicated subquery shape bookkeeping + +Keep the existing safety valve: + +- if filter-side evaluation crashes or cannot determine the result, return all + shapes for safety + +Also keep a conservative fallback for unsupported subquery positions: + +- if a shape's subquery test expression cannot be evaluated from a root-table + record, leave that shape on the fallback routing path instead of partially + indexing it + +## Suggested Test Plan + +### Filter unit tests + +Extend `test/electric/shapes/filter_test.exs` to cover: + +- positive single-column subquery routing +- non-matching values no longer route the shape +- `field = const AND subquery` still verifies the row predicate after candidate + lookup +- `LIKE` + subquery in `other_shapes` uses the callback path instead of full + `refs_fun` views +- multiple shapes sharing a value +- multiple shapes with different values +- negated subquery candidate derivation by complement +- repeated dependency with two positions: + - `a IN sq OR b IN sq` +- composite-key subquery positions +- fallback behaviour for unsupported positions +- `remove_shape/3` cleanup of ETS entries + +### Consumer / subquery-runtime tests + +Extend `test/electric/shapes/consumer/subqueries_test.exs` to cover: + +- initial seeding of membership rows from current dependency views +- steady `move_in` and `move_out` updates to the reverse index +- buffering updates the reverse index at move start +- readiness handoff for restored shapes +- teardown removes reverse-index rows +- negated shapes produce the expected candidate visibility without special + write-time polarity handling + +### Integration tests + +Add or extend integration coverage for: + +- root-table changes no longer waking unrelated subquery shapes +- `AND` + subquery mixed predicates +- repeated dependency positions +- negated subquery move-out / move-in regressions +- stack restart / shape restore preserving conservative correctness until + consumers reseed the reverse index + +## Recommended Delivery Order + +1. Add `SubqueryIndex` storage and static registration. +2. Add callback-based `WhereClause` evaluation. +3. Seed and update dynamic membership from the consumer runtime. +4. Add readiness handoff for restored shapes. +5. Switch `Filter.shapes_affected_by_record/3` to reverse-index candidates plus + exact verification. +6. Remove the legacy always-route path once tests are green. + +## Expected Touch Points + +- `lib/electric/shapes/filter.ex` +- `lib/electric/shapes/filter/where_condition.ex` +- `lib/electric/shapes/where_clause.ex` +- `lib/electric/shapes/consumer.ex` +- `lib/electric/shapes/consumer/subqueries.ex` +- `lib/electric/shapes/consumer/subqueries/steady.ex` +- `lib/electric/shapes/consumer/subqueries/buffering.ex` +- `lib/electric/replication/shape_log_collector.ex` +- new `lib/electric/shapes/filter/subquery_index.ex` +- `test/electric/shapes/filter_test.exs` +- `test/electric/shapes/consumer/subqueries_test.exs` +- relevant integration tests under `test/integration/` From 2968cc0282f2e42cce3cdc277a7bae51a86e561d Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 12:58:18 +0000 Subject: [PATCH 38/63] Update WhereClause to accept subquery function --- .../lib/electric/replication/eval/runner.ex | 33 ++++++++++++++++--- .../electric/shapes/filter/where_condition.ex | 6 +++- .../sync-service/lib/electric/shapes/shape.ex | 3 +- .../lib/electric/shapes/where_clause.ex | 20 +++++++++-- .../electric/replication/eval/runner_test.exs | 20 +++++++++++ 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/packages/sync-service/lib/electric/replication/eval/runner.ex b/packages/sync-service/lib/electric/replication/eval/runner.ex index 45c94353a1..8319159484 100644 --- a/packages/sync-service/lib/electric/replication/eval/runner.ex +++ b/packages/sync-service/lib/electric/replication/eval/runner.ex @@ -50,18 +50,43 @@ defmodule Electric.Replication.Eval.Runner do @doc """ Run a PG function parsed by `Electric.Replication.Eval.Parser` based on the inputs """ - @spec execute(Expr.t(), map()) :: {:ok, term()} | {:error, {%Func{}, [term()]}} - def execute(%Expr{} = tree, ref_values) do - Walker.fold(tree.eval, &do_execute/3, ref_values) + @spec execute(Expr.t(), map(), keyword()) :: {:ok, term()} | {:error, {%Func{}, [term()]}} + def execute(%Expr{} = tree, ref_values, opts \\ []) do + ctx = %{refs: ref_values, subquery_member?: Keyword.get(opts, :subquery_member?)} + Walker.fold(tree.eval, &do_execute/3, ctx) catch {:could_not_compute, func} -> {:error, func} end defp do_execute(%Const{value: value}, _, _), do: {:ok, value} - defp do_execute(%Ref{path: path}, _, refs), do: {:ok, Map.fetch!(refs, path)} + + defp do_execute( + %Ref{path: ["$sublink", _] = path}, + _, + %{refs: refs, subquery_member?: subquery_member?} + ) + when is_function(subquery_member?, 2) do + {:ok, Map.get(refs, path, {:subquery_ref, path})} + end + + defp do_execute(%Ref{path: path}, _, %{refs: refs}), do: {:ok, Map.fetch!(refs, path)} defp do_execute(%Array{}, %{elements: elements}, _), do: {:ok, elements} defp do_execute(%RowExpr{}, %{elements: elements}, _), do: {:ok, List.to_tuple(elements)} + defp do_execute( + %Func{name: "sublink_membership_check"} = func, + %{args: [value, {:subquery_ref, path}]}, + %{subquery_member?: subquery_member?} + ) + when is_function(subquery_member?, 2) do + {:ok, + try do + subquery_member?.(path, value) + rescue + _ -> throw({:could_not_compute, %{func | args: [value, {:subquery_ref, path}]}}) + end} + end + defp do_execute(%Func{strict?: false} = func, %{args: args}, _) do # For a non-strict function, we don't care about nil values in the arguments {:ok, try_apply(func, args)} diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index ea75a05b53..1f173408f9 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -275,7 +275,11 @@ defmodule Electric.Shapes.Filter.WhereCondition do fn -> for {shape_id, where} <- other_shapes, shape = Filter.get_shape(filter, shape_id), - WhereClause.includes_record?(where, record, refs_fun.(shape)), + WhereClause.includes_record?( + where, + record, + WhereClause.subquery_member_from_refs(refs_fun.(shape)) + ), into: MapSet.new() do shape_id end diff --git a/packages/sync-service/lib/electric/shapes/shape.ex b/packages/sync-service/lib/electric/shapes/shape.ex index 007d9a8789..186de01806 100644 --- a/packages/sync-service/lib/electric/shapes/shape.ex +++ b/packages/sync-service/lib/electric/shapes/shape.ex @@ -735,7 +735,8 @@ defmodule Electric.Shapes.Shape do refs, opts ) do - {:ok, WhereClause.includes_record?(where, record, refs), + {:ok, + WhereClause.includes_record?(where, record, WhereClause.subquery_member_from_refs(refs)), %{ move_tags: make_tags_from_pattern(tag_structure, record, opts[:stack_id], opts[:shape_handle]), diff --git a/packages/sync-service/lib/electric/shapes/where_clause.ex b/packages/sync-service/lib/electric/shapes/where_clause.ex index 15b5688ad9..6175e6336d 100644 --- a/packages/sync-service/lib/electric/shapes/where_clause.ex +++ b/packages/sync-service/lib/electric/shapes/where_clause.ex @@ -1,15 +1,29 @@ defmodule Electric.Shapes.WhereClause do + alias PgInterop.Sublink alias Electric.Replication.Eval.Runner - def includes_record?(where_clause, record, extra_refs \\ %{}) + @spec includes_record?(Electric.Replication.Eval.Expr.t() | nil, map(), ([String.t()], term() -> + boolean())) :: + boolean() + def includes_record?(where_clause, record, subquery_member? \\ fn _, _ -> false end) def includes_record?(nil = _where_clause, _record, _), do: true - def includes_record?(where_clause, record, extra_refs) do + def includes_record?(where_clause, record, subquery_member?) + when is_function(subquery_member?, 2) do with {:ok, refs} <- Runner.record_to_ref_values(where_clause.used_refs, record), - {:ok, evaluated} <- Runner.execute(where_clause, Map.merge(refs, extra_refs)) do + {:ok, evaluated} <- + Runner.execute(where_clause, refs, subquery_member?: subquery_member?) do if is_nil(evaluated), do: false, else: evaluated else _ -> false end end + + @spec subquery_member_from_refs(map()) :: ([String.t()], term() -> boolean()) + def subquery_member_from_refs(extra_refs) when is_map(extra_refs) do + fn subquery_ref, typed_value -> + typed_value + |> Sublink.member?(Map.get(extra_refs, subquery_ref, [])) + end + end end diff --git a/packages/sync-service/test/electric/replication/eval/runner_test.exs b/packages/sync-service/test/electric/replication/eval/runner_test.exs index ecfecca32d..c6b151ab62 100644 --- a/packages/sync-service/test/electric/replication/eval/runner_test.exs +++ b/packages/sync-service/test/electric/replication/eval/runner_test.exs @@ -194,6 +194,26 @@ defmodule Electric.Replication.Eval.RunnerTest do |> Runner.execute(%{["test"] => 4, ["$sublink", "0"] => MapSet.new([2, 3, 4])}) end + test "subquery with callback-backed membership" do + expr = + ~S|test IN (SELECT val FROM tester)| + |> Parser.parse_and_validate_expression!( + refs: %{["test"] => :int4, ["$sublink", "0"] => {:array, :int4}}, + sublink_queries: %{0 => "SELECT val FROM tester"} + ) + + subquery_member? = fn + ["$sublink", "0"], 4 -> true + ["$sublink", "0"], _ -> false + end + + assert {:ok, true} = + Runner.execute(expr, %{["test"] => 4}, subquery_member?: subquery_member?) + + assert {:ok, false} = + Runner.execute(expr, %{["test"] => 5}, subquery_member?: subquery_member?) + end + test "subquery with row expression" do assert {:ok, true} = ~S|(test1, test2) IN (SELECT val1, val2 FROM tester)| From bf8fd4d3053c4a58a53f24eaa0dbe83f936c0d59 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:17:07 +0000 Subject: [PATCH 39/63] Docs: Updated filter plan to remove hack --- .../simple-subqueries-filter-plan.md | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/packages/sync-service/simple-subqueries-filter-plan.md b/packages/sync-service/simple-subqueries-filter-plan.md index 6772c37ab5..2cf33d0010 100644 --- a/packages/sync-service/simple-subqueries-filter-plan.md +++ b/packages/sync-service/simple-subqueries-filter-plan.md @@ -18,9 +18,17 @@ subquery moves. `Filter.shapes_affected_by_record/3` also unions in `subquery_shape_ids_for_table/2`, so every subquery shape for the root table is routed for every root-table row change. +- That `subquery_shapes_table` path is a temporary oversend hack, not the + desired fallback design. It currently oversends even after startup for shapes + whose root-table predicate should reject the row, including non-optimisable + `other_shapes` cases such as `OR + subquery`. - `WhereCondition.other_shapes_affected/4` still evaluates subqueries by calling `WhereClause.includes_record?/3` with `refs_fun.(shape)`, which means filter-side evaluation needs access to full subquery views. +- `ShapeLogCollector` no longer wires `Materializer.get_all_as_refs/2` into the + production `EventRouter`, so the old `refs_fun` path is no longer a viable + runtime answer for exact filter-side subquery checks. The reverse-index + callback path is required to remove the oversend hack safely. - The consumer subquery runtime already has the exact view timeline we need: `Steady.views`, `Buffering.views_before_move`, and `Buffering.views_after_move`. - `DnfPlan` already preserves the per-position metadata that matters for @@ -115,6 +123,9 @@ Important fallback: exists - until a consumer has seeded its dynamic membership rows, the filter must continue routing that shape conservatively +- once a shape is ready, this fallback must stop oversending it; readiness + fallback is only for startup/restore and unsupported positions, not for + steady-state root-table routing That means the current `subquery_shapes_table` should not disappear immediately. It should become a temporary "fallback subquery shapes" table and @@ -211,6 +222,9 @@ That is the lowest-risk slice: - it removes the worst over-routing - it reuses the exact predicate semantics we already have - it keeps the existing equality/inclusion tree untouched +- it restores correctness for non-optimisable `other_shapes` predicates, so the + current skipped `OR + subquery` regression can be re-enabled instead of being + masked by always-route behaviour If this proves too expensive in practice, we can later add a real subquery index operation to `WhereCondition.optimise_where/1`, but that should be a @@ -264,6 +278,9 @@ The important thing is that filter-side verification can ask: canonical subquery ref / dependency?" using the reverse index, without loading full `MapSet` views into the filter. +This is also the production replacement for the old `refs_fun`-backed filter +checks; after the always-route hack is removed, filter verification must work +without `ShapeLogCollector` materializer refs. ## Stage 6: Remove the Legacy Always-Route Path @@ -275,6 +292,8 @@ Once the reverse-index path is green: remove it completely if readiness is explicit - simplify `all_shape_ids/1` and `shape_ids_for_table/2` so they do not depend on duplicated subquery shape bookkeeping +- re-enable any regression tests currently skipped because the oversend hack + masks false positives Keep the existing safety valve: @@ -297,6 +316,9 @@ Extend `test/electric/shapes/filter_test.exs` to cover: - non-matching values no longer route the shape - `field = const AND subquery` still verifies the row predicate after candidate lookup +- re-enable the current skipped `OR + subquery` regression and assert that a + non-matching root-table row is not routed just because the shape has + dependencies - `LIKE` + subquery in `other_shapes` uses the callback path instead of full `refs_fun` views - multiple shapes sharing a value From a093509f4980e02bc25790ad3cf99fded7a74f85 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:39:48 +0000 Subject: [PATCH 40/63] Docs: Update filter plan --- .../simple-subqueries-filter-plan.md | 86 ++++++++++++++++--- 1 file changed, 74 insertions(+), 12 deletions(-) diff --git a/packages/sync-service/simple-subqueries-filter-plan.md b/packages/sync-service/simple-subqueries-filter-plan.md index 2cf33d0010..7d5d825a4c 100644 --- a/packages/sync-service/simple-subqueries-filter-plan.md +++ b/packages/sync-service/simple-subqueries-filter-plan.md @@ -139,12 +139,39 @@ Touch points: - `lib/electric/shapes/consumer/subqueries.ex` - `lib/electric/shapes/consumer/subqueries/steady.ex` - `lib/electric/shapes/consumer/subqueries/buffering.ex` +- new pure helper such as + `lib/electric/shapes/filter/subquery_index_projection.ex` + +Architecture note: + +- keep the current split where `Subqueries.handle_event/2` is pure and returns + actions, and `Consumer.apply_subquery_event/3` is the place that performs + ETS writes / async queries / log writes +- do not have `Steady` or `Buffering` write into the reverse index directly +- instead, add a new action shape such as `{:subquery_index, ops}` or similar, + and let the consumer execute those ops after applying the subquery state + transition Add reverse-index state to the subquery runtime: - table name or module access - the list of subquery positions for the outer shape - enough metadata to map dependency-index changes onto position updates +- two logical projections: + - exact subquery views already kept in memory by the state machine + - a routing projection materialized into the filter index + +The routing projection is what should drive `Filter` candidate lookup: + +- in steady state, routing projection = current exact view +- while buffering one dependency move: + - positive positions use `before ∪ after` + - negated positions use `before ∩ after` as the matched-membership set that + the filter subtracts from `all_negated_handles(position)` + +That logic is sufficiently subtle that it should live in its own pure helper +module rather than being open-coded across `Steady`, `Buffering`, and +`Consumer`. During `initialize_subquery_runtime/1`: @@ -152,6 +179,8 @@ During `initialize_subquery_runtime/1`: - load the current dependency views from materializers as today - seed reverse-index membership rows from those views before the shape is treated as "ready" for indexed routing +- seed the routing projection by diffing from an empty projection and emitting + reverse-index ops through the normal action path For newly created shapes: @@ -168,17 +197,31 @@ For restored shapes: During steady-state dependency moves: - update the in-memory views exactly as today -- mirror the same dependency delta into the reverse index immediately +- compute the routing-projection delta in the pure helper +- emit reverse-index ops as actions +- let `Consumer.apply_subquery_event/3` execute those ops imperatively During buffering: -- the reverse index must track whichever view image the runtime is using as the - current candidate source -- the simplest rule is: when `Buffering.from_steady/6` creates - `views_after_move`, apply the same delta to the reverse index right there -- that keeps the ETS state aligned with the consumer's candidate view while - buffered transactions are later converted against `views_before_move` and - `views_after_move` +- do not think in terms of two separate filter-side evaluations +- buffering has two subquery views, `views_before_move` and + `views_after_move`, but the filter should collapse them into one + conservative routing projection +- the pure helper should own that derivation and emit the diff from: + - steady exact view -> buffering conservative projection + - buffering conservative projection -> next steady exact view after splice +- queued dependency moves should continue to be modelled exactly as today in + the subquery state machine; the projection helper only cares about the + active routing image before and after each state transition + +For the first indexed slice: + +- candidate lookup can use the buffering routing projection +- use the index to shrink the candidate set +- do not exact-reject actively buffering shapes in `Filter`; oversending is + acceptable, undersending is not +- rely on `Shape.convert_change/3` as the authoritative final filter for + buffered transactions Because the ETS table stores positive membership, negation needs no special write-path logic. Negated candidate sets are derived by complement at read @@ -201,7 +244,8 @@ Replace the unconditional subquery union in 1. `WhereCondition.affected_shapes/3` 2. reverse-index candidates for subquery positions on the table -3. exact verification of those candidates +3. exact verification of steady-state candidates, with buffering candidates + allowed to oversend Reverse-index lookup flow: @@ -213,9 +257,13 @@ Reverse-index lookup flow: - negated candidates from `all_negated_handles(position) - matched_handles(position)` - union all candidate handles across positions -For the first implementation, candidate verification should be done by -evaluating the full shape predicate for each candidate shape, not by trying to -fold subquery lookups into the `WhereCondition` tree immediately. +For the first implementation: + +- steady-state candidate verification should be done by evaluating the full + shape predicate for each candidate shape, not by trying to fold subquery + lookups into the `WhereCondition` tree immediately +- buffering shapes should use the conservative routing projection and may + oversend through to `Shape.convert_change/3` That is the lowest-risk slice: @@ -305,6 +353,8 @@ Also keep a conservative fallback for unsupported subquery positions: - if a shape's subquery test expression cannot be evaluated from a root-table record, leave that shape on the fallback routing path instead of partially indexing it +- buffering shapes likewise may oversend through to `Shape.convert_change/3`; + the routing projection exists to avoid undersend, not to make buffering exact ## Suggested Test Plan @@ -342,6 +392,16 @@ Extend `test/electric/shapes/consumer/subqueries_test.exs` to cover: - negated shapes produce the expected candidate visibility without special write-time polarity handling +Add focused unit tests for the new projection helper, for example in +`test/electric/shapes/filter/subquery_index_projection_test.exs`, covering: + +- steady exact view -> routing projection +- steady -> buffering projection for positive positions (`before ∪ after`) +- steady -> buffering projection for negated positions (`before ∩ after`) +- buffering -> steady projection after splice +- repeated positions sharing one dependency +- emitted add/remove ops for seed, move, and teardown + ### Integration tests Add or extend integration coverage for: @@ -374,6 +434,8 @@ Add or extend integration coverage for: - `lib/electric/shapes/consumer/subqueries/buffering.ex` - `lib/electric/replication/shape_log_collector.ex` - new `lib/electric/shapes/filter/subquery_index.ex` +- new `lib/electric/shapes/filter/subquery_index_projection.ex` - `test/electric/shapes/filter_test.exs` - `test/electric/shapes/consumer/subqueries_test.exs` +- `test/electric/shapes/filter/subquery_index_projection_test.exs` - relevant integration tests under `test/integration/` From 401adc99661c7b6534f72a4909af115f0305c5d2 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:45:12 +0000 Subject: [PATCH 41/63] Add SubqueryIndex storage and static registration (Stage 1) Add a new ETS-backed reverse index module (SubqueryIndex) that stores per-position metadata and shape-handle polarity for subquery filtering. Wire it into Filter.add_shape/remove_shape to register/unregister shapes with compiled DnfPlan positions. Shapes start in a fallback set until their consumer seeds dynamic membership. Co-Authored-By: Claude Opus 4.6 --- .../lib/electric/shapes/filter.ex | 27 +- .../electric/shapes/filter/subquery_index.ex | 341 ++++++++++++++++++ .../test/electric/shapes/filter_test.exs | 3 +- 3 files changed, 367 insertions(+), 4 deletions(-) create mode 100644 packages/sync-service/lib/electric/shapes/filter/subquery_index.ex diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index 639954f195..f776689a26 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -18,7 +18,9 @@ defmodule Electric.Shapes.Filter do alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes.TruncatedRelation alias Electric.Replication.Changes.UpdatedRecord + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.SubqueryIndex alias Electric.Shapes.Filter.WhereCondition alias Electric.Shapes.Shape alias Electric.Telemetry.OpenTelemetry @@ -32,6 +34,7 @@ defmodule Electric.Shapes.Filter do :eq_index_table, :incl_index_table, :subquery_shapes_table, + :subquery_index, :refs_fun ] @@ -47,6 +50,7 @@ defmodule Electric.Shapes.Filter do eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), subquery_shapes_table: :ets.new(:filter_subquery, [:set, :private]), + subquery_index: SubqueryIndex.new(), refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end) } end @@ -82,15 +86,26 @@ defmodule Electric.Shapes.Filter do end defp maybe_track_subquery_shape( - %Filter{subquery_shapes_table: table}, + %Filter{subquery_shapes_table: table, subquery_index: index}, shape_id, - %Shape{shape_dependencies: [_ | _], root_table: root_table} + %Shape{shape_dependencies: [_ | _], root_table: root_table} = shape ) do :ets.insert(table, {{root_table, shape_id}, true}) + maybe_register_subquery_index(index, shape_id, root_table, shape) end defp maybe_track_subquery_shape(_filter, _shape_id, _shape), do: :ok + defp maybe_register_subquery_index(index, shape_id, root_table, shape) do + case DnfPlan.compile(shape) do + {:ok, plan} -> + SubqueryIndex.register_shape(index, shape_id, root_table, plan) + + _ -> + :ok + end + end + defp get_or_create_table_condition(filter, table_name) do case :ets.lookup(filter.tables_table, table_name) do [] -> @@ -126,11 +141,12 @@ defmodule Electric.Shapes.Filter do end defp maybe_untrack_subquery_shape( - %Filter{subquery_shapes_table: table}, + %Filter{subquery_shapes_table: table, subquery_index: index}, shape_id, %Shape{shape_dependencies: [_ | _], root_table: root_table} ) do :ets.delete(table, {root_table, shape_id}) + SubqueryIndex.unregister_shape(index, shape_id, root_table) end defp maybe_untrack_subquery_shape(_filter, _shape_id, _shape), do: :ok @@ -251,4 +267,9 @@ defmodule Electric.Shapes.Filter do [] -> nil end end + + @doc """ + Get the subquery index. Used by consumers to seed/update membership. + """ + def subquery_index(%Filter{subquery_index: index}), do: index end diff --git a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex new file mode 100644 index 0000000000..69a246ba7a --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex @@ -0,0 +1,341 @@ +defmodule Electric.Shapes.Filter.SubqueryIndex do + @moduledoc """ + ETS-backed reverse index for subquery membership in the Filter. + + Stores per-position static metadata and dynamic membership entries + that consumers update as their dependency views change. + + ## ETS key patterns + + - Candidate lookup: `{{root_table, position_id, typed_value}, shape_handle}` + - Exact membership (for callback-based evaluation): + `{{shape_handle, subquery_ref, typed_value}, true}` + - Per-position metadata: + `{{:position_meta, root_table, position_id}, %{...}}` + - Per-position registered handles: + `{{root_table, position_id, :positive}, shape_handle}` + `{{root_table, position_id, :negated}, shape_handle}` + - Per-shape positions (for teardown): + `{{:shape_positions, shape_handle}, [{root_table, position_id}, ...]}` + - Fallback shapes (not yet seeded by consumer): + `{{:fallback, root_table, shape_handle}, true}` + """ + + alias Electric.Replication.Eval.Expr + alias Electric.Replication.Eval.Runner + alias Electric.Shapes.DnfPlan + + @type t :: :ets.tid() + + @doc """ + Create a new SubqueryIndex ETS table. + """ + @spec new() :: t() + def new do + :ets.new(:subquery_index, [:bag, :private]) + end + + @doc """ + Register a shape's subquery positions from a compiled DnfPlan. + + Extracts every subquery position that can be evaluated from the + root-table record and registers its static metadata and shape-handle + polarity in the index. + """ + @spec register_shape(t(), term(), term(), DnfPlan.t()) :: :ok + def register_shape(table, shape_handle, root_table, %DnfPlan{} = plan) do + positions = + plan.positions + |> Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Enum.map(fn {pos, info} -> + # Register position metadata (idempotent - :bag will add duplicates, but + # we look up by match so this is fine; we deduplicate on read) + :ets.insert(table, { + {:position_meta, root_table, pos}, + %{ + ast: info.ast, + dependency_index: info.dependency_index, + subquery_ref: info.subquery_ref, + polarity: if(info.negated, do: :negated, else: :positive) + } + }) + + # Register shape handle under the appropriate polarity + polarity = if info.negated, do: :negated, else: :positive + :ets.insert(table, {{root_table, pos, polarity}, shape_handle}) + + {root_table, pos} + end) + + # Store shape positions for teardown + :ets.insert(table, {{:shape_positions, shape_handle}, positions}) + + # Register as fallback until consumer seeds membership + :ets.insert(table, {{:fallback, root_table, shape_handle}, true}) + + :ok + end + + @doc """ + Unregister a shape from the index. + + Removes all reverse-index rows, polarity registrations, + position metadata (if no other shapes use it), and fallback entries. + """ + @spec unregister_shape(t(), term(), term()) :: :ok + def unregister_shape(table, shape_handle, root_table) do + # Get shape positions + positions = + case :ets.lookup(table, {:shape_positions, shape_handle}) do + [{_, pos_list}] -> pos_list + [] -> [] + end + + # Remove polarity registrations and candidate lookup entries + for {rt, pos} <- positions do + :ets.match_delete(table, {{rt, pos, :positive}, shape_handle}) + :ets.match_delete(table, {{rt, pos, :negated}, shape_handle}) + # Remove candidate lookup entries for this shape + :ets.match_delete(table, {{rt, pos, :_}, shape_handle}) + end + + # Remove exact membership entries + :ets.match_delete(table, {{shape_handle, :_, :_}, true}) + + # Remove shape positions + :ets.delete(table, {:shape_positions, shape_handle}) + + # Remove fallback entry + :ets.delete(table, {:fallback, root_table, shape_handle}) + + :ok + end + + @doc """ + Seed membership entries from a dependency view. + + Called by the consumer when it has loaded its initial views. + Adds candidate lookup entries and exact membership entries. + """ + @spec seed_membership(t(), term(), term(), [String.t()], non_neg_integer(), MapSet.t()) :: :ok + def seed_membership(table, shape_handle, root_table, subquery_ref, dep_index, view) do + # Find all positions for this dependency index on this root table + positions = positions_for_dependency(table, root_table, dep_index) + + # Add candidate lookup entries for each position + for {_rt, pos} <- positions, value <- view do + :ets.insert(table, {{root_table, pos, value}, shape_handle}) + end + + # Add exact membership entries keyed by canonical subquery_ref + for value <- view do + :ets.insert(table, {{shape_handle, subquery_ref, value}, true}) + end + + :ok + end + + @doc """ + Mark a shape as ready (remove from fallback set). + + Called after the consumer has seeded all its dependency views. + """ + @spec mark_ready(t(), term(), term()) :: :ok + def mark_ready(table, shape_handle, root_table) do + :ets.delete(table, {:fallback, root_table, shape_handle}) + :ok + end + + @doc """ + Add a value to the reverse index for a specific shape and dependency. + """ + @spec add_value(t(), term(), term(), [String.t()], non_neg_integer(), term()) :: :ok + def add_value(table, shape_handle, root_table, subquery_ref, dep_index, value) do + positions = positions_for_dependency(table, root_table, dep_index) + + for {_rt, pos} <- positions do + :ets.insert(table, {{root_table, pos, value}, shape_handle}) + end + + :ets.insert(table, {{shape_handle, subquery_ref, value}, true}) + :ok + end + + @doc """ + Remove a value from the reverse index for a specific shape and dependency. + """ + @spec remove_value(t(), term(), term(), [String.t()], non_neg_integer(), term()) :: :ok + def remove_value(table, shape_handle, root_table, subquery_ref, dep_index, value) do + positions = positions_for_dependency(table, root_table, dep_index) + + for {_rt, pos} <- positions do + :ets.match_delete(table, {{root_table, pos, value}, shape_handle}) + end + + :ets.match_delete(table, {{shape_handle, subquery_ref, value}, true}) + :ok + end + + @doc """ + Get candidate shape handles for a record change on a root table. + + Evaluates the left-hand comparison expression for each registered + subquery position against the record, looks up matching handles, + and derives candidates for both positive and negated positions. + + Returns the union of all candidate handles plus any fallback shapes. + """ + @spec candidates_for_record(t(), term(), map()) :: MapSet.t() + def candidates_for_record(table, root_table, record) do + positions = registered_positions(table, root_table) + + indexed_candidates = + Enum.reduce(positions, MapSet.new(), fn {pos, meta}, acc -> + case evaluate_position_lhs(meta, record) do + {:ok, typed_value} -> + positive = positive_candidates(table, root_table, pos, typed_value) + negated = negated_candidates(table, root_table, pos, typed_value) + acc |> MapSet.union(positive) |> MapSet.union(negated) + + :error -> + # Can't evaluate - include all shapes for this position as candidates + all_position_shapes(table, root_table, pos) + |> MapSet.union(acc) + end + end) + + MapSet.union(indexed_candidates, fallback_shapes(table, root_table)) + end + + @doc """ + Check if a specific shape has a value in its current dependency view + for a canonical subquery ref. + + Used for callback-based exact verification in WhereClause evaluation. + """ + @spec member?(t(), term(), [String.t()], term()) :: boolean() + def member?(table, shape_handle, subquery_ref, typed_value) do + :ets.member(table, {shape_handle, subquery_ref, typed_value}) + end + + @doc """ + Returns all shape handles that are in the fallback set for a table. + """ + @spec fallback_shapes(t(), term()) :: MapSet.t() + def fallback_shapes(table, root_table) do + table + |> :ets.match({{:fallback, root_table, :"$1"}, :_}) + |> List.flatten() + |> MapSet.new() + end + + @doc """ + Check if a shape is in the fallback set. + """ + @spec fallback?(t(), term(), term()) :: boolean() + def fallback?(table, shape_handle, root_table) do + :ets.member(table, {:fallback, root_table, shape_handle}) + end + + @doc """ + Check if a shape has any registered positions. + """ + @spec has_positions?(t(), term()) :: boolean() + def has_positions?(table, shape_handle) do + :ets.member(table, {:shape_positions, shape_handle}) + end + + # -- Private helpers -- + + defp registered_positions(table, root_table) do + table + |> :ets.match({{:position_meta, root_table, :"$1"}, :"$2"}) + |> Enum.map(fn [pos, meta] -> {pos, meta} end) + |> Enum.uniq_by(fn {pos, _} -> pos end) + end + + defp positions_for_dependency(table, root_table, dep_index) do + table + |> :ets.match({{:position_meta, root_table, :"$1"}, :"$2"}) + |> Enum.filter(fn [_pos, meta] -> meta.dependency_index == dep_index end) + |> Enum.map(fn [pos, _meta] -> {root_table, pos} end) + |> Enum.uniq() + end + + defp evaluate_position_lhs(meta, record) do + # Extract the test expression from the sublink_membership_check AST + testexpr = extract_testexpr(meta.ast) + expr = Expr.wrap_parser_part(testexpr) + + case Runner.record_to_ref_values(expr.used_refs, record) do + {:ok, ref_values} -> + case Runner.execute(expr, ref_values) do + {:ok, value} -> {:ok, value} + _ -> :error + end + + _ -> + :error + end + end + + defp extract_testexpr(%{name: "sublink_membership_check", args: [testexpr, _]}), do: testexpr + + defp positive_candidates(table, root_table, pos, typed_value) do + # Lookup shapes that have this value in their positive membership + matching = + table + |> :ets.lookup({root_table, pos, typed_value}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + # Only include shapes that are registered as positive for this position + positive_shapes = + table + |> :ets.lookup({root_table, pos, :positive}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + MapSet.intersection(matching, positive_shapes) + end + + defp negated_candidates(table, root_table, pos, typed_value) do + # All shapes registered as negated for this position + all_negated = + table + |> :ets.lookup({root_table, pos, :negated}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + if MapSet.size(all_negated) == 0 do + MapSet.new() + else + # Shapes that have this value in their membership (which means the + # negated predicate is FALSE for them - the value IS in the subquery) + matching = + table + |> :ets.lookup({root_table, pos, typed_value}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + # Negated candidates = all negated shapes MINUS those that match + MapSet.difference(all_negated, matching) + end + end + + defp all_position_shapes(table, root_table, pos) do + positive = + table + |> :ets.lookup({root_table, pos, :positive}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + negated = + table + |> :ets.lookup({root_table, pos, :negated}) + |> Enum.map(fn {_, shape_handle} -> shape_handle end) + |> MapSet.new() + + MapSet.union(positive, negated) + end +end diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index b713613a0a..9b7bc16f3a 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -521,7 +521,8 @@ defmodule Electric.Shapes.FilterTest do where_cond: :ets.tab2list(filter.where_cond_table) |> Enum.sort(), eq_index: :ets.tab2list(filter.eq_index_table) |> Enum.sort(), incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort(), - subquery_shapes: :ets.tab2list(filter.subquery_shapes_table) |> Enum.sort() + subquery_shapes: :ets.tab2list(filter.subquery_shapes_table) |> Enum.sort(), + subquery_index: :ets.tab2list(filter.subquery_index) |> Enum.sort() } end From a182581a96edb2f6d4a0c548f04f66c359bfc686 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:45:42 +0000 Subject: [PATCH 42/63] Add callback-based subquery evaluation via SubqueryIndex (Stage 2) Add subquery_member_from_index/2 to WhereClause that creates a subquery_member? callback backed by the SubqueryIndex ETS table. This enables filter-side exact verification without loading full MapSet views, replacing the old refs_fun-backed path. Co-Authored-By: Claude Opus 4.6 --- .../lib/electric/shapes/where_clause.ex | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/packages/sync-service/lib/electric/shapes/where_clause.ex b/packages/sync-service/lib/electric/shapes/where_clause.ex index 6175e6336d..83cd53b803 100644 --- a/packages/sync-service/lib/electric/shapes/where_clause.ex +++ b/packages/sync-service/lib/electric/shapes/where_clause.ex @@ -1,6 +1,7 @@ defmodule Electric.Shapes.WhereClause do alias PgInterop.Sublink alias Electric.Replication.Eval.Runner + alias Electric.Shapes.Filter.SubqueryIndex @spec includes_record?(Electric.Replication.Eval.Expr.t() | nil, map(), ([String.t()], term() -> boolean())) :: @@ -26,4 +27,18 @@ defmodule Electric.Shapes.WhereClause do |> Sublink.member?(Map.get(extra_refs, subquery_ref, [])) end end + + @doc """ + Build a subquery_member? callback that queries the SubqueryIndex. + + Used for filter-side exact verification: checks whether a specific + shape currently contains a typed value for a canonical subquery ref. + """ + @spec subquery_member_from_index(SubqueryIndex.t(), term()) :: + ([String.t()], term() -> boolean()) + def subquery_member_from_index(index, shape_handle) do + fn subquery_ref, typed_value -> + SubqueryIndex.member?(index, shape_handle, subquery_ref, typed_value) + end + end end From d4d7b314663bd7bfd921bbdbc24653e51f25efc0 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:49:02 +0000 Subject: [PATCH 43/63] Seed and maintain dynamic membership from consumer runtime (Stage 3) Make SubqueryIndex ETS table :public and discoverable via persistent_term so consumers can write membership entries. Consumer seeds initial views into the index during initialize_subquery_runtime and marks shapes ready. Dynamic updates are applied by diffing routing views before/after each subquery state transition, with conservative projections during buffering (union for positive, intersection for negated dependencies). Co-Authored-By: Claude Opus 4.6 --- .../lib/electric/shapes/consumer.ex | 107 ++++++++++++++++++ .../lib/electric/shapes/filter.ex | 2 +- .../electric/shapes/filter/subquery_index.ex | 29 ++++- 3 files changed, 134 insertions(+), 4 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index ee7e6f67a0..ff8927af9b 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -8,6 +8,7 @@ defmodule Electric.Shapes.Consumer do alias Electric.Shapes.Consumer.Subqueries.NoSubqueries alias Electric.Shapes.Consumer.Subqueries.QueryRow alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Filter.SubqueryIndex import Electric.Shapes.Consumer.State, only: :macros require Electric.Replication.LogOffset @@ -894,9 +895,11 @@ defmodule Electric.Shapes.Consumer do end defp apply_subquery_event(state, event, opts \\ []) do + old_subquery_state = state.subquery_state {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) state = %{state | subquery_state: subquery_state} + update_subquery_index_from_state_change(state, old_subquery_state, subquery_state) previous_offset = state.latest_offset {state, num_changes, total_size, _latest_written} = @@ -1188,6 +1191,9 @@ defmodule Electric.Shapes.Consumer do {Map.put(views, ref, view), Map.put(mapping, handle, {index, ref})} end) + # Seed the reverse index with initial membership + seed_subquery_index(state, views) + %{ state | subquery_state: @@ -1220,6 +1226,107 @@ defmodule Electric.Shapes.Consumer do } end + defp seed_subquery_index(state, views) do + case SubqueryIndex.for_stack(state.stack_id) do + nil -> + :ok + + index -> + root_table = state.shape.root_table + + for {ref, view} <- views do + dep_index = ref |> List.last() |> String.to_integer() + + SubqueryIndex.seed_membership( + index, + state.shape_handle, + root_table, + ref, + dep_index, + view + ) + end + + SubqueryIndex.mark_ready(index, state.shape_handle, root_table) + end + end + + # Compare old and new subquery state views and update the index accordingly. + # For Steady states, we diff the exact views. + # For Steady->Buffering transitions, we use the conservative projection + # (union for positive, intersection for negated). + # For Buffering->Steady (splice), we diff from the buffering projection + # to the new steady exact view. + defp update_subquery_index_from_state_change(state, old_state, new_state) do + case SubqueryIndex.for_stack(state.stack_id) do + nil -> :ok + index -> do_update_subquery_index(index, state, old_state, new_state) + end + end + + defp do_update_subquery_index(index, state, old_state, new_state) do + old_views = get_routing_views(old_state) + new_views = get_routing_views(new_state) + + if old_views == new_views do + :ok + else + root_table = state.shape.root_table + shape_handle = state.shape_handle + + for {ref, new_view} <- new_views do + old_view = Map.get(old_views, ref, MapSet.new()) + dep_index = ref |> List.last() |> String.to_integer() + + added = MapSet.difference(new_view, old_view) + removed = MapSet.difference(old_view, new_view) + + for value <- added do + SubqueryIndex.add_value(index, shape_handle, root_table, ref, dep_index, value) + end + + for value <- removed do + SubqueryIndex.remove_value(index, shape_handle, root_table, ref, dep_index, value) + end + end + + :ok + end + end + + # Get the routing-relevant views from a subquery state. + # In steady state: exact views. + # In buffering: conservative projection (union for positive deps). + defp get_routing_views(%Subqueries.Steady{views: views}), do: views + + defp get_routing_views(%Subqueries.Buffering{ + views_before_move: before, + views_after_move: after_move, + dnf_plan: plan, + trigger_dep_index: trigger_dep + }) do + # For the triggering dependency, use union (conservative for positive) + # or intersection (conservative for negated) + polarity = Map.get(plan.dependency_polarities, trigger_dep, :positive) + + Map.merge(before, after_move, fn ref, before_view, after_view -> + dep_index = ref |> List.last() |> String.to_integer() + + if dep_index == trigger_dep do + case polarity do + :positive -> MapSet.union(before_view, after_view) + :negated -> MapSet.intersection(before_view, after_view) + end + else + # Non-triggering dependencies: use the current view (same in both) + after_view + end + end) + end + + defp get_routing_views(%NoSubqueries{}), do: %{} + defp get_routing_views(_), do: %{} + defp all_materializers_alive?(state) do Enum.all?(state.shape.shape_dependencies_handles, fn shape_handle -> name = Materializer.name(state.stack_id, shape_handle) diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index f776689a26..92e1fdffb2 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -50,7 +50,7 @@ defmodule Electric.Shapes.Filter do eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), subquery_shapes_table: :ets.new(:filter_subquery, [:set, :private]), - subquery_index: SubqueryIndex.new(), + subquery_index: SubqueryIndex.new(Keyword.take(opts, [:stack_id])), refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end) } end diff --git a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex index 69a246ba7a..47caf48837 100644 --- a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex @@ -29,10 +29,33 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do @doc """ Create a new SubqueryIndex ETS table. + + The table is `:public` so that consumer processes can seed and update + membership entries from their own process, while the ShapeLogCollector + (which owns the Filter) reads candidates during event routing. + + If `stack_id` is provided, the table ref is stored in persistent_term + so consumers can look it up via `for_stack/1`. + """ + @spec new(keyword()) :: t() + def new(opts \\ []) do + table = :ets.new(:subquery_index, [:bag, :public]) + + if stack_id = Keyword.get(opts, :stack_id) do + :persistent_term.put({__MODULE__, stack_id}, table) + end + + table + end + + @doc """ + Look up the SubqueryIndex table for a stack. + + Returns the table ref or `nil` if not yet initialized. """ - @spec new() :: t() - def new do - :ets.new(:subquery_index, [:bag, :private]) + @spec for_stack(String.t()) :: t() | nil + def for_stack(stack_id) do + :persistent_term.get({__MODULE__, stack_id}, nil) end @doc """ From 991bff4ce5a7bd29577cdf39289dfa79f179ba23 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:50:53 +0000 Subject: [PATCH 44/63] Use reverse index for subquery routing in Filter (Stage 5) Replace the unconditional subquery_shape_ids_for_table union in shapes_affected_by_record with reverse-index candidate lookup plus exact WHERE clause verification. Candidate shapes are verified against the full predicate using the SubqueryIndex-backed subquery_member? callback. Fallback shapes (not yet seeded by their consumer) pass through without verification for safety. Co-Authored-By: Claude Opus 4.6 --- .../lib/electric/shapes/filter.ex | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index 92e1fdffb2..a6caa7ca8d 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -23,6 +23,7 @@ defmodule Electric.Shapes.Filter do alias Electric.Shapes.Filter.SubqueryIndex alias Electric.Shapes.Filter.WhereCondition alias Electric.Shapes.Shape + alias Electric.Shapes.WhereClause alias Electric.Telemetry.OpenTelemetry require Logger @@ -221,7 +222,47 @@ defmodule Electric.Shapes.Filter do WhereCondition.affected_shapes(filter, where_cond_id, record) end - MapSet.union(from_where_condition, subquery_shape_ids_for_table(filter, table_name)) + from_subquery_index = + subquery_index_candidates(filter, table_name, record) + + MapSet.union(from_where_condition, from_subquery_index) + end + + # Use the reverse index to find candidate subquery shapes, then verify + # each candidate against the full WHERE clause for exact correctness. + # Fallback shapes (not yet seeded) pass through without verification. + defp subquery_index_candidates( + %Filter{subquery_index: index} = filter, + table_name, + record + ) do + candidates = SubqueryIndex.candidates_for_record(index, table_name, record) + + if MapSet.size(candidates) == 0 do + MapSet.new() + else + fallbacks = SubqueryIndex.fallback_shapes(index, table_name) + + # Verify non-fallback candidates against the full WHERE clause + for shape_id <- candidates, + # Fallback shapes pass through without verification + MapSet.member?(fallbacks, shape_id) or + verify_candidate(filter, shape_id, record), + into: MapSet.new() do + shape_id + end + end + end + + defp verify_candidate(%Filter{subquery_index: index} = filter, shape_id, record) do + case get_shape(filter, shape_id) do + nil -> + false + + shape -> + subquery_member? = WhereClause.subquery_member_from_index(index, shape_id) + WhereClause.includes_record?(shape.where, record, subquery_member?) + end end defp all_shape_ids(%Filter{} = filter) do From 844da637c0d63ce6bccfd239b5cf30cf3ac3965a Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 13:55:32 +0000 Subject: [PATCH 45/63] Remove legacy always-route path, use index for all subquery routing (Stage 6) Remove refs_fun from Filter struct and WhereCondition - subquery evaluation now uses SubqueryIndex-backed callbacks exclusively. Replace the unconditional subquery_shape_ids_for_table union with reverse-index candidate lookup in shapes_affected_by_record. Re-enable the previously skipped OR+subquery test which now works correctly with seeded index membership. Update existing tests to clarify they test fallback (unseeded) behavior. Co-Authored-By: Claude Opus 4.6 --- .../lib/electric/shapes/filter.ex | 6 +- .../electric/shapes/filter/where_condition.ex | 11 ++-- .../test/electric/shapes/filter_test.exs | 58 +++++++------------ 3 files changed, 30 insertions(+), 45 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index a6caa7ca8d..40a56f82b8 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -35,8 +35,7 @@ defmodule Electric.Shapes.Filter do :eq_index_table, :incl_index_table, :subquery_shapes_table, - :subquery_index, - :refs_fun + :subquery_index ] @type t :: %Filter{} @@ -51,8 +50,7 @@ defmodule Electric.Shapes.Filter do eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), subquery_shapes_table: :ets.new(:filter_subquery, [:set, :private]), - subquery_index: SubqueryIndex.new(Keyword.take(opts, [:stack_id])), - refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end) + subquery_index: SubqueryIndex.new(Keyword.take(opts, [:stack_id])) } end diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index 1f173408f9..6fbe2ffd9f 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -265,8 +265,12 @@ defmodule Electric.Shapes.Filter.WhereCondition do ) end - defp other_shapes_affected(%Filter{refs_fun: refs_fun} = filter, table, condition_id, record) - when is_function(refs_fun, 1) do + defp other_shapes_affected( + %Filter{subquery_index: index} = _filter, + table, + condition_id, + record + ) do [{_, {_index_keys, other_shapes}}] = :ets.lookup(table, condition_id) OpenTelemetry.with_child_span( @@ -274,11 +278,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do [shape_count: map_size(other_shapes)], fn -> for {shape_id, where} <- other_shapes, - shape = Filter.get_shape(filter, shape_id), WhereClause.includes_record?( where, record, - WhereClause.subquery_member_from_refs(refs_fun.(shape)) + WhereClause.subquery_member_from_index(index, shape_id) ), into: MapSet.new() do shape_id diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index 9b7bc16f3a..774eef812c 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -778,7 +778,7 @@ defmodule Electric.Shapes.FilterTest do } end - describe "subquery shapes are always routed in filter" do + describe "subquery shapes routing in filter" do import Support.DbSetup import Support.DbStructureSetup import Support.ComponentSetup @@ -795,7 +795,7 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "subquery shape is always routed for root table changes", + test "unseeded subquery shape is always routed for root table changes (fallback)", %{inspector: inspector} do {:ok, shape} = Shape.new("child", @@ -803,12 +803,8 @@ defmodule Electric.Shapes.FilterTest do where: "par_id = 7 AND id IN (SELECT id FROM parent)" ) - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) insert_matching = %NewRecord{ @@ -844,7 +840,7 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS incl_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS incl_child (id INT PRIMARY KEY, par_id INT REFERENCES incl_parent(id), tags int[] NOT NULL)" ] - test "subquery shape ignores inclusion and subquery values for routing", + test "unseeded subquery shape ignores inclusion and subquery values for routing (fallback)", %{inspector: inspector} do {:ok, shape} = Shape.new("incl_child", @@ -852,13 +848,8 @@ defmodule Electric.Shapes.FilterTest do where: "tags @> '{1,2}' AND id IN (SELECT id FROM incl_parent)" ) - # Create refs_fun that returns sublink values based on the shape - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([10, 20, 30])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) insert_matching = %NewRecord{ @@ -887,7 +878,7 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "all subquery shapes for the table are routed when multiple shapes exist", %{ + test "all unseeded subquery shapes for the table are routed (fallback)", %{ inspector: inspector } do {:ok, shape1} = @@ -902,16 +893,8 @@ defmodule Electric.Shapes.FilterTest do where: "par_id = 8 AND id IN (SELECT id FROM parent)" ) - refs_fun = fn shape -> - if shape.where.query =~ "par_id = 7" do - %{["$sublink", "0"] => MapSet.new([1, 2])} - else - %{["$sublink", "0"] => MapSet.new([3, 4])} - end - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape1) |> Filter.add_shape("shape2", shape2) @@ -941,7 +924,7 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS nested_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS nested_child (id INT PRIMARY KEY, field1 INT NOT NULL, field2 INT REFERENCES nested_parent(id))" ] - test "subquery shape with nested equality conditions is always routed", %{ + test "unseeded subquery shape with nested equality conditions is always routed (fallback)", %{ inspector: inspector } do {:ok, shape} = @@ -950,12 +933,8 @@ defmodule Electric.Shapes.FilterTest do where: "field1 = 10 AND field2 = 20 AND id IN (SELECT id FROM nested_parent)" ) - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) insert_matching = %NewRecord{ @@ -973,7 +952,6 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) end - @tag skip: true @tag with_sql: [ "CREATE TABLE IF NOT EXISTS or_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS or_child (id INT PRIMARY KEY, par_id INT REFERENCES or_parent(id), value TEXT NOT NULL)" @@ -984,20 +962,26 @@ defmodule Electric.Shapes.FilterTest do # Shape with OR combining a subquery and a simple condition. # OR is not optimisable, so the shape lands in other_shapes AND # gets registered in the sublink inverted index. Root table changes - # must still be routed to this shape. + # must still be routed to this shape once seeded. {:ok, shape} = Shape.new("or_child", inspector: inspector, where: "par_id IN (SELECT id FROM or_parent) OR value = 'target'" ) - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} + filter = Filter.new() + filter = Filter.add_shape(filter, "shape1", shape) + + # Seed the reverse index with subquery membership values + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + for value <- [1, 2, 3] do + Filter.SubqueryIndex.add_value(index, "shape1", root_table, subquery_ref, 0, value) end - filter = - Filter.new(refs_fun: refs_fun) - |> Filter.add_shape("shape1", shape) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) # Record matching the OR's simple condition (value = 'target') insert_matching_value = %NewRecord{ From 5883847e350b38aaf1115fcc1d5b4b0532cbd0d5 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 15:45:15 +0000 Subject: [PATCH 46/63] Finish off filter --- .../lib/electric/shapes/consumer.ex | 8 +- .../lib/electric/shapes/filter.ex | 116 ++----- .../lib/electric/shapes/filter/index.ex | 7 +- .../shapes/filter/indexes/equality_index.ex | 19 +- .../shapes/filter/indexes/inclusion_index.ex | 11 +- .../shapes/filter/indexes/subquery_index.ex | 103 ++++++ .../electric/shapes/filter/subquery_index.ex | 292 +++++++++--------- .../electric/shapes/filter/where_condition.ex | 79 ++++- .../lib/electric/shapes/where_clause.ex | 28 +- .../test/electric/shapes/filter_test.exs | 110 ++++++- 10 files changed, 500 insertions(+), 273 deletions(-) create mode 100644 packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index ff8927af9b..e5af2e70a9 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1268,9 +1268,7 @@ defmodule Electric.Shapes.Consumer do old_views = get_routing_views(old_state) new_views = get_routing_views(new_state) - if old_views == new_views do - :ok - else + if old_views != new_views do root_table = state.shape.root_table shape_handle = state.shape_handle @@ -1289,9 +1287,9 @@ defmodule Electric.Shapes.Consumer do SubqueryIndex.remove_value(index, shape_handle, root_table, ref, dep_index, value) end end - - :ok end + + :ok end # Get the routing-relevant views from a subquery state. diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index 40a56f82b8..2a294d96b1 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -23,7 +23,6 @@ defmodule Electric.Shapes.Filter do alias Electric.Shapes.Filter.SubqueryIndex alias Electric.Shapes.Filter.WhereCondition alias Electric.Shapes.Shape - alias Electric.Shapes.WhereClause alias Electric.Telemetry.OpenTelemetry require Logger @@ -34,7 +33,7 @@ defmodule Electric.Shapes.Filter do :where_cond_table, :eq_index_table, :incl_index_table, - :subquery_shapes_table, + :subquery_cond_table, :subquery_index ] @@ -49,7 +48,7 @@ defmodule Electric.Shapes.Filter do where_cond_table: :ets.new(:filter_where, [:set, :private]), eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), - subquery_shapes_table: :ets.new(:filter_subquery, [:set, :private]), + subquery_cond_table: :ets.new(:filter_subquery_cond, [:set, :private]), subquery_index: SubqueryIndex.new(Keyword.take(opts, [:stack_id])) } end @@ -79,32 +78,32 @@ defmodule Electric.Shapes.Filter do where_cond_id = get_or_create_table_condition(filter, shape.root_table) WhereCondition.add_shape(filter, where_cond_id, shape_id, shape.where) - maybe_track_subquery_shape(filter, shape_id, shape) + maybe_register_subquery_shape(filter, shape_id, shape) filter end - defp maybe_track_subquery_shape( - %Filter{subquery_shapes_table: table, subquery_index: index}, + defp maybe_register_subquery_shape( + %Filter{subquery_index: index}, shape_id, %Shape{shape_dependencies: [_ | _], root_table: root_table} = shape ) do - :ets.insert(table, {{root_table, shape_id}, true}) - maybe_register_subquery_index(index, shape_id, root_table, shape) - end - - defp maybe_track_subquery_shape(_filter, _shape_id, _shape), do: :ok - - defp maybe_register_subquery_index(index, shape_id, root_table, shape) do - case DnfPlan.compile(shape) do - {:ok, plan} -> - SubqueryIndex.register_shape(index, shape_id, root_table, plan) + try do + case DnfPlan.compile(shape) do + {:ok, plan} -> + SubqueryIndex.register_shape(index, shape_id, root_table, plan) + _ -> + SubqueryIndex.register_fallback_shape(index, shape_id, root_table) + end + rescue _ -> - :ok + SubqueryIndex.register_fallback_shape(index, shape_id, root_table) end end + defp maybe_register_subquery_shape(_filter, _shape_id, _shape), do: :ok + defp get_or_create_table_condition(filter, table_name) do case :ets.lookup(filter.tables_table, table_name) do [] -> @@ -133,22 +132,21 @@ defmodule Electric.Shapes.Filter do :ok -> :ok end - maybe_untrack_subquery_shape(filter, shape_id, shape) + maybe_unregister_subquery_shape(filter, shape_id, shape) :ets.delete(filter.shapes_table, shape_id) filter end - defp maybe_untrack_subquery_shape( - %Filter{subquery_shapes_table: table, subquery_index: index}, + defp maybe_unregister_subquery_shape( + %Filter{subquery_index: index}, shape_id, %Shape{shape_dependencies: [_ | _], root_table: root_table} ) do - :ets.delete(table, {root_table, shape_id}) SubqueryIndex.unregister_shape(index, shape_id, root_table) end - defp maybe_untrack_subquery_shape(_filter, _shape_id, _shape), do: :ok + defp maybe_unregister_subquery_shape(_filter, _shape_id, _shape), do: :ok @doc """ Returns the shape IDs for all shapes that have been added to the filter @@ -211,73 +209,25 @@ defmodule Electric.Shapes.Filter do end defp shapes_affected_by_record(filter, table_name, record) do - from_where_condition = + candidates_from_where_condition = case :ets.lookup(filter.tables_table, table_name) do [] -> MapSet.new() [{_, where_cond_id}] -> - WhereCondition.affected_shapes(filter, where_cond_id, record) - end - - from_subquery_index = - subquery_index_candidates(filter, table_name, record) - - MapSet.union(from_where_condition, from_subquery_index) - end - - # Use the reverse index to find candidate subquery shapes, then verify - # each candidate against the full WHERE clause for exact correctness. - # Fallback shapes (not yet seeded) pass through without verification. - defp subquery_index_candidates( - %Filter{subquery_index: index} = filter, - table_name, - record - ) do - candidates = SubqueryIndex.candidates_for_record(index, table_name, record) - - if MapSet.size(candidates) == 0 do - MapSet.new() - else - fallbacks = SubqueryIndex.fallback_shapes(index, table_name) - - # Verify non-fallback candidates against the full WHERE clause - for shape_id <- candidates, - # Fallback shapes pass through without verification - MapSet.member?(fallbacks, shape_id) or - verify_candidate(filter, shape_id, record), - into: MapSet.new() do - shape_id + WhereCondition.affected_shapes(filter, where_cond_id, table_name, record) end - end - end - - defp verify_candidate(%Filter{subquery_index: index} = filter, shape_id, record) do - case get_shape(filter, shape_id) do - nil -> - false - shape -> - subquery_member? = WhereClause.subquery_member_from_index(index, shape_id) - WhereClause.includes_record?(shape.where, record, subquery_member?) - end + candidates_from_where_condition end defp all_shape_ids(%Filter{} = filter) do - from_where_conditions = - :ets.foldl( - fn {_table_name, where_cond_id}, acc -> - MapSet.union(acc, WhereCondition.all_shape_ids(filter, where_cond_id)) - end, - MapSet.new(), - filter.tables_table - ) - - MapSet.union( - from_where_conditions, - filter.subquery_shapes_table - |> :ets.select([{{{:_, :"$1"}, :_}, [], [:"$1"]}]) - |> MapSet.new() + :ets.foldl( + fn {_table_name, where_cond_id}, acc -> + MapSet.union(acc, WhereCondition.all_shape_ids(filter, where_cond_id)) + end, + MapSet.new(), + filter.tables_table ) end @@ -288,13 +238,7 @@ defmodule Electric.Shapes.Filter do [{_, where_cond_id}] -> WhereCondition.all_shape_ids(filter, where_cond_id) end - MapSet.union(from_where_condition, subquery_shape_ids_for_table(filter, table_name)) - end - - defp subquery_shape_ids_for_table(%Filter{subquery_shapes_table: table}, table_name) do - table - |> :ets.select([{{{table_name, :"$1"}, :_}, [], [:"$1"]}]) - |> MapSet.new() + from_where_condition end @doc """ diff --git a/packages/sync-service/lib/electric/shapes/filter/index.ex b/packages/sync-service/lib/electric/shapes/filter/index.ex index 9ac79afcab..1078c87f6d 100644 --- a/packages/sync-service/lib/electric/shapes/filter/index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/index.ex @@ -10,9 +10,12 @@ defmodule Electric.Shapes.Filter.Index do alias Electric.Shapes.Filter alias Electric.Shapes.Filter.Indexes.EqualityIndex alias Electric.Shapes.Filter.Indexes.InclusionIndex + alias Electric.Shapes.Filter.Indexes.SubqueryIndex defp module_for("="), do: EqualityIndex defp module_for("@>"), do: InclusionIndex + defp module_for("subquery"), do: SubqueryIndex + defp module_for("$subquery"), do: SubqueryIndex # "in" delegates to EqualityIndex, registering the shape under each value def add_shape(%Filter{} = filter, where_cond_id, shape_id, %{operation: "in"} = optimisation) do @@ -54,8 +57,8 @@ defmodule Electric.Shapes.Filter.Index do module_for(op).remove_shape(filter, where_cond_id, shape_id, optimisation) end - def affected_shapes(%Filter{} = filter, where_cond_id, field, operation, record) do - module_for(operation).affected_shapes(filter, where_cond_id, field, record) + def affected_shapes(%Filter{} = filter, where_cond_id, table_name, field, operation, record) do + module_for(operation).affected_shapes(filter, where_cond_id, field, table_name, record) end def all_shape_ids(%Filter{} = filter, where_cond_id, field, operation) do diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex index 421e375c95..90e0e43953 100644 --- a/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex @@ -68,17 +68,24 @@ defmodule Electric.Shapes.Filter.Indexes.EqualityIndex do end end - def affected_shapes(%Filter{eq_index_table: table} = filter, condition_id, field, record) do + def affected_shapes( + %Filter{eq_index_table: table} = filter, + condition_id, + field, + table_name, + record + ) do case :ets.lookup(table, {:type, condition_id, field}) do [] -> MapSet.new() - [{_, type}] -> affected_shapes_for_type(filter, table, condition_id, field, record, type) + [{_, type}] -> + affected_shapes_for_type(filter, table, condition_id, field, table_name, record, type) end end - defp affected_shapes_for_type(filter, table, condition_id, field, record, type) do + defp affected_shapes_for_type(filter, table, condition_id, field, table_name, record, type) do case value_from_record(record, field, type) do {:ok, value} -> - affected_shapes_for_value(filter, table, condition_id, field, value, record) + affected_shapes_for_value(filter, table, condition_id, field, value, table_name, record) :error -> raise RuntimeError, @@ -86,13 +93,13 @@ defmodule Electric.Shapes.Filter.Indexes.EqualityIndex do end end - defp affected_shapes_for_value(filter, table, condition_id, field, value, record) do + defp affected_shapes_for_value(filter, table, condition_id, field, value, table_name, record) do case :ets.lookup(table, {condition_id, field, value}) do [] -> MapSet.new() [{_, {_type, next_condition_id}}] -> - WhereCondition.affected_shapes(filter, next_condition_id, record) + WhereCondition.affected_shapes(filter, next_condition_id, table_name, record, false) end end diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/inclusion_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/inclusion_index.ex index f6ba03ec8e..6a9d34051f 100644 --- a/packages/sync-service/lib/electric/shapes/filter/indexes/inclusion_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/inclusion_index.ex @@ -215,7 +215,13 @@ defmodule Electric.Shapes.Filter.Indexes.InclusionIndex do defp node_empty?(%{keys: [], condition_id: nil}), do: true defp node_empty?(_), do: false - def affected_shapes(%Filter{incl_index_table: table} = filter, condition_id, field, record) do + def affected_shapes( + %Filter{incl_index_table: table} = filter, + condition_id, + field, + table_name, + record + ) do case :ets.lookup(table, {:type, condition_id, field}) do [] -> MapSet.new() @@ -233,6 +239,7 @@ defmodule Electric.Shapes.Filter.Indexes.InclusionIndex do table: table, condition_id: condition_id, field: field, + table_name: table_name, record: record } @@ -264,7 +271,7 @@ defmodule Electric.Shapes.Filter.Indexes.InclusionIndex do defp shapes_affected_by_node(_ctx, %{condition_id: nil}), do: nil defp shapes_affected_by_node(ctx, %{condition_id: condition_id}) do - WhereCondition.affected_shapes(ctx.filter, condition_id, ctx.record) + WhereCondition.affected_shapes(ctx.filter, condition_id, ctx.table_name, ctx.record, false) end # key matches value, so check the child then continue with the rest diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex new file mode 100644 index 0000000000..5721dbdc4c --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex @@ -0,0 +1,103 @@ +defmodule Electric.Shapes.Filter.Indexes.SubqueryIndex do + @moduledoc """ + Routes shapes whose current WhereCondition node is a pure subquery predicate. + + Storage is kept in `subquery_cond_table` as: + + - `{{condition_id, shape_id}, next_condition_id}` + - `{:count, condition_id} -> non_neg_integer()` + + The actual subquery membership and candidate lookup live in + `Electric.Shapes.Filter.SubqueryIndex`; this module just ties those candidates + into the `WhereCondition` tree. + """ + + alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.SubqueryIndex, as: ReverseIndex + alias Electric.Shapes.Filter.WhereCondition + + @spec add_shape(Filter.t(), reference(), term(), map()) :: :ok + def add_shape(%Filter{subquery_cond_table: table} = filter, condition_id, shape_id, optimisation) do + next_condition_id = make_ref() + WhereCondition.init(filter, next_condition_id) + WhereCondition.add_shape(filter, next_condition_id, shape_id, optimisation.and_where) + + :ets.insert(table, {{condition_id, shape_id}, next_condition_id}) + increment_count(table, condition_id) + :ok + end + + @spec remove_shape(Filter.t(), reference(), term(), map()) :: :deleted | :ok + def remove_shape(%Filter{subquery_cond_table: table} = filter, condition_id, shape_id, optimisation) do + case :ets.lookup(table, {condition_id, shape_id}) do + [] -> + :deleted + + [{_, next_condition_id}] -> + _ = WhereCondition.remove_shape(filter, next_condition_id, shape_id, optimisation.and_where) + :ets.delete(table, {condition_id, shape_id}) + + if decrement_count(table, condition_id) == 0 do + :deleted + else + :ok + end + end + end + + @spec affected_shapes(Filter.t(), reference(), term(), String.t(), map()) :: MapSet.t() + def affected_shapes( + %Filter{subquery_cond_table: table, subquery_index: reverse_index} = filter, + condition_id, + _field, + table_name, + record + ) do + candidates = ReverseIndex.candidates_for_record(reverse_index, table_name, record) + + table + |> :ets.match({{condition_id, :"$1"}, :"$2"}) + |> Enum.reduce(MapSet.new(), fn [shape_id, next_condition_id], acc -> + if MapSet.member?(candidates, shape_id) do + MapSet.union( + acc, + WhereCondition.affected_shapes(filter, next_condition_id, table_name, record, false) + ) + else + acc + end + end) + end + + @spec all_shape_ids(Filter.t(), reference(), term()) :: MapSet.t() + def all_shape_ids(%Filter{subquery_cond_table: table} = filter, condition_id, _field) do + table + |> :ets.match({{condition_id, :"$1"}, :"$2"}) + |> Enum.reduce(MapSet.new(), fn [_shape_id, next_condition_id], acc -> + MapSet.union(acc, WhereCondition.all_shape_ids(filter, next_condition_id)) + end) + end + + defp increment_count(table, condition_id) do + count_key = {:count, condition_id} + + case :ets.lookup(table, count_key) do + [] -> :ets.insert(table, {count_key, 1}) + [{_, count}] -> :ets.insert(table, {count_key, count + 1}) + end + end + + defp decrement_count(table, condition_id) do + count_key = {:count, condition_id} + [{_, count}] = :ets.lookup(table, count_key) + new_count = count - 1 + + if new_count == 0 do + :ets.delete(table, count_key) + else + :ets.insert(table, {count_key, new_count}) + end + + new_count + end +end diff --git a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex index 47caf48837..873c04ebb7 100644 --- a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex @@ -7,17 +7,16 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do ## ETS key patterns - - Candidate lookup: `{{root_table, position_id, typed_value}, shape_handle}` + - Candidate lookup: `{{root_table, position_id, typed_value}, true}` - Exact membership (for callback-based evaluation): - `{{shape_handle, subquery_ref, typed_value}, true}` + `{{:membership, shape_handle, subquery_ref, typed_value}, true}` - Per-position metadata: `{{:position_meta, root_table, position_id}, %{...}}` - - Per-position registered handles: - `{{root_table, position_id, :positive}, shape_handle}` - `{{root_table, position_id, :negated}, shape_handle}` - - Per-shape positions (for teardown): - `{{:shape_positions, shape_handle}, [{root_table, position_id}, ...]}` - - Fallback shapes (not yet seeded by consumer): + - Per-table registered positions: + `{{:table_position, root_table}, position_id}` + - Per-shape positions: + `{{:shape_positions, shape_handle}, [position_id, ...]}` + - Fallback shapes (not yet seeded or not indexable): `{{:fallback, root_table, shape_handle}, true}` """ @@ -26,6 +25,7 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do alias Electric.Shapes.DnfPlan @type t :: :ets.tid() + @type position_id :: {term(), non_neg_integer()} @doc """ Create a new SubqueryIndex ETS table. @@ -61,9 +61,8 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do @doc """ Register a shape's subquery positions from a compiled DnfPlan. - Extracts every subquery position that can be evaluated from the - root-table record and registers its static metadata and shape-handle - polarity in the index. + Each position id is scoped to the shape so shapes on the same root table do + not collide just because their DNF plans both have a position `0`. """ @spec register_shape(t(), term(), term(), DnfPlan.t()) :: :ok def register_shape(table, shape_handle, root_table, %DnfPlan{} = plan) do @@ -71,10 +70,10 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do plan.positions |> Enum.filter(fn {_pos, info} -> info.is_subquery end) |> Enum.map(fn {pos, info} -> - # Register position metadata (idempotent - :bag will add duplicates, but - # we look up by match so this is fine; we deduplicate on read) + position_id = {shape_handle, pos} + :ets.insert(table, { - {:position_meta, root_table, pos}, + {:position_meta, root_table, position_id}, %{ ast: info.ast, dependency_index: info.dependency_index, @@ -83,52 +82,43 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do } }) - # Register shape handle under the appropriate polarity - polarity = if info.negated, do: :negated, else: :positive - :ets.insert(table, {{root_table, pos, polarity}, shape_handle}) - - {root_table, pos} + :ets.insert(table, {{:table_position, root_table}, position_id}) + position_id end) - # Store shape positions for teardown :ets.insert(table, {{:shape_positions, shape_handle}, positions}) - - # Register as fallback until consumer seeds membership :ets.insert(table, {{:fallback, root_table, shape_handle}, true}) :ok end + @doc """ + Register a shape on the conservative fallback path without any indexed + positions. Used for shapes whose subquery positions are not indexable. + """ + @spec register_fallback_shape(t(), term(), term()) :: :ok + def register_fallback_shape(table, shape_handle, root_table) do + :ets.insert_new(table, {{:shape_positions, shape_handle}, []}) + :ets.insert(table, {{:fallback, root_table, shape_handle}, true}) + :ok + end + @doc """ Unregister a shape from the index. - Removes all reverse-index rows, polarity registrations, - position metadata (if no other shapes use it), and fallback entries. + Removes all reverse-index rows, exact membership entries, static position + metadata, and fallback/buffering flags for the shape. """ @spec unregister_shape(t(), term(), term()) :: :ok def unregister_shape(table, shape_handle, root_table) do - # Get shape positions - positions = - case :ets.lookup(table, {:shape_positions, shape_handle}) do - [{_, pos_list}] -> pos_list - [] -> [] - end - - # Remove polarity registrations and candidate lookup entries - for {rt, pos} <- positions do - :ets.match_delete(table, {{rt, pos, :positive}, shape_handle}) - :ets.match_delete(table, {{rt, pos, :negated}, shape_handle}) - # Remove candidate lookup entries for this shape - :ets.match_delete(table, {{rt, pos, :_}, shape_handle}) + for position_id <- positions_for_shape(table, shape_handle) do + :ets.match_delete(table, {{root_table, position_id, :_}, :_}) + :ets.delete(table, {:position_meta, root_table, position_id}) + :ets.match_delete(table, {{:table_position, root_table}, position_id}) end - # Remove exact membership entries - :ets.match_delete(table, {{shape_handle, :_, :_}, true}) - - # Remove shape positions + :ets.match_delete(table, {{:membership, shape_handle, :_, :_}, true}) :ets.delete(table, {:shape_positions, shape_handle}) - - # Remove fallback entry :ets.delete(table, {:fallback, root_table, shape_handle}) :ok @@ -142,26 +132,21 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do """ @spec seed_membership(t(), term(), term(), [String.t()], non_neg_integer(), MapSet.t()) :: :ok def seed_membership(table, shape_handle, root_table, subquery_ref, dep_index, view) do - # Find all positions for this dependency index on this root table - positions = positions_for_dependency(table, root_table, dep_index) + positions = positions_for_shape_dependency(table, shape_handle, root_table, dep_index) - # Add candidate lookup entries for each position - for {_rt, pos} <- positions, value <- view do - :ets.insert(table, {{root_table, pos, value}, shape_handle}) + for position_id <- positions, value <- view do + add_candidate_value(table, root_table, position_id, value) end - # Add exact membership entries keyed by canonical subquery_ref for value <- view do - :ets.insert(table, {{shape_handle, subquery_ref, value}, true}) + add_exact_member(table, shape_handle, subquery_ref, value) end :ok end @doc """ - Mark a shape as ready (remove from fallback set). - - Called after the consumer has seeded all its dependency views. + Mark a shape as ready for indexed routing. """ @spec mark_ready(t(), term(), term()) :: :ok def mark_ready(table, shape_handle, root_table) do @@ -170,32 +155,70 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do end @doc """ - Add a value to the reverse index for a specific shape and dependency. + Add a value to both the routing index and the exact membership set for a + specific shape dependency. """ @spec add_value(t(), term(), term(), [String.t()], non_neg_integer(), term()) :: :ok def add_value(table, shape_handle, root_table, subquery_ref, dep_index, value) do - positions = positions_for_dependency(table, root_table, dep_index) + positions = positions_for_shape_dependency(table, shape_handle, root_table, dep_index) - for {_rt, pos} <- positions do - :ets.insert(table, {{root_table, pos, value}, shape_handle}) + for position_id <- positions do + add_candidate_value(table, root_table, position_id, value) end - :ets.insert(table, {{shape_handle, subquery_ref, value}, true}) + add_exact_member(table, shape_handle, subquery_ref, value) :ok end @doc """ - Remove a value from the reverse index for a specific shape and dependency. + Remove a value from both the routing index and the exact membership set for a + specific shape dependency. """ @spec remove_value(t(), term(), term(), [String.t()], non_neg_integer(), term()) :: :ok def remove_value(table, shape_handle, root_table, subquery_ref, dep_index, value) do - positions = positions_for_dependency(table, root_table, dep_index) + positions = positions_for_shape_dependency(table, shape_handle, root_table, dep_index) - for {_rt, pos} <- positions do - :ets.match_delete(table, {{root_table, pos, value}, shape_handle}) + for position_id <- positions do + remove_candidate_value(table, root_table, position_id, value) end - :ets.match_delete(table, {{shape_handle, subquery_ref, value}, true}) + remove_exact_member(table, shape_handle, subquery_ref, value) + :ok + end + + @doc """ + Add a value to one concrete routing position. + """ + @spec add_candidate_value(t(), term(), position_id(), term()) :: :ok + def add_candidate_value(table, root_table, position_id, value) do + :ets.insert(table, {{root_table, position_id, value}, true}) + :ok + end + + @doc """ + Remove a value from one concrete routing position. + """ + @spec remove_candidate_value(t(), term(), position_id(), term()) :: :ok + def remove_candidate_value(table, root_table, position_id, value) do + :ets.delete(table, {root_table, position_id, value}) + :ok + end + + @doc """ + Add one exact membership value for callback-based verification. + """ + @spec add_exact_member(t(), term(), [String.t()], term()) :: :ok + def add_exact_member(table, shape_handle, subquery_ref, value) do + :ets.insert(table, {{:membership, shape_handle, subquery_ref, value}, true}) + :ok + end + + @doc """ + Remove one exact membership value for callback-based verification. + """ + @spec remove_exact_member(t(), term(), [String.t()], term()) :: :ok + def remove_exact_member(table, shape_handle, subquery_ref, value) do + :ets.delete(table, {:membership, shape_handle, subquery_ref, value}) :ok end @@ -203,28 +226,23 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do Get candidate shape handles for a record change on a root table. Evaluates the left-hand comparison expression for each registered - subquery position against the record, looks up matching handles, - and derives candidates for both positive and negated positions. - - Returns the union of all candidate handles plus any fallback shapes. + subquery position against the record, looks up matching candidates, + and derives the candidate set for both positive and negated positions. """ @spec candidates_for_record(t(), term(), map()) :: MapSet.t() def candidates_for_record(table, root_table, record) do - positions = registered_positions(table, root_table) - indexed_candidates = - Enum.reduce(positions, MapSet.new(), fn {pos, meta}, acc -> - case evaluate_position_lhs(meta, record) do - {:ok, typed_value} -> - positive = positive_candidates(table, root_table, pos, typed_value) - negated = negated_candidates(table, root_table, pos, typed_value) - acc |> MapSet.union(positive) |> MapSet.union(negated) - - :error -> - # Can't evaluate - include all shapes for this position as candidates - all_position_shapes(table, root_table, pos) - |> MapSet.union(acc) - end + Enum.reduce(registered_positions(table, root_table), MapSet.new(), fn {position_id, meta}, acc -> + candidates = + case evaluate_position_lhs(meta, record) do + {:ok, typed_value} -> + candidates_for_position(table, root_table, position_id, typed_value, meta) + + :error -> + MapSet.new([shape_handle_from_position(position_id)]) + end + + MapSet.union(acc, candidates) end) MapSet.union(indexed_candidates, fallback_shapes(table, root_table)) @@ -233,12 +251,10 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do @doc """ Check if a specific shape has a value in its current dependency view for a canonical subquery ref. - - Used for callback-based exact verification in WhereClause evaluation. """ @spec member?(t(), term(), [String.t()], term()) :: boolean() def member?(table, shape_handle, subquery_ref, typed_value) do - :ets.member(table, {shape_handle, subquery_ref, typed_value}) + :ets.member(table, {:membership, shape_handle, subquery_ref, typed_value}) end @doc """ @@ -268,25 +284,57 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do :ets.member(table, {:shape_positions, shape_handle}) end + @doc """ + Return the registered positions for a shape. + """ + @spec positions_for_shape(t(), term()) :: [position_id()] + def positions_for_shape(table, shape_handle) do + case :ets.lookup(table, {:shape_positions, shape_handle}) do + [{_, positions}] -> positions + [] -> [] + end + end + # -- Private helpers -- defp registered_positions(table, root_table) do table - |> :ets.match({{:position_meta, root_table, :"$1"}, :"$2"}) - |> Enum.map(fn [pos, meta] -> {pos, meta} end) - |> Enum.uniq_by(fn {pos, _} -> pos end) + |> :ets.match({{:table_position, root_table}, :"$1"}) + |> List.flatten() + |> Enum.uniq() + |> Enum.flat_map(fn position_id -> + case :ets.lookup(table, {:position_meta, root_table, position_id}) do + [{_, meta}] -> [{position_id, meta}] + [] -> [] + end + end) end - defp positions_for_dependency(table, root_table, dep_index) do + defp positions_for_shape_dependency(table, shape_handle, root_table, dep_index) do table - |> :ets.match({{:position_meta, root_table, :"$1"}, :"$2"}) - |> Enum.filter(fn [_pos, meta] -> meta.dependency_index == dep_index end) - |> Enum.map(fn [pos, _meta] -> {root_table, pos} end) - |> Enum.uniq() + |> positions_for_shape(shape_handle) + |> Enum.filter(fn position_id -> + case :ets.lookup(table, {:position_meta, root_table, position_id}) do + [{_, %{dependency_index: ^dep_index}}] -> true + _ -> false + end + end) + end + + defp candidates_for_position(table, root_table, position_id, typed_value, meta) do + shape_handle = shape_handle_from_position(position_id) + matched? = :ets.member(table, {root_table, position_id, typed_value}) + + case {meta.polarity, matched?} do + {:positive, true} -> MapSet.new([shape_handle]) + {:negated, false} -> MapSet.new([shape_handle]) + _ -> MapSet.new() + end end + defp shape_handle_from_position({shape_handle, _pos}), do: shape_handle + defp evaluate_position_lhs(meta, record) do - # Extract the test expression from the sublink_membership_check AST testexpr = extract_testexpr(meta.ast) expr = Expr.wrap_parser_part(testexpr) @@ -303,62 +351,4 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do end defp extract_testexpr(%{name: "sublink_membership_check", args: [testexpr, _]}), do: testexpr - - defp positive_candidates(table, root_table, pos, typed_value) do - # Lookup shapes that have this value in their positive membership - matching = - table - |> :ets.lookup({root_table, pos, typed_value}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - # Only include shapes that are registered as positive for this position - positive_shapes = - table - |> :ets.lookup({root_table, pos, :positive}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - MapSet.intersection(matching, positive_shapes) - end - - defp negated_candidates(table, root_table, pos, typed_value) do - # All shapes registered as negated for this position - all_negated = - table - |> :ets.lookup({root_table, pos, :negated}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - if MapSet.size(all_negated) == 0 do - MapSet.new() - else - # Shapes that have this value in their membership (which means the - # negated predicate is FALSE for them - the value IS in the subquery) - matching = - table - |> :ets.lookup({root_table, pos, typed_value}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - # Negated candidates = all negated shapes MINUS those that match - MapSet.difference(all_negated, matching) - end - end - - defp all_position_shapes(table, root_table, pos) do - positive = - table - |> :ets.lookup({root_table, pos, :positive}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - negated = - table - |> :ets.lookup({root_table, pos, :negated}) - |> Enum.map(fn {_, shape_handle} -> shape_handle end) - |> MapSet.new() - - MapSet.union(positive, negated) - end end diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index 6fbe2ffd9f..d1bf500e13 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -18,8 +18,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do alias Electric.Replication.Eval.Parser.Const alias Electric.Replication.Eval.Parser.Func alias Electric.Replication.Eval.Parser.Ref + alias Electric.Replication.Eval.Parser.RowExpr alias Electric.Shapes.Filter alias Electric.Shapes.Filter.Index + alias Electric.Shapes.Filter.SubqueryIndex alias Electric.Shapes.WhereClause alias Electric.Telemetry.OpenTelemetry @@ -103,6 +105,22 @@ defmodule Electric.Shapes.Filter.WhereCondition do %{operation: "@>", field: field, type: type, value: [value], and_where: nil} end + defp optimise_where(%Func{name: "sublink_membership_check"} = subquery) do + if simple_subquery_testexpr?(subquery) do + %{operation: "subquery", field: "$subquery", and_where: nil} + else + :not_optimised + end + end + + defp optimise_where(%Func{name: "not", args: [%Func{name: "sublink_membership_check"} = subquery]}) do + if simple_subquery_testexpr?(subquery) do + %{operation: "subquery", field: "$subquery", and_where: nil} + else + :not_optimised + end + end + # field IN (const1, const2, ...) → reuse = index with multiple values defp optimise_where(%Func{name: "or"} = expr) do case flatten_or_equalities(expr) do @@ -116,10 +134,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do defp optimise_where(%Func{name: "and", args: [arg1, arg2]}) do case {optimise_where(arg1), optimise_where(arg2)} do - {%{operation: op, and_where: nil} = params, _} when op in ["=", "@>", "in"] -> + {%{operation: op, and_where: nil} = params, _} when op in ["=", "@>", "in", "subquery"] -> %{params | and_where: where_expr(arg2)} - {_, %{operation: op, and_where: nil} = params} when op in ["=", "@>", "in"] -> + {_, %{operation: op, and_where: nil} = params} when op in ["=", "@>", "in", "subquery"] -> %{params | and_where: where_expr(arg1)} _ -> @@ -131,6 +149,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do # "in" shares the EqualityIndex with "=", so use the same index key defp index_key("in"), do: "=" + defp index_key("subquery"), do: "$subquery" defp index_key(op), do: op defp where_expr(eval) do @@ -232,11 +251,22 @@ defmodule Electric.Shapes.Filter.WhereCondition do :ok end - def affected_shapes(%Filter{where_cond_table: table} = filter, condition_id, record) do - MapSet.union( - indexed_shapes_affected(filter, condition_id, record), - other_shapes_affected(filter, table, condition_id, record) - ) + def affected_shapes(%Filter{} = filter, condition_id, table_name, record) do + affected_shapes(filter, condition_id, table_name, record, true) + end + + def affected_shapes(%Filter{where_cond_table: table} = filter, condition_id, table_name, record, include_fallback?) do + affected = + MapSet.union( + indexed_shapes_affected(filter, condition_id, table_name, record), + other_shapes_affected(filter, table, condition_id, table_name, record) + ) + + if include_fallback? do + MapSet.union(affected, SubqueryIndex.fallback_shapes(filter.subquery_index, table_name)) + else + affected + end rescue error -> Logger.error(""" @@ -248,7 +278,12 @@ defmodule Electric.Shapes.Filter.WhereCondition do all_shape_ids(filter, condition_id) end - defp indexed_shapes_affected(%Filter{where_cond_table: table} = filter, condition_id, record) do + defp indexed_shapes_affected( + %Filter{where_cond_table: table} = filter, + condition_id, + table_name, + record + ) do OpenTelemetry.with_child_span( "filter.filter_using_indexes", [], @@ -258,7 +293,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do index_keys |> Enum.map(fn {field, operation} -> - Index.affected_shapes(filter, condition_id, field, operation, record) + Index.affected_shapes(filter, condition_id, table_name, field, operation, record) end) |> Enum.reduce(MapSet.new(), &MapSet.union(&1, &2)) end @@ -269,6 +304,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do %Filter{subquery_index: index} = _filter, table, condition_id, + _table_name, record ) do [{_, {_index_keys, other_shapes}}] = :ets.lookup(table, condition_id) @@ -278,11 +314,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do [shape_count: map_size(other_shapes)], fn -> for {shape_id, where} <- other_shapes, - WhereClause.includes_record?( - where, - record, - WhereClause.subquery_member_from_index(index, shape_id) - ), + other_shape_matches?(index, shape_id, where, record), into: MapSet.new() do shape_id end @@ -290,6 +322,25 @@ defmodule Electric.Shapes.Filter.WhereCondition do ) end + defp other_shape_matches?(index, shape_id, where, record) do + case WhereClause.includes_record_result( + where, + record, + WhereClause.subquery_member_from_index(index, shape_id) + ) do + {:ok, included?} -> included? + :error -> true + end + end + + defp simple_subquery_testexpr?(%Func{name: "sublink_membership_check", args: [testexpr, _ref]}) do + case testexpr do + %Ref{path: [_field]} -> true + %RowExpr{elements: elements} -> Enum.all?(elements, &match?(%Ref{path: [_]}, &1)) + _ -> false + end + end + def all_shape_ids(%Filter{where_cond_table: table} = filter, condition_id) do case :ets.lookup(table, condition_id) do [] -> diff --git a/packages/sync-service/lib/electric/shapes/where_clause.ex b/packages/sync-service/lib/electric/shapes/where_clause.ex index 83cd53b803..32e7a98804 100644 --- a/packages/sync-service/lib/electric/shapes/where_clause.ex +++ b/packages/sync-service/lib/electric/shapes/where_clause.ex @@ -3,6 +3,25 @@ defmodule Electric.Shapes.WhereClause do alias Electric.Replication.Eval.Runner alias Electric.Shapes.Filter.SubqueryIndex + @spec includes_record_result( + Electric.Replication.Eval.Expr.t() | nil, + map(), + ([String.t()], term() -> boolean()) + ) :: {:ok, boolean()} | :error + def includes_record_result(where_clause, record, subquery_member? \\ fn _, _ -> false end) + def includes_record_result(nil = _where_clause, _record, _), do: {:ok, true} + + def includes_record_result(where_clause, record, subquery_member?) + when is_function(subquery_member?, 2) do + with {:ok, refs} <- Runner.record_to_ref_values(where_clause.used_refs, record), + {:ok, evaluated} <- + Runner.execute(where_clause, refs, subquery_member?: subquery_member?) do + {:ok, not is_nil(evaluated) and evaluated != false} + else + _ -> :error + end + end + @spec includes_record?(Electric.Replication.Eval.Expr.t() | nil, map(), ([String.t()], term() -> boolean())) :: boolean() @@ -11,12 +30,9 @@ defmodule Electric.Shapes.WhereClause do def includes_record?(where_clause, record, subquery_member?) when is_function(subquery_member?, 2) do - with {:ok, refs} <- Runner.record_to_ref_values(where_clause.used_refs, record), - {:ok, evaluated} <- - Runner.execute(where_clause, refs, subquery_member?: subquery_member?) do - if is_nil(evaluated), do: false, else: evaluated - else - _ -> false + case includes_record_result(where_clause, record, subquery_member?) do + {:ok, included?} -> included? + :error -> false end end diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index 774eef812c..6676104d3d 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -521,7 +521,7 @@ defmodule Electric.Shapes.FilterTest do where_cond: :ets.tab2list(filter.where_cond_table) |> Enum.sort(), eq_index: :ets.tab2list(filter.eq_index_table) |> Enum.sort(), incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort(), - subquery_shapes: :ets.tab2list(filter.subquery_shapes_table) |> Enum.sort(), + subquery_cond: :ets.tab2list(filter.subquery_cond_table) |> Enum.sort(), subquery_index: :ets.tab2list(filter.subquery_index) |> Enum.sort() } end @@ -1016,5 +1016,113 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, update_into_shape) == MapSet.new(["shape1"]) end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "seeded subquery shape reached via non-subquery indexes is still verified against the full predicate", + %{inspector: inspector} do + {:ok, shape} = + Shape.new("child", + inspector: inspector, + where: "par_id = 7 AND id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + Filter.SubqueryIndex.add_value(index, "shape1", root_table, subquery_ref, 0, 1) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + wrong_subquery_value = %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "99", "par_id" => "7"} + } + + assert Filter.affected_shapes(filter, wrong_subquery_value) == MapSet.new([]) + + matching_record = %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "1", "par_id" => "7"} + } + + assert Filter.affected_shapes(filter, matching_record) == MapSet.new(["shape1"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "subquery positions are isolated per shape even when DNF positions overlap", %{ + inspector: inspector + } do + {:ok, shape1} = + Shape.new("child", + inspector: inspector, + where: "id IN (SELECT id FROM parent)" + ) + + {:ok, shape2} = + Shape.new("child", + inspector: inspector, + where: "par_id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape1) + |> Filter.add_shape("shape2", shape2) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape1.root_table + + Filter.SubqueryIndex.add_value(index, "shape1", root_table, subquery_ref, 0, 1) + Filter.SubqueryIndex.add_value(index, "shape2", root_table, subquery_ref, 0, 1) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + Filter.SubqueryIndex.mark_ready(index, "shape2", root_table) + + change = %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "50", "par_id" => "1"} + } + + assert Filter.affected_shapes(filter, change) == MapSet.new(["shape2"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "remove_shape cleans up subquery index metadata and values", %{inspector: inspector} do + {:ok, shape} = + Shape.new("child", + inspector: inspector, + where: "id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + Filter.SubqueryIndex.add_value(index, "shape1", root_table, subquery_ref, 0, 1) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + assert :ets.tab2list(index) != [] + + Filter.remove_shape(filter, "shape1") + + assert :ets.tab2list(index) == [] + end end end From 2b29c2147fa0fc35e548bb543e6c6e80ad781648 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 17:01:56 +0000 Subject: [PATCH 47/63] Docs: update filter docs --- .../simple-subqueries-filter-plan.md | 473 +++--------------- .../sync-service/simple-subqueries-filter.md | 133 +++-- 2 files changed, 127 insertions(+), 479 deletions(-) diff --git a/packages/sync-service/simple-subqueries-filter-plan.md b/packages/sync-service/simple-subqueries-filter-plan.md index 7d5d825a4c..d0e285f0f8 100644 --- a/packages/sync-service/simple-subqueries-filter-plan.md +++ b/packages/sync-service/simple-subqueries-filter-plan.md @@ -4,438 +4,95 @@ Related: - `./simple-subqueries-filter.md` - `./simple-subqueries.md` - `./simple-subqueries-with-dnf-plan.md` +- `./finish-filter-plan.md` ## Goal -Replace the current "route every subquery shape on root-table changes" behaviour +Replace the old "route every subquery shape on root-table changes" behaviour with an ETS-backed reverse index that gives `Shapes.Filter` a small candidate set, while keeping the existing splice/buffering correctness model for subquery moves. -## Current Baseline +## Status: Implementation Complete -- `Filter.add_shape/3` already adds subquery shapes into `WhereCondition`, but - `Filter.shapes_affected_by_record/3` also unions in - `subquery_shape_ids_for_table/2`, so every subquery shape for the root table - is routed for every root-table row change. -- That `subquery_shapes_table` path is a temporary oversend hack, not the - desired fallback design. It currently oversends even after startup for shapes - whose root-table predicate should reject the row, including non-optimisable - `other_shapes` cases such as `OR + subquery`. -- `WhereCondition.other_shapes_affected/4` still evaluates subqueries by - calling `WhereClause.includes_record?/3` with `refs_fun.(shape)`, which means - filter-side evaluation needs access to full subquery views. -- `ShapeLogCollector` no longer wires `Materializer.get_all_as_refs/2` into the - production `EventRouter`, so the old `refs_fun` path is no longer a viable - runtime answer for exact filter-side subquery checks. The reverse-index - callback path is required to remove the oversend hack safely. -- The consumer subquery runtime already has the exact view timeline we need: - `Steady.views`, `Buffering.views_before_move`, and `Buffering.views_after_move`. -- `DnfPlan` already preserves the per-position metadata that matters for - subqueries, including repeated occurrences of the same dependency and - negation. That is a better source of filter metadata than the legacy - `shape.subquery_comparison_expressions` map. +All six implementation stages have been landed. The core reverse-index design +is live and the legacy always-route path has been removed. -## Design Choice +### What Was Built -The reverse index should store actual subquery membership, not the complement. +1. **`Electric.Shapes.Filter.SubqueryIndex`** — stack-scoped ETS table storing + candidate lookup entries, exact membership entries, position metadata, + fallback state, and per-shape teardown metadata. -Concretely: +2. **`Filter.add_shape/3`** compiles a `DnfPlan`, extracts indexable subquery + positions, and registers them in the `SubqueryIndex`. Non-indexable shapes + are registered as fallback shapes. -- for a positive predicate, candidates come from the matching membership rows -- for a negated predicate, candidates come from the registered negated shapes - for that position minus the matching membership rows +3. **Consumer runtime** (`Consumer.initialize_subquery_runtime/1`) seeds the + reverse index from current dependency views on startup, then maintains it + via `update_subquery_index_from_state_change/3` as dependency views move. -That is the key refinement to the proposal in -`simple-subqueries-filter.md`. It keeps the ETS data structure representable -and still yields the same safe candidate-set semantics during move buffering. +4. **Callback-based `WhereClause` evaluation** — filter-side exact verification + uses `SubqueryIndex.member?/3` instead of loading full `MapSet` views. -## Runtime Shape +5. **`Filter.shapes_affected_by_record/3`** uses reverse-index candidates plus + exact verification via `WhereCondition` and the callback-backed + `other_shapes` path. -We need three distinct things: +6. **Legacy always-route path removed** — the unconditional + `subquery_shape_ids_for_table/2` union is gone. Only the conservative + fallback set remains for startup/restore and unsupported positions. -1. static metadata for each subquery position on a root table -2. dynamic membership entries that consumers update as their dependency views - move -3. a conservative fallback for shapes whose reverse-index state is not ready - yet +## Canonical Design -## Stage 1: Add a Stack-Scoped Reverse Index Runtime +The reverse index is the single runtime truth for filter-side subquery +membership. There is no separate "routing projection" module. -Create a new module, for example `Electric.Shapes.Filter.SubqueryIndex`, with a -named public ETS table per stack. +- The reverse index stores positive membership (actual subquery view values). +- For positive predicates, candidates come from matching membership rows. +- For negated predicates, candidates come from registered negated shapes for + the position minus the matching membership rows (complement at read time). +- During buffering, the index stores a conservative visibility image: + - positive dependency: `before ∪ after` + - negated dependency: `before ∩ after` +- Exact final inclusion for streamed changes belongs to + `Shape.convert_change/3`. -Suggested storage shape: +This design was chosen over the originally proposed split-state approach +(separate exact membership and routing projection) because: -- candidate lookup entries: - - `{{root_table, position_id, typed_value}, shape_handle} -> true` -- exact membership entries for callback-based evaluation: - - `{{shape_handle, subquery_ref, typed_value}, true}` -- per-position metadata: - - `{root_table, position_id} -> %{expr: Expr.t(), dependency_index: non_neg_integer(), subquery_ref: [String.t()], polarity: :positive | :negated}` -- per-position registered handles: - - `{{root_table, position_id, :positive}, shape_handle} -> true` - - `{{root_table, position_id, :negated}, shape_handle} -> true` -- per-shape position metadata for teardown: - - `{shape_handle, :positions} -> [{root_table, position_id}, ...]` +- it keeps one runtime representation instead of two coupled ones +- it preserves the safety property: oversend is fine, undersend is not +- it avoids synchronization bugs between "exact" and "routing-only" state -Notes: +## ETS Storage Shape -- `position_id` should come from `DnfPlan` position metadata, not from - `shape.subquery_comparison_expressions`, because repeated dependency - occurrences can share a dependency handle but still need distinct filter - lookups. -- exact membership should be keyed by canonical `subquery_ref` / dependency - index, not by `position_id`. `WhereClause` evaluation only sees the - canonicalized sublink refs in the expression AST, so the callback can answer - "is this value in this shape's current dependency view?" but it does not know - which DNF position triggered the check. -- The table should be initialized in a stack-owned bootstrap path such as - `ShapeLogCollector.init/1`, not lazily inside a consumer, so the router and - the consumers agree on the table name from startup onward. +- candidate lookup: `{{root_table, position_id, typed_value}, true}` +- exact membership: `{{:membership, shape_handle, subquery_ref, typed_value}, true}` +- position metadata: `{{:position_meta, root_table, position_id}, %{ast, dependency_index, subquery_ref, polarity}}` +- table positions: `{{:table_position, root_table}, position_id}` +- shape positions: `{{:shape_positions, shape_handle}, [position_id, ...]}` +- fallback: `{{:fallback, root_table, shape_handle}, true}` -## Stage 2: Register Static Position Metadata in the Filter +## Remaining Work -Touch points: +See `./finish-filter-plan.md` for the detailed remaining task list. In summary: -- `lib/electric/shapes/filter.ex` -- `lib/electric/shapes/dnf_plan.ex` (read-only dependency) -- new `lib/electric/shapes/filter/subquery_index.ex` +1. Fill missing filter unit coverage (negation, repeated positions, composite + keys, LIKE+subquery callback path, unsupported fallback). +2. Add direct `SubqueryIndex` ETS lifecycle tests. +3. Add process-level consumer index mutation tests. +4. Prove restore/startup safety with explicit tests. +5. Add end-to-end integration regressions (negated moves, repeated positions). -On `Filter.add_shape/3`: +## Key Modules -- detect shapes with dependencies -- compile `DnfPlan` for those shapes -- extract every subquery position that can be evaluated from the root-table - record -- register its static metadata and shape-handle membership in the - `SubqueryIndex` - -On `Filter.remove_shape/3`: - -- unregister the shape from the static position metadata -- delete all reverse-index rows for the shape - -Important fallback: - -- keep a conservative "not ready yet" set for subquery shapes -- `restore_shapes` currently adds shapes to the router before their consumer - exists -- until a consumer has seeded its dynamic membership rows, the filter must - continue routing that shape conservatively -- once a shape is ready, this fallback must stop oversending it; readiness - fallback is only for startup/restore and unsupported positions, not for - steady-state root-table routing - -That means the current `subquery_shapes_table` should not disappear -immediately. It should become a temporary "fallback subquery shapes" table and -only be retired once readiness handoff exists. - -## Stage 3: Seed and Maintain Dynamic Membership from the Consumer Runtime - -Touch points: - -- `lib/electric/shapes/consumer.ex` -- `lib/electric/shapes/consumer/subqueries.ex` -- `lib/electric/shapes/consumer/subqueries/steady.ex` -- `lib/electric/shapes/consumer/subqueries/buffering.ex` -- new pure helper such as - `lib/electric/shapes/filter/subquery_index_projection.ex` - -Architecture note: - -- keep the current split where `Subqueries.handle_event/2` is pure and returns - actions, and `Consumer.apply_subquery_event/3` is the place that performs - ETS writes / async queries / log writes -- do not have `Steady` or `Buffering` write into the reverse index directly -- instead, add a new action shape such as `{:subquery_index, ops}` or similar, - and let the consumer execute those ops after applying the subquery state - transition - -Add reverse-index state to the subquery runtime: - -- table name or module access -- the list of subquery positions for the outer shape -- enough metadata to map dependency-index changes onto position updates -- two logical projections: - - exact subquery views already kept in memory by the state machine - - a routing projection materialized into the filter index - -The routing projection is what should drive `Filter` candidate lookup: - -- in steady state, routing projection = current exact view -- while buffering one dependency move: - - positive positions use `before ∪ after` - - negated positions use `before ∩ after` as the matched-membership set that - the filter subtracts from `all_negated_handles(position)` - -That logic is sufficiently subtle that it should live in its own pure helper -module rather than being open-coded across `Steady`, `Buffering`, and -`Consumer`. - -During `initialize_subquery_runtime/1`: - -- compile the `DnfPlan` -- load the current dependency views from materializers as today -- seed reverse-index membership rows from those views before the shape is - treated as "ready" for indexed routing -- seed the routing projection by diffing from an empty projection and emitting - reverse-index ops through the normal action path - -For newly created shapes: - -- seed the reverse index before `ShapeLogCollector.add_shape/4` makes the shape - active in the router, so there is no under-routing window - -For restored shapes: - -- register them conservatively during `restore_shapes` -- after the consumer seeds its membership rows, send a small readiness message - back to the `ShapeLogCollector` / `EventRouter` so the shape can leave the - conservative fallback set - -During steady-state dependency moves: - -- update the in-memory views exactly as today -- compute the routing-projection delta in the pure helper -- emit reverse-index ops as actions -- let `Consumer.apply_subquery_event/3` execute those ops imperatively - -During buffering: - -- do not think in terms of two separate filter-side evaluations -- buffering has two subquery views, `views_before_move` and - `views_after_move`, but the filter should collapse them into one - conservative routing projection -- the pure helper should own that derivation and emit the diff from: - - steady exact view -> buffering conservative projection - - buffering conservative projection -> next steady exact view after splice -- queued dependency moves should continue to be modelled exactly as today in - the subquery state machine; the projection helper only cares about the - active routing image before and after each state transition - -For the first indexed slice: - -- candidate lookup can use the buffering routing projection -- use the index to shrink the candidate set -- do not exact-reject actively buffering shapes in `Filter`; oversending is - acceptable, undersending is not -- rely on `Shape.convert_change/3` as the authoritative final filter for - buffered transactions - -Because the ETS table stores positive membership, negation needs no special -write-path logic. Negated candidate sets are derived by complement at read -time. - -On teardown: - -- remove all reverse-index rows for the shape when the shape is cleaned up or - removed from the router - -## Stage 4: Use the Reverse Index in `Filter.affected_shapes/2` - -Touch points: - -- `lib/electric/shapes/filter.ex` -- new `lib/electric/shapes/filter/subquery_index.ex` - -Replace the unconditional subquery union in -`Filter.shapes_affected_by_record/3` with: - -1. `WhereCondition.affected_shapes/3` -2. reverse-index candidates for subquery positions on the table -3. exact verification of steady-state candidates, with buffering candidates - allowed to oversend - -Reverse-index lookup flow: - -- enumerate registered subquery positions for the root table -- evaluate the left-hand comparison expression against the incoming record -- look up matching positive-membership rows -- derive: - - positive candidates directly from the lookup - - negated candidates from `all_negated_handles(position) - matched_handles(position)` -- union all candidate handles across positions - -For the first implementation: - -- steady-state candidate verification should be done by evaluating the full - shape predicate for each candidate shape, not by trying to fold subquery - lookups into the `WhereCondition` tree immediately -- buffering shapes should use the conservative routing projection and may - oversend through to `Shape.convert_change/3` - -That is the lowest-risk slice: - -- it removes the worst over-routing -- it reuses the exact predicate semantics we already have -- it keeps the existing equality/inclusion tree untouched -- it restores correctness for non-optimisable `other_shapes` predicates, so the - current skipped `OR + subquery` regression can be re-enabled instead of being - masked by always-route behaviour - -If this proves too expensive in practice, we can later add a real subquery -index operation to `WhereCondition.optimise_where/1`, but that should be a -follow-up, not part of the first slice. - -## Stage 5: Add Callback-Based Subquery Evaluation to `WhereClause` - -Touch points: - -- `lib/electric/shapes/where_clause.ex` -- `lib/electric/replication/eval/runner.ex` or `lib/pg_interop/sublink.ex` - -Do not replace the existing refs-map path used by `Shape.convert_change/3`. -That path still needs full subquery views for exact change conversion. - -Instead: - -- keep the current `includes_record?/3` behaviour for conversion code -- add a callback-based variant for filter-side exact verification - -Suggested API shape: - -- `includes_record?(where, record, extra_refs, subquery_member?)` -- or a distinct helper such as - `includes_record_with_subquery_membership?(where, record, subquery_member?)` - -The callback should look like: - -- `fn subquery_ref, typed_value -> boolean end` - -That `subquery_ref` is the canonical dependency ref from the validated WHERE -clause, for example `["$sublink", "0"]`, not a DNF `position_id`. - -That split is intentional: - -- candidate routing needs `position_id`, because repeated occurrences of the - same dependency can appear in different positions and compare against - different root-table columns -- exact `WhereClause` verification only needs canonical dependency membership, - because the AST has already been rewritten to shared dependency refs - -Implementation options: - -- extend `PgInterop.Sublink.member?/2` to accept a wrapper value that delegates - to the callback -- or special-case `sublink_membership_check` execution in `Runner` - -The important thing is that filter-side verification can ask: - -- "does this specific shape currently contain this typed value for this - canonical subquery ref / dependency?" - -using the reverse index, without loading full `MapSet` views into the filter. -This is also the production replacement for the old `refs_fun`-backed filter -checks; after the always-route hack is removed, filter verification must work -without `ShapeLogCollector` materializer refs. - -## Stage 6: Remove the Legacy Always-Route Path - -Once the reverse-index path is green: - -- retire the unconditional `subquery_shape_ids_for_table/2` union for - root-table record changes -- narrow `subquery_shapes_table` to only the conservative startup fallback, or - remove it completely if readiness is explicit -- simplify `all_shape_ids/1` and `shape_ids_for_table/2` so they do not depend - on duplicated subquery shape bookkeeping -- re-enable any regression tests currently skipped because the oversend hack - masks false positives - -Keep the existing safety valve: - -- if filter-side evaluation crashes or cannot determine the result, return all - shapes for safety - -Also keep a conservative fallback for unsupported subquery positions: - -- if a shape's subquery test expression cannot be evaluated from a root-table - record, leave that shape on the fallback routing path instead of partially - indexing it -- buffering shapes likewise may oversend through to `Shape.convert_change/3`; - the routing projection exists to avoid undersend, not to make buffering exact - -## Suggested Test Plan - -### Filter unit tests - -Extend `test/electric/shapes/filter_test.exs` to cover: - -- positive single-column subquery routing -- non-matching values no longer route the shape -- `field = const AND subquery` still verifies the row predicate after candidate - lookup -- re-enable the current skipped `OR + subquery` regression and assert that a - non-matching root-table row is not routed just because the shape has - dependencies -- `LIKE` + subquery in `other_shapes` uses the callback path instead of full - `refs_fun` views -- multiple shapes sharing a value -- multiple shapes with different values -- negated subquery candidate derivation by complement -- repeated dependency with two positions: - - `a IN sq OR b IN sq` -- composite-key subquery positions -- fallback behaviour for unsupported positions -- `remove_shape/3` cleanup of ETS entries - -### Consumer / subquery-runtime tests - -Extend `test/electric/shapes/consumer/subqueries_test.exs` to cover: - -- initial seeding of membership rows from current dependency views -- steady `move_in` and `move_out` updates to the reverse index -- buffering updates the reverse index at move start -- readiness handoff for restored shapes -- teardown removes reverse-index rows -- negated shapes produce the expected candidate visibility without special - write-time polarity handling - -Add focused unit tests for the new projection helper, for example in -`test/electric/shapes/filter/subquery_index_projection_test.exs`, covering: - -- steady exact view -> routing projection -- steady -> buffering projection for positive positions (`before ∪ after`) -- steady -> buffering projection for negated positions (`before ∩ after`) -- buffering -> steady projection after splice -- repeated positions sharing one dependency -- emitted add/remove ops for seed, move, and teardown - -### Integration tests - -Add or extend integration coverage for: - -- root-table changes no longer waking unrelated subquery shapes -- `AND` + subquery mixed predicates -- repeated dependency positions -- negated subquery move-out / move-in regressions -- stack restart / shape restore preserving conservative correctness until - consumers reseed the reverse index - -## Recommended Delivery Order - -1. Add `SubqueryIndex` storage and static registration. -2. Add callback-based `WhereClause` evaluation. -3. Seed and update dynamic membership from the consumer runtime. -4. Add readiness handoff for restored shapes. -5. Switch `Filter.shapes_affected_by_record/3` to reverse-index candidates plus - exact verification. -6. Remove the legacy always-route path once tests are green. - -## Expected Touch Points - -- `lib/electric/shapes/filter.ex` -- `lib/electric/shapes/filter/where_condition.ex` -- `lib/electric/shapes/where_clause.ex` -- `lib/electric/shapes/consumer.ex` -- `lib/electric/shapes/consumer/subqueries.ex` -- `lib/electric/shapes/consumer/subqueries/steady.ex` -- `lib/electric/shapes/consumer/subqueries/buffering.ex` -- `lib/electric/replication/shape_log_collector.ex` -- new `lib/electric/shapes/filter/subquery_index.ex` -- new `lib/electric/shapes/filter/subquery_index_projection.ex` -- `test/electric/shapes/filter_test.exs` -- `test/electric/shapes/consumer/subqueries_test.exs` -- `test/electric/shapes/filter/subquery_index_projection_test.exs` -- relevant integration tests under `test/integration/` +- `lib/electric/shapes/filter/subquery_index.ex` — reverse index storage and API +- `lib/electric/shapes/filter/indexes/subquery_index.ex` — ties reverse-index + candidates into the `WhereCondition` tree +- `lib/electric/shapes/filter.ex` — shape registration and routing +- `lib/electric/shapes/consumer.ex` — seeds and maintains index from runtime +- `lib/electric/shapes/where_clause.ex` — callback-based exact verification +- `test/electric/shapes/filter_test.exs` — filter unit tests +- `test/electric/shapes/consumer/subqueries_test.exs` — subquery state machine tests +- `test/integration/` — end-to-end integration tests diff --git a/packages/sync-service/simple-subqueries-filter.md b/packages/sync-service/simple-subqueries-filter.md index 54dcee7019..49a01dd457 100644 --- a/packages/sync-service/simple-subqueries-filter.md +++ b/packages/sync-service/simple-subqueries-filter.md @@ -1,39 +1,36 @@ ## Shapes.Filter — Subquery Support via Reverse Index -For subqueries, the Shapes.Filter should support subqueries using a reverse -index per subquery. We'd use an ETS table, but conceptually the reverse index -can be thought of as a map of values to shape handles: - -```elixir -%{ - "1" => MapSet.new([handle1, handle2]), - "2" => MapSet.new([handle2, handle3]), - "3" => MapSet.new([handle1]) -} -``` - -When a change arrives with a value for the subquery column, we look up that -value in the reverse index and get the set of shape handles whose subquery -view contains that value. +For subqueries, the Shapes.Filter uses a reverse index backed by ETS +(`Electric.Shapes.Filter.SubqueryIndex`) that maps typed values to shape +handles. The index is stack-scoped and shared across all consumers. -### Integration with `WhereClause.includes_record?/3` +When a change arrives, the filter evaluates the left-hand comparison expression +from each registered subquery position against the incoming record, looks up +matching candidate shapes in the reverse index, and verifies candidates against +the full `WhereCondition` for the table. + +### Positive vs Negated Predicates + +The index stores positive membership (the actual values in each shape's +subquery view). -Shapes that cannot be indexed (for example because their `WHERE` clause also -has `LIKE` in it) currently end up in `other_shapes` and are iterated through -using `WhereClause.includes_record?/3`. We should use the reverse index for -subquery evaluation in this path too, for simplicity and to avoid holding more -in memory. +- **Positive** (`x IN (SELECT ...)`): candidates are shapes whose membership + contains the looked-up value. +- **Negated** (`x NOT IN (SELECT ...)`): candidates are all registered negated + shapes for the position minus those whose membership contains the value + (complement at read time). -Currently, for `x IN subquery`, `includes_record?/3` gets all the values from -the materialized view of the subquery (kept in `refs`) and checks whether the -value of `x` from `record` is in that set. Instead, we look up the value of -`x` in the reverse index and check whether our shape handle is in the result -set. +### Integration with `WhereClause.includes_record?/3` + +Shapes that cannot be fully indexed (for example because their `WHERE` clause +also has `LIKE` in it) end up in `other_shapes` and are iterated through using +`WhereClause.includes_record?/3` with a callback-based subquery membership +check. -This means changing the interface of `includes_record?/3`: instead of -passing a `refs` map containing the full subquery value set, we pass a -function that determines subquery inclusion. The shape handle can be captured -in the closure of that function. +Instead of passing a `refs` map containing the full subquery value set, the +filter passes a callback function `fn subquery_ref, typed_value -> boolean` +that delegates to `SubqueryIndex.member?/3`. The shape handle is captured in +the closure. ### Candidate filtering @@ -42,64 +39,58 @@ must still verify each candidate against the full `WhereCondition` for that table, because the `WhereCondition` tree may include non-subquery branches that rule the shape out. -## Managing the Reverse Index - -### Consumer independence - -Each consumer has subquery views at different times, so each consumer manages -the reverse index entries for its own shapes independently of other shapes: +### Fallback routing -```elixir -index = %{"1" => MapSet.new(["handle1"])} +Shapes whose reverse-index state is not yet ready (e.g., restored shapes +before the consumer has reseeded) are registered as fallback shapes and +routed conservatively for all root-table changes. Once the consumer seeds +membership and calls `mark_ready/3`, the shape leaves the fallback set and +only routes via the index. -ReverseIndex.add_value(index, "handle2", _value = "1") +Shapes with unsupported subquery test expressions (e.g., function-wrapped +LHS that cannot be evaluated from a root-table record) also remain on the +fallback path permanently. -# => %{"1" => MapSet.new(["handle1", "handle2"])} -``` +## Managing the Reverse Index -Because the index is an ETS table, updates by one consumer are immediately -visible to the Filter running in the EventRouter process. +### Consumer independence -### Move-ins +Each consumer manages the reverse index entries for its own shape +independently. Because the index is an ETS table, updates by one consumer +are immediately visible to the Filter running in the EventRouter process. -While a move-in query is in flight we buffer changes (see -`simple-subqueries.md`, section B). During buffering the reverse index must be -broad enough to capture changes relevant to _both_ the pre-splice and -post-splice views: +### Seeding -- **Pre-splice changes** are converted with the old subquery view. -- **Post-splice changes** are converted with the new subquery view. +On startup (`Consumer.initialize_subquery_runtime/1`), the consumer loads +current dependency views and seeds membership entries via +`SubqueryIndex.seed_membership/6`, then calls `SubqueryIndex.mark_ready/3` +to clear the fallback flag. -The safe strategy depends on whether the shape uses negation: +### Move-ins (buffering) -- **Shapes without negation** (i.e. shapes that do not use `NOT IN subquery`): - The reverse index should be the _union_ of the before and after subquery - views, so the consumer adds the moved-in value to the reverse index at the - start of the move-in. +While a move-in query is in flight we buffer changes. During buffering the +reverse index must be broad enough to capture changes relevant to _both_ the +pre-splice and post-splice views: -- **Shapes with negation** (i.e. shapes that use `NOT IN subquery`): - The reverse index should be the _intersection_ of the before and after - subquery views, so the consumer removes the moved-in value from the reverse - index at the start of the move-in. +- **Positive dependency**: the index stores `before ∪ after`, so the consumer + adds the moved-in value at the start of the move-in. +- **Negated dependency**: the index stores `before ∩ after` as the + matched-membership set (which the filter subtracts from all negated shapes), + so the consumer removes the moved-in value at the start. Consistency is maintained even if the Filter passes through more changes than -strictly necessary: `Shape.convert_change/3` will filter out any that do not +strictly necessary: `Shape.convert_change/3` filters out any that do not belong, using the correct subquery view for the change's position relative to -the splice boundary. The important invariant is that we never _miss_ a -relevant change. +the splice boundary. ### Move-outs In some scenarios the consumer processes move-outs the moment the move-out -message is received, which can be mid-transaction. By that point the Filter -has already filtered changes for the remainder of the transaction using the -old reverse index state. This is safe: - -- **Shapes without negation**: the old index already included the moved-out - value, so changes for both before and after the move-out are captured. Any - extra changes are filtered out by `Shape.convert_change/3`. +message is received, which can be mid-transaction. This is safe: -- **Shapes with negation**: a move-out from the subquery view means rows that - _were_ excluded now become included — effectively a move-in from the shape's +- **Positive**: the old index already included the moved-out value, so changes + for both before and after the move-out are captured. +- **Negated**: a move-out from the subquery view means rows that _were_ + excluded now become included — effectively a move-in from the shape's perspective. This case follows move-in semantics (buffering, splice boundary, etc.). From 77f92a784529ddb8a9f5067b0953fbe951e165e2 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 17:02:22 +0000 Subject: [PATCH 48/63] Really finish off filter --- .../replication/shape_log_collector_test.exs | 65 ++++ .../test/electric/shape_cache_test.exs | 25 ++ .../test/electric/shapes/consumer_test.exs | 140 ++++++++ .../shapes/filter/subquery_index_test.exs | 323 ++++++++++++++++++ .../test/electric/shapes/filter_test.exs | 264 ++++++++++++++ .../subquery_dependency_update_test.exs | 101 ++++++ .../integration/subquery_move_out_test.exs | 60 ++++ 7 files changed, 978 insertions(+) create mode 100644 packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs diff --git a/packages/sync-service/test/electric/replication/shape_log_collector_test.exs b/packages/sync-service/test/electric/replication/shape_log_collector_test.exs index a90f4b57c5..1543863788 100644 --- a/packages/sync-service/test/electric/replication/shape_log_collector_test.exs +++ b/packages/sync-service/test/electric/replication/shape_log_collector_test.exs @@ -40,6 +40,16 @@ defmodule Electric.Replication.ShapeLogCollectorTest do @shape Shape.new!("test_table", inspector: @inspector) @shape_handle "the-shape-handle" + @subquery_inspector Support.StubInspector.new( + tables: [{1234, {"public", "test_table"}}, {5678, {"public", "parent"}}], + columns: [%{name: "id", type: "int8", type_id: {20, 1}, pk_position: 0}] + ) + @subquery_shape Shape.new!("test_table", + inspector: @subquery_inspector, + where: "id IN (SELECT id FROM public.parent)" + ) + @subquery_shape_handle "subquery-shape-handle" + def setup_log_collector(ctx) do %{stack_id: stack_id} = ctx # Start a test Registry @@ -144,6 +154,61 @@ defmodule Electric.Replication.ShapeLogCollectorTest do xids = Support.TransactionConsumer.assert_consume([{1, consumer}], [txn]) assert xids == [xmin] end + + @tag restore_shapes: [{@subquery_shape_handle, @subquery_shape}], + inspector: @subquery_inspector + test "restored subquery shape routes via fallback before consumer seeds index", ctx do + alias Electric.Shapes.Filter.SubqueryIndex + + # After restore, the subquery shape should be in fallback because + # no consumer has seeded the SubqueryIndex yet. + index = SubqueryIndex.for_stack(ctx.stack_id) + assert index != nil + assert SubqueryIndex.fallback?(index, @subquery_shape_handle, {"public", "test_table"}) + + parent = self() + + consumer = + start_link_supervised!( + {Support.TransactionConsumer, + [ + id: 1, + stack_id: ctx.stack_id, + parent: parent, + shape: @subquery_shape, + shape_handle: @subquery_shape_handle, + stack_id: ctx.stack_id, + action: :restore + ]} + ) + + :ok = + Electric.Shapes.ConsumerRegistry.register_consumer( + consumer, + @subquery_shape_handle, + ctx.stack_id + ) + + xmin = 100 + lsn = Lsn.from_string("0/10") + last_log_offset = LogOffset.new(lsn, 0) + + # Any root-table change should route to the shape via fallback, + # even if the record wouldn't match the subquery membership. + txn = + transaction(xmin, lsn, [ + %Changes.NewRecord{ + relation: {"public", "test_table"}, + record: %{"id" => "999"}, + log_offset: last_log_offset + } + ]) + + assert :ok = ShapeLogCollector.handle_event(txn, ctx.stack_id) + + xids = Support.TransactionConsumer.assert_consume([{1, consumer}], [txn]) + assert xids == [xmin] + end end describe "lazy consumer initialization" do diff --git a/packages/sync-service/test/electric/shape_cache_test.exs b/packages/sync-service/test/electric/shape_cache_test.exs index 45f09e4503..c7210bf3b2 100644 --- a/packages/sync-service/test/electric/shape_cache_test.exs +++ b/packages/sync-service/test/electric/shape_cache_test.exs @@ -1169,6 +1169,31 @@ defmodule Electric.ShapeCacheTest do assert [{^dep_handle, _}, {^shape_handle, _}] = ShapeCache.list_shapes(ctx.stack_id) end + test "restarted subquery shape reseeds the subquery index after restart", ctx do + alias Electric.Shapes.Filter.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + # Before restart: shape should have positions in the SubqueryIndex + index_before = SubqueryIndex.for_stack(ctx.stack_id) + assert index_before != nil + assert SubqueryIndex.has_positions?(index_before, shape_handle) + + restart_shape_cache(ctx) + + # After restart: the SubqueryIndex is recreated by the ShapeLogCollector. + # The consumer re-initializes and reseeds the index. + # Wait for the consumer to finish restoring. + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index_after = SubqueryIndex.for_stack(ctx.stack_id) + assert index_after != nil + assert SubqueryIndex.has_positions?(index_after, shape_handle) + end + test "restores shapes with subqueries and their materializers when backup missing", ctx do {shape_handle, _} = ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) diff --git a/packages/sync-service/test/electric/shapes/consumer_test.exs b/packages/sync-service/test/electric/shapes/consumer_test.exs index 8537d14e34..8d7a31fd18 100644 --- a/packages/sync-service/test/electric/shapes/consumer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer_test.exs @@ -2060,6 +2060,146 @@ defmodule Electric.Shapes.ConsumerTest do %{"headers" => %{"control" => "snapshot-end"}} ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) end + + test "consumer startup seeds the stack-scoped subquery index", ctx do + alias Electric.Shapes.Filter.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + # The consumer should have seeded the SubqueryIndex during initialization + index = SubqueryIndex.for_stack(ctx.stack_id) + assert index != nil + + # The shape should be registered with positions (by Filter.add_shape) + assert SubqueryIndex.has_positions?(index, shape_handle) + + # The shape should be marked ready (no longer in fallback) once + # the consumer has seeded the index. After await_snapshot_start returns + # the consumer has completed initialization including subquery seeding. + {:ok, shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + + # The consumer seeds the index via SubqueryIndex.for_stack, but the + # index is also modified by the Filter (which runs in the + # ShapeLogCollector process). Check that the shape has positions + # and that membership entries are correct (empty views for a fresh shape). + positions = SubqueryIndex.positions_for_shape(index, shape_handle) + assert length(positions) > 0 + + # Verify the index is accessible — a basic candidates lookup shouldn't crash + _candidates = + SubqueryIndex.candidates_for_record(index, shape.root_table, %{"id" => "1"}) + end + + test "consumer steady dependency move_in adds value to the subquery index", ctx do + alias Electric.Shapes.Filter.SubqueryIndex + + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Subqueries, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index = SubqueryIndex.for_stack(ctx.stack_id) + {:ok, _shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + + # Before any dependency changes, the index has empty membership + refute SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + + # Send a new record for the dependency table to trigger a move_in + ShapeLogCollector.handle_event( + complete_txn_fragment(100, Lsn.from_integer(50), [ + %Changes.NewRecord{ + relation: {"public", "other_table"}, + record: %{"id" => "1"}, + log_offset: LogOffset.new(Lsn.from_integer(50), 0) + } + ]), + ctx.stack_id + ) + + # Wait for the consumer to process the event and request a move_in query + assert_receive {:query_requested, consumer_pid} + + # During buffering, the value should have been added to the index + # (union for positive dependency: before ∪ after) + assert SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + + # Complete the move_in query to transition back to steady state + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + + send( + consumer_pid, + {:query_move_in_complete, + [ + %Electric.Shapes.Consumer.Subqueries.QueryRow{ + key: ~s'"public"."test_table"/"1"', + json: + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "val"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + } + ], Lsn.from_integer(100)} + ) + + # Allow the consumer to process the completion + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + assert_receive {^ref, :new_changes, _offset}, @receive_timeout + + # After move_in completes, value should still be in the index (now steady state) + assert SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + end + + test "consumer cleanup removes shape rows from the subquery index", ctx do + alias Electric.Shapes.Filter.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index = SubqueryIndex.for_stack(ctx.stack_id) + assert SubqueryIndex.has_positions?(index, shape_handle) + + # Monitor the consumer so we know when cleanup finishes + consumer_name = Shapes.Consumer.name(ctx.stack_id, shape_handle) + consumer_pid = GenServer.whereis(consumer_name) + ref = Process.monitor(consumer_pid) + + expect_shape_status(remove_shape: fn _, ^shape_handle -> :ok end) + ShapeCache.clean_shape(shape_handle, ctx.stack_id) + + # Wait for consumer to shut down, flushing any other messages first + assert_receive {:DOWN, ^ref, :process, ^consumer_pid, _reason}, 5000 + + # The ShapeLogCollector removes the shape from the filter asynchronously. + # Wait briefly for it to process. + Process.sleep(100) + + # After cleanup, the shape's rows should be removed from the index + refute SubqueryIndex.has_positions?(index, shape_handle) + end end defp refute_storage_calls_for_txn_fragment(shape_handle) do diff --git a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs new file mode 100644 index 0000000000..e51c571d15 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs @@ -0,0 +1,323 @@ +defmodule Electric.Shapes.Filter.SubqueryIndexTest do + use ExUnit.Case + + alias Electric.Shapes.Filter.SubqueryIndex + alias Electric.Replication.Eval.Parser.{Func, Ref} + + setup do + table = SubqueryIndex.new() + %{table: table} + end + + @root_table {"public", "child"} + @subquery_ref ["$sublink", "0"] + + # A minimal DnfPlan with one positive subquery position + defp make_plan(opts \\ []) do + polarity = Keyword.get(opts, :polarity, :positive) + dep_index = Keyword.get(opts, :dep_index, 0) + subquery_ref = Keyword.get(opts, :subquery_ref, @subquery_ref) + field = Keyword.get(opts, :field, "par_id") + + testexpr = %Ref{path: [field], type: :int8} + ref = %Ref{path: subquery_ref, type: {:array, :int8}} + + ast = %Func{ + name: "sublink_membership_check", + args: [testexpr, ref], + type: :bool + } + + %Electric.Shapes.DnfPlan{ + disjuncts: [], + disjuncts_positions: [], + position_count: 1, + positions: %{ + 0 => %{ + ast: ast, + sql: "fake", + is_subquery: true, + negated: polarity == :negated, + dependency_index: dep_index, + subquery_ref: subquery_ref, + tag_columns: [field] + } + }, + dependency_positions: %{dep_index => [0]}, + dependency_disjuncts: %{}, + dependency_polarities: %{dep_index => polarity}, + has_negated_subquery: polarity == :negated + } + end + + defp make_two_position_plan do + testexpr_id = %Ref{path: ["id"], type: :int8} + testexpr_par = %Ref{path: ["par_id"], type: :int8} + sublink_ref = %Ref{path: @subquery_ref, type: {:array, :int8}} + + ast_id = %Func{name: "sublink_membership_check", args: [testexpr_id, sublink_ref], type: :bool} + ast_par = %Func{name: "sublink_membership_check", args: [testexpr_par, sublink_ref], type: :bool} + + %Electric.Shapes.DnfPlan{ + disjuncts: [], + disjuncts_positions: [], + position_count: 2, + positions: %{ + 0 => %{ + ast: ast_id, + sql: "fake", + is_subquery: true, + negated: false, + dependency_index: 0, + subquery_ref: @subquery_ref, + tag_columns: ["id"] + }, + 1 => %{ + ast: ast_par, + sql: "fake", + is_subquery: true, + negated: false, + dependency_index: 0, + subquery_ref: @subquery_ref, + tag_columns: ["par_id"] + } + }, + dependency_positions: %{0 => [0, 1]}, + dependency_disjuncts: %{}, + dependency_polarities: %{0 => :positive}, + has_negated_subquery: false + } + end + + describe "register_shape/4 and positions_for_shape/2" do + test "writes position metadata and fallback rows", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + # Shape positions recorded + positions = SubqueryIndex.positions_for_shape(table, "s1") + assert length(positions) == 1 + [{shape_handle, pos}] = positions + assert shape_handle == "s1" + assert pos == 0 + + # Shape starts in fallback + assert SubqueryIndex.fallback?(table, "s1", @root_table) + + # Position metadata written + position_id = {"s1", 0} + assert [{_, meta}] = :ets.lookup(table, {:position_meta, @root_table, position_id}) + assert meta.polarity == :positive + assert meta.dependency_index == 0 + assert meta.subquery_ref == @subquery_ref + end + + test "writes negated position metadata", %{table: table} do + plan = make_plan(polarity: :negated) + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + position_id = {"s1", 0} + [{_, meta}] = :ets.lookup(table, {:position_meta, @root_table, position_id}) + assert meta.polarity == :negated + end + end + + describe "seed_membership/6" do + test "writes candidate rows for each matching position", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + view = MapSet.new([1, 2, 3]) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, view) + + position_id = {"s1", 0} + + # Candidate entries written + for v <- [1, 2, 3] do + assert :ets.member(table, {@root_table, position_id, v}) + end + + refute :ets.member(table, {@root_table, position_id, 4}) + end + + test "writes exact membership rows for each canonical subquery ref", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + view = MapSet.new([10, 20]) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, view) + + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 10) + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 20) + refute SubqueryIndex.member?(table, "s1", @subquery_ref, 30) + end + + test "seeds multiple positions for the same dependency", %{table: table} do + plan = make_two_position_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + view = MapSet.new([5]) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, view) + + # Both position_ids should have candidate entries + assert :ets.member(table, {@root_table, {"s1", 0}, 5}) + assert :ets.member(table, {@root_table, {"s1", 1}, 5}) + end + end + + describe "mark_ready/3" do + test "clears fallback flag", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + assert SubqueryIndex.fallback?(table, "s1", @root_table) + + SubqueryIndex.mark_ready(table, "s1", @root_table) + refute SubqueryIndex.fallback?(table, "s1", @root_table) + end + end + + describe "add_value/6 and remove_value/6" do + test "add_value updates both routing and membership", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + SubqueryIndex.add_value(table, "s1", @root_table, @subquery_ref, 0, 42) + + # Candidate entry written + position_id = {"s1", 0} + assert :ets.member(table, {@root_table, position_id, 42}) + + # Exact membership written + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 42) + end + + test "remove_value removes both routing and membership", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + SubqueryIndex.add_value(table, "s1", @root_table, @subquery_ref, 0, 42) + SubqueryIndex.remove_value(table, "s1", @root_table, @subquery_ref, 0, 42) + + position_id = {"s1", 0} + refute :ets.member(table, {@root_table, position_id, 42}) + refute SubqueryIndex.member?(table, "s1", @subquery_ref, 42) + end + + test "add_value updates all positions for the dependency", %{table: table} do + plan = make_two_position_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + SubqueryIndex.add_value(table, "s1", @root_table, @subquery_ref, 0, 99) + + assert :ets.member(table, {@root_table, {"s1", 0}, 99}) + assert :ets.member(table, {@root_table, {"s1", 1}, 99}) + end + end + + describe "unregister_shape/3" do + test "removes candidate rows, membership, position metadata, and fallback", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, MapSet.new([1, 2])) + + # Verify data exists before unregister + assert SubqueryIndex.has_positions?(table, "s1") + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 1) + + SubqueryIndex.unregister_shape(table, "s1", @root_table) + + # All rows for the shape should be gone + refute SubqueryIndex.has_positions?(table, "s1") + refute SubqueryIndex.member?(table, "s1", @subquery_ref, 1) + refute SubqueryIndex.member?(table, "s1", @subquery_ref, 2) + refute SubqueryIndex.fallback?(table, "s1", @root_table) + assert SubqueryIndex.positions_for_shape(table, "s1") == [] + end + + test "does not remove rows belonging to other shapes", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + SubqueryIndex.register_shape(table, "s2", @root_table, plan) + + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, MapSet.new([1])) + SubqueryIndex.seed_membership(table, "s2", @root_table, @subquery_ref, 0, MapSet.new([1])) + + SubqueryIndex.unregister_shape(table, "s1", @root_table) + + # s2 still has its data + assert SubqueryIndex.has_positions?(table, "s2") + assert SubqueryIndex.member?(table, "s2", @subquery_ref, 1) + assert SubqueryIndex.fallback?(table, "s2", @root_table) + end + end + + describe "candidates_for_record/3" do + test "returns positive candidates matching the record", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, MapSet.new([5])) + SubqueryIndex.mark_ready(table, "s1", @root_table) + + assert MapSet.member?( + SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "5"}), + "s1" + ) + + refute MapSet.member?( + SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "99"}), + "s1" + ) + end + + test "returns negated candidates when value is NOT in membership", %{table: table} do + plan = make_plan(polarity: :negated) + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, MapSet.new([5])) + SubqueryIndex.mark_ready(table, "s1", @root_table) + + # Value 5 IS in membership, so negated position does NOT match + refute MapSet.member?( + SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "5"}), + "s1" + ) + + # Value 99 is NOT in membership, so negated position DOES match + assert MapSet.member?( + SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "99"}), + "s1" + ) + end + + test "includes fallback shapes regardless of record values", %{table: table} do + plan = make_plan() + SubqueryIndex.register_shape(table, "s1", @root_table, plan) + + # Not marked ready, so s1 is in fallback + candidates = SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "99"}) + assert MapSet.member?(candidates, "s1") + end + end + + describe "register_fallback_shape/3" do + test "registers shape on fallback with no indexed positions", %{table: table} do + SubqueryIndex.register_fallback_shape(table, "s1", @root_table) + + assert SubqueryIndex.fallback?(table, "s1", @root_table) + assert SubqueryIndex.positions_for_shape(table, "s1") == [] + + candidates = SubqueryIndex.candidates_for_record(table, @root_table, %{"par_id" => "1"}) + assert MapSet.member?(candidates, "s1") + end + end + + describe "for_stack/1" do + test "stores and retrieves table ref by stack_id" do + table = SubqueryIndex.new(stack_id: "test-stack-123") + assert SubqueryIndex.for_stack("test-stack-123") == table + end + + test "returns nil for unknown stack" do + assert SubqueryIndex.for_stack("nonexistent-stack") == nil + end + end +end diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index 6676104d3d..f1cc5fcc68 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -1124,5 +1124,269 @@ defmodule Electric.Shapes.FilterTest do assert :ets.tab2list(index) == [] end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS neg_parent (id INT PRIMARY KEY, value TEXT NOT NULL)", + "CREATE TABLE IF NOT EXISTS neg_child (id INT PRIMARY KEY, parent_id INT REFERENCES neg_parent(id))" + ] + test "negated candidate derivation by complement", %{inspector: inspector} do + {:ok, shape} = + Shape.new("neg_child", + inspector: inspector, + where: "parent_id NOT IN (SELECT id FROM neg_parent WHERE value = 'keep')" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + # Seed membership with value 1 (parent id 1 matches the subquery "WHERE value = 'keep'") + Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1])) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + # parent_id=1 is in the subquery view, so NOT IN means this should NOT route + insert_matching_member = %NewRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "10", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_matching_member) == MapSet.new([]) + + # parent_id=2 is NOT in the subquery view, so NOT IN means this SHOULD route + insert_not_member = %NewRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "11", "parent_id" => "2"} + } + + assert Filter.affected_shapes(filter, insert_not_member) == MapSet.new(["shape1"]) + + # Update crossing from non-matching to matching should route (union of old/new) + update_crossing = %UpdatedRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "10", "parent_id" => "1"}, + old_record: %{"id" => "10", "parent_id" => "2"} + } + + assert Filter.affected_shapes(filter, update_crossing) == MapSet.new(["shape1"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS rep_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS rep_child (id INT PRIMARY KEY, par_id INT REFERENCES rep_parent(id))" + ] + test "repeated dependency positions in one shape", %{inspector: inspector} do + # Both positions reference the same dependency (rep_parent.id), but + # compare against different root-table columns. + {:ok, shape} = + Shape.new("rep_child", + inspector: inspector, + where: "id IN (SELECT id FROM rep_parent) OR par_id IN (SELECT id FROM rep_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + # Seed the membership view with values {1, 2} + Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1, 2])) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + # Only id matches (id=1, par_id=99) -> should route + insert_id_match = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "1", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_id_match) == MapSet.new(["shape1"]) + + # Only par_id matches (id=99, par_id=2) -> should route + insert_par_match = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "99", "par_id" => "2"} + } + + assert Filter.affected_shapes(filter, insert_par_match) == MapSet.new(["shape1"]) + + # Neither matches (id=99, par_id=99) -> should not route + insert_neither = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "99", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_neither) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS comp_parent (x INT NOT NULL, y INT NOT NULL, PRIMARY KEY (x, y))", + "CREATE TABLE IF NOT EXISTS comp_child (id INT PRIMARY KEY, a INT NOT NULL, b INT NOT NULL)" + ] + test "composite-key subquery routing", %{inspector: inspector} do + {:ok, shape} = + Shape.new("comp_child", + inspector: inspector, + where: "(a, b) IN (SELECT x, y FROM comp_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + # Seed membership with a tuple value {10, 20} + Filter.SubqueryIndex.seed_membership( + index, + "shape1", + root_table, + subquery_ref, + 0, + MapSet.new([{10, 20}]) + ) + + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + # Matching tuple (a=10, b=20) should route + insert_match = %NewRecord{ + relation: {"public", "comp_child"}, + record: %{"id" => "1", "a" => "10", "b" => "20"} + } + + assert Filter.affected_shapes(filter, insert_match) == MapSet.new(["shape1"]) + + # Only one column matches (a=10, b=99) should not route + insert_partial = %NewRecord{ + relation: {"public", "comp_child"}, + record: %{"id" => "2", "a" => "10", "b" => "99"} + } + + assert Filter.affected_shapes(filter, insert_partial) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS like_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS like_child (id INT PRIMARY KEY, name TEXT NOT NULL, parent_id INT REFERENCES like_parent(id))" + ] + test "LIKE + subquery on the other_shapes path uses callback verification", %{ + inspector: inspector + } do + # LIKE is not optimisable, so this shape ends up in other_shapes. + # The subquery membership check should use the callback path. + {:ok, shape} = + Shape.new("like_child", + inspector: inspector, + where: "name LIKE 'keep%' AND parent_id IN (SELECT id FROM like_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + root_table = shape.root_table + + Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1, 2])) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) + + # Both conditions match -> route + insert_match = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "10", "name" => "keep_me", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_match) == MapSet.new(["shape1"]) + + # LIKE matches but subquery membership fails -> no route + insert_like_only = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "11", "name" => "keep_me", "parent_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_like_only) == MapSet.new([]) + + # Subquery matches but LIKE fails -> no route + insert_subquery_only = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "12", "name" => "discard", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_subquery_only) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS fb_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS fb_child (id INT PRIMARY KEY, par_id INT REFERENCES fb_parent(id))" + ] + test "fallback shape routes conservatively even after normal filter setup", %{ + inspector: inspector + } do + # When DnfPlan.compile fails or the test expression is unsupported, + # shapes are registered as fallback shapes. This test proves that a + # fallback shape keeps routing conservatively for all root-table changes, + # and that other seeded shapes on the same table are not affected. + {:ok, fallback_shape} = + Shape.new("fb_child", + inspector: inspector, + where: "par_id IN (SELECT id FROM fb_parent)" + ) + + {:ok, indexed_shape} = + Shape.new("fb_child", + inspector: inspector, + where: "id IN (SELECT id FROM fb_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("fallback_s", fallback_shape) + |> Filter.add_shape("indexed_s", indexed_shape) + + index = Filter.subquery_index(filter) + root_table = fallback_shape.root_table + + # Manually force fallback_s to stay on fallback (simulating unsupported) + # by not calling mark_ready. indexed_s gets seeded and marked ready. + subquery_ref = ["$sublink", "0"] + Filter.SubqueryIndex.seed_membership(index, "indexed_s", root_table, subquery_ref, 0, MapSet.new([1])) + Filter.SubqueryIndex.mark_ready(index, "indexed_s", root_table) + + assert Filter.SubqueryIndex.fallback?(index, "fallback_s", root_table) + refute Filter.SubqueryIndex.fallback?(index, "indexed_s", root_table) + + # fallback_s routes for any root-table change, indexed_s only for matching + insert_match = %NewRecord{ + relation: {"public", "fb_child"}, + record: %{"id" => "1", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_match) == + MapSet.new(["fallback_s", "indexed_s"]) + + insert_no_match = %NewRecord{ + relation: {"public", "fb_child"}, + record: %{"id" => "99", "par_id" => "99"} + } + + # fallback_s still routes, indexed_s does not + assert Filter.affected_shapes(filter, insert_no_match) == MapSet.new(["fallback_s"]) + + # Changes on unrelated table should not route either shape + insert_other = %NewRecord{ + relation: {"public", "fb_parent"}, + record: %{"id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_other) == MapSet.new([]) + end end end diff --git a/packages/sync-service/test/integration/subquery_dependency_update_test.exs b/packages/sync-service/test/integration/subquery_dependency_update_test.exs index 4514f37f4a..a7c5468199 100644 --- a/packages/sync-service/test/integration/subquery_dependency_update_test.exs +++ b/packages/sync-service/test/integration/subquery_dependency_update_test.exs @@ -267,6 +267,107 @@ defmodule Electric.Integration.SubqueryDependencyUpdateTest do end end + describe "repeated dependency positions (same dependency, multiple columns)" do + setup [:with_unique_db, :with_dual_ref_tables, :with_sql_execute] + setup :with_complete_stack + setup :with_electric_client + + # Shape: items where either a_id or b_id references an active parent + @dual_ref_where """ + a_id IN (SELECT id FROM dual_parents WHERE active = true) OR b_id IN (SELECT id FROM dual_parents WHERE active = true) + """ + + @tag with_sql: [ + "INSERT INTO dual_parents (id, active) VALUES ('p1', true), ('p2', false), ('p3', true)", + "INSERT INTO dual_items (id, a_id, b_id) VALUES ('item-1', 'p1', 'p2'), ('item-2', 'p2', 'p3'), ('item-3', 'p2', 'p2')" + ] + test "routes correctly when only one of two refs matches", ctx do + # item-1: a_id=p1(active), b_id=p2(inactive) -> in shape (via a_id) + # item-2: a_id=p2(inactive), b_id=p3(active) -> in shape (via b_id) + # item-3: a_id=p2(inactive), b_id=p2(inactive) -> NOT in shape + + shape = ShapeDefinition.new!("dual_items", where: @dual_ref_where) + stream = Client.stream(ctx.client, shape, live: true) + + with_consumer stream do + {:ok, inserts} = + await_count(consumer, 2, + match: &match?(%ChangeMessage{headers: %{operation: :insert}}, &1) + ) + + insert_ids = Enum.map(inserts, & &1.value["id"]) |> Enum.sort() + assert insert_ids == ["item-1", "item-2"] + assert_up_to_date(consumer) + + # Activate p2 — item-3 should now enter the shape (both a_id and b_id match). + # item-1 and item-2 may also get updates (new tags for their p2 ref). + Postgrex.query!(ctx.db_conn, "UPDATE dual_parents SET active = true WHERE id = 'p2'", []) + + # Collect messages until we see the insert for item-3 + messages = collect_messages(consumer, timeout: 2000) + + change_messages = Enum.filter(messages, &match?(%ChangeMessage{}, &1)) + + item3_inserts = + Enum.filter(change_messages, fn msg -> + match?(%ChangeMessage{headers: %{operation: :insert}, value: %{"id" => "item-3"}}, msg) + end) + + assert length(item3_inserts) == 1, + "Expected item-3 to be inserted into shape after p2 activated. " <> + "Got change messages: #{inspect(Enum.map(change_messages, &{&1.headers.operation, &1.value["id"]}))}" + end + end + + @tag with_sql: [ + "INSERT INTO dual_parents (id, active) VALUES ('p1', true)", + "INSERT INTO dual_items (id, a_id, b_id) VALUES ('item-1', 'p1', 'p1')" + ] + test "deactivating parent removes items that relied on it for both refs", ctx do + shape = ShapeDefinition.new!("dual_items", where: @dual_ref_where) + stream = Client.stream(ctx.client, shape, live: true) + + with_consumer stream do + assert_insert(consumer, %{"id" => "item-1"}) + assert_up_to_date(consumer) + + # Deactivate p1 — item-1 loses both a_id and b_id matches + Postgrex.query!(ctx.db_conn, "UPDATE dual_parents SET active = false WHERE id = 'p1'", []) + + assert_delete(consumer, %{"id" => "item-1"}) + end + end + end + + # ---- Dual-Reference Schema for repeated positions tests ---- + + def with_dual_ref_tables(%{db_conn: conn} = _context) do + Postgrex.query!( + conn, + """ + CREATE TABLE dual_parents ( + id TEXT PRIMARY KEY, + active BOOLEAN NOT NULL DEFAULT false + ) + """, + [] + ) + + Postgrex.query!( + conn, + """ + CREATE TABLE dual_items ( + id TEXT PRIMARY KEY, + a_id TEXT NOT NULL REFERENCES dual_parents(id), + b_id TEXT NOT NULL REFERENCES dual_parents(id) + ) + """, + [] + ) + + %{tables: [{"public", "dual_parents"}, {"public", "dual_items"}]} + end + # ---- Simple Parent/Child Schema for 1-level subquery tests ---- def with_simple_parent_child_tables(%{db_conn: conn} = _context) do diff --git a/packages/sync-service/test/integration/subquery_move_out_test.exs b/packages/sync-service/test/integration/subquery_move_out_test.exs index ffacbbe1ea..8219d60aad 100644 --- a/packages/sync-service/test/integration/subquery_move_out_test.exs +++ b/packages/sync-service/test/integration/subquery_move_out_test.exs @@ -357,6 +357,66 @@ defmodule Electric.Integration.SubqueryMoveOutTest do end end + describe "negated subquery move-in and move-out" do + setup [:with_unique_db, :with_parent_child_tables, :with_sql_execute] + setup :with_complete_stack + setup :with_electric_client + + # Shape: children whose parent is NOT active + @negated_where "parent_id NOT IN (SELECT id FROM parent WHERE active = true)" + + setup _ctx do + shape = ShapeDefinition.new!("child", where: @negated_where) + %{shape: shape} + end + + @tag with_sql: [ + "INSERT INTO parent (id, active) VALUES ('parent-1', true)", + "INSERT INTO child (id, parent_id, value) VALUES ('child-1', 'parent-1', 'test value')" + ] + test "child enters shape when parent becomes inactive (negated move-in)", %{ + client: client, + shape: shape, + db_conn: db_conn + } do + stream = Client.stream(client, shape, live: true) + + with_consumer stream do + # Initially child-1 is NOT in shape because parent-1 is active + # (NOT IN active parents means only inactive-parent children are included) + assert_up_to_date(consumer) + + # Deactivate parent-1 — child-1 should now ENTER the shape + Postgrex.query!(db_conn, "UPDATE parent SET active = false WHERE id = 'parent-1'", []) + + assert_insert(consumer, %{"id" => "child-1"}) + end + end + + @tag with_sql: [ + "INSERT INTO parent (id, active) VALUES ('parent-1', false)", + "INSERT INTO child (id, parent_id, value) VALUES ('child-1', 'parent-1', 'test value')" + ] + test "child leaves shape when parent becomes active (negated move-out)", %{ + client: client, + shape: shape, + db_conn: db_conn + } do + stream = Client.stream(client, shape, live: true) + + with_consumer stream do + # Initially child-1 IS in shape because parent-1 is inactive + assert_insert(consumer, %{"id" => "child-1"}) + assert_up_to_date(consumer) + + # Activate parent-1 — child-1 should now LEAVE the shape + Postgrex.query!(db_conn, "UPDATE parent SET active = true WHERE id = 'parent-1'", []) + + assert_delete(consumer, %{"id" => "child-1"}) + end + end + end + # Helper to set up parent/child tables for subquery tests def with_parent_child_tables(%{db_conn: conn} = _context) do statements = [ From f85818b12fa1375c9a46943a03c68d071c5b9378 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 23 Mar 2026 20:45:00 +0000 Subject: [PATCH 49/63] Add ELECTRIC_SUBQUERY_BUFFER_MAX_TRANSACTIONS to limit buffering during move-in The subquery buffering state machine can accumulate transactions indefinitely while waiting for a move-in query to complete. Add a configurable limit (default 1000) that emits a :shutdown action when exceeded, terminating the shape and triggering a 409 must-refetch for clients. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/sync-service/config/runtime.exs | 2 ++ packages/sync-service/lib/electric/config.ex | 3 +- .../lib/electric/shapes/consumer.ex | 15 ++++++++- .../electric/shapes/consumer/subqueries.ex | 15 +++++++-- .../shapes/consumer/subqueries/buffering.ex | 18 +++++++--- .../shapes/consumer/subqueries/steady.ex | 6 ++-- .../shapes/consumer/subqueries_test.exs | 33 +++++++++++++++++-- 7 files changed, 78 insertions(+), 14 deletions(-) diff --git a/packages/sync-service/config/runtime.exs b/packages/sync-service/config/runtime.exs index bb97c3bdbe..1ee1c30156 100644 --- a/packages/sync-service/config/runtime.exs +++ b/packages/sync-service/config/runtime.exs @@ -236,6 +236,8 @@ config :electric, # The ELECTRIC_EXPERIMENTAL_MAX_BATCH_SIZE is undocumented and used for testing only. max_batch_size: env!("ELECTRIC_EXPERIMENTAL_MAX_BATCH_SIZE", :integer, nil), service_port: env!("ELECTRIC_PORT", :integer, nil), + subquery_buffer_max_transactions: + env!("ELECTRIC_SUBQUERY_BUFFER_MAX_TRANSACTIONS", :integer, nil), shape_hibernate_after: shape_hibernate_after, shape_enable_suspend?: shape_enable_suspend?, storage_dir: storage_dir, diff --git a/packages/sync-service/lib/electric/config.ex b/packages/sync-service/lib/electric/config.ex index 66a93fd002..654949d44e 100644 --- a/packages/sync-service/lib/electric/config.ex +++ b/packages/sync-service/lib/electric/config.ex @@ -120,7 +120,8 @@ defmodule Electric.Config do shape_db_synchronous: Electric.ShapeCache.ShapeStatus.ShapeDb.Connection.default!(:synchronous), shape_db_cache_size: Electric.ShapeCache.ShapeStatus.ShapeDb.Connection.default!(:cache_size), - exclude_spans: MapSet.new() + exclude_spans: MapSet.new(), + subquery_buffer_max_transactions: 1000 ] @installation_id_key "electric_installation_id" diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index e5af2e70a9..7e40234170 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -925,6 +925,13 @@ defmodule Electric.Shapes.Consumer do state = handle_txn_with_truncate(xid, state) {state, 0, 0, nil} + :shutdown, {state, _num_changes, _total_size, _latest_written} -> + Logger.warning( + "Subquery buffer overflow for #{state.shape_handle} - terminating shape" + ) + + {mark_for_removal(state), 0, 0, nil} + {:flush, source_offset}, {state, num_changes, total_size, latest_written} -> state = if latest_written do @@ -1203,7 +1210,13 @@ defmodule Electric.Shapes.Consumer do shape_handle: state.shape_handle, dnf_plan: dnf_plan, views: views, - dependency_handle_to_ref: dep_handle_to_ref + dependency_handle_to_ref: dep_handle_to_ref, + buffer_max_transactions: + Electric.StackConfig.lookup( + state.stack_id, + :subquery_buffer_max_transactions, + Electric.Config.default(:subquery_buffer_max_transactions) + ) ) } diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index baa91c7c3a..8a669404bf 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -24,7 +24,10 @@ defmodule Electric.Shapes.Consumer.Subqueries do @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} @type sm_action() :: - {:store, [Changes.change() | QueryRow.t()]} | {:control, map()} | {:truncate, term()} + {:store, [Changes.change() | QueryRow.t()]} + | {:control, map()} + | {:truncate, term()} + | :shutdown @type action() :: sm_action() | :start_move_in_query | {:flush, term()} @@ -35,15 +38,21 @@ defmodule Electric.Shapes.Consumer.Subqueries do def new(opts) when is_list(opts) or is_map(opts) do opts = Map.new(opts) + stack_id = fetch_opt!(opts, :stack_id) + %Steady{ shape: fetch_opt!(opts, :shape), - stack_id: fetch_opt!(opts, :stack_id), + stack_id: stack_id, shape_handle: fetch_opt!(opts, :shape_handle), dnf_plan: fetch_opt!(opts, :dnf_plan), views: Map.get(opts, :views, %{}), dependency_handle_to_ref: Map.get(opts, :dependency_handle_to_ref, %{}), latest_seen_lsn: Map.get(opts, :latest_seen_lsn), - queue: MoveQueue.new() + queue: MoveQueue.new(), + buffer_max_transactions: + Map.get(opts, :buffer_max_transactions, + Electric.Config.default(:subquery_buffer_max_transactions) + ) } end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex index 0eaa5b92b2..b3be36fcbc 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex @@ -36,7 +36,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do boundary_txn_count: nil, buffered_txns: [], queue: MoveQueue.new(), - query_started?: false + query_started?: false, + buffer_max_transactions: 1000 ] @type t() :: %__MODULE__{ @@ -56,7 +57,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do boundary_txn_count: non_neg_integer() | nil, buffered_txns: [Transaction.t()], queue: MoveQueue.t(), - query_started?: boolean() + query_started?: boolean(), + buffer_max_transactions: pos_integer() } @spec from_steady( @@ -93,7 +95,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do views_after_move: views_after, dependency_handle_to_ref: state.dependency_handle_to_ref, latest_seen_lsn: state.latest_seen_lsn, - queue: queue + queue: queue, + buffer_max_transactions: state.buffer_max_transactions } end @@ -203,7 +206,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do views: state.views_after_move, dependency_handle_to_ref: state.dependency_handle_to_ref, latest_seen_lsn: state.latest_seen_lsn, - queue: state.queue + queue: state.queue, + buffer_max_transactions: state.buffer_max_transactions } end @@ -245,7 +249,11 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, |> Buffering.maybe_buffer_boundary_from_txn(txn) |> Map.update!(:buffered_txns, &[txn | &1]) - Buffering.maybe_splice(next_state) + if length(next_state.buffered_txns) > next_state.buffer_max_transactions do + {[:shutdown], next_state} + else + Buffering.maybe_splice(next_state) + end end def handle_event(state, {:global_last_seen_lsn, lsn}) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex index 61d8694f01..a8e3d8a8a5 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex @@ -16,7 +16,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do views: %{}, dependency_handle_to_ref: %{}, latest_seen_lsn: nil, - queue: MoveQueue.new() + queue: MoveQueue.new(), + buffer_max_transactions: 1000 ] @type t() :: %__MODULE__{ @@ -27,7 +28,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do views: %{[String.t()] => MapSet.t()}, dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, - queue: MoveQueue.t() + queue: MoveQueue.t(), + buffer_max_transactions: pos_integer() } @spec drain_queue(t(), [Subqueries.sm_action()]) :: diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs index 18ff7a0e2e..5442495e5e 100644 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs @@ -658,6 +658,27 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do assert length(patterns) == 2 end + test "emits :shutdown when buffered transactions exceed the limit" do + state = new_state(buffer_max_transactions: 3) + dep_handle = dep_handle(state) + + {[:start_move_in_query], state} = + Subqueries.handle_event( + state, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert %Buffering{} = state + + {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("1", "1")])) + {[], state} = Subqueries.handle_event(state, txn(51, [child_insert("2", "1")])) + {[], state} = Subqueries.handle_event(state, txn(52, [child_insert("3", "1")])) + + {actions, _state} = Subqueries.handle_event(state, txn(53, [child_insert("4", "1")])) + + assert [:shutdown] = actions + end + test "raises on dependency handle mismatch" do assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> new_state() @@ -720,14 +741,22 @@ defmodule Electric.Shapes.Consumer.SubqueriesTest do {:ok, dnf_plan} = DnfPlan.compile(shape) dep_handle = hd(shape.shape_dependencies_handles) - Subqueries.new( + new_opts = [ shape: shape, stack_id: "stack-id", shape_handle: "shape-handle", dnf_plan: dnf_plan, views: %{["$sublink", "0"] => Keyword.get(opts, :subquery_view, MapSet.new())}, dependency_handle_to_ref: %{dep_handle => {0, ["$sublink", "0"]}} - ) + ] + + new_opts = + case Keyword.fetch(opts, :buffer_max_transactions) do + {:ok, max} -> Keyword.put(new_opts, :buffer_max_transactions, max) + :error -> new_opts + end + + Subqueries.new(new_opts) end defp new_negated_state(opts \\ []) do From 044fc50e261ec4c2fe7ec6689ca68e9dbb3debda Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 14:36:07 +0000 Subject: [PATCH 50/63] Client: fix tests --- .../test/electric/client_test.exs | 58 ++++++++++--------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/packages/elixir-client/test/electric/client_test.exs b/packages/elixir-client/test/electric/client_test.exs index 6b1b2ea5b9..e4c37f3696 100644 --- a/packages/elixir-client/test/electric/client_test.exs +++ b/packages/elixir-client/test/electric/client_test.exs @@ -1601,7 +1601,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-abc"]}, + "headers" => %{"operation" => "insert", "tags" => ["tag-abc"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "test"} }, @@ -1664,13 +1664,13 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["shared-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["shared-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, %{ "key" => "row-2", - "headers" => %{"operation" => "insert", "tags" => ["shared-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["shared-tag"], "active_conditions" => [true]}, "offset" => "1_1", "value" => %{"id" => "2222"} }, @@ -1732,7 +1732,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"]}, + "headers" => %{"operation" => "insert", "tags" => ["tag-A"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1794,7 +1794,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["old-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["old-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1808,7 +1808,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", "tags" => ["new-tag"], - "removed_tags" => ["old-tag"] + "removed_tags" => ["old-tag"], + "active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -1871,7 +1872,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1882,7 +1883,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "delete", "tags" => ["my-tag"]}, + "headers" => %{"operation" => "delete", "tags" => ["my-tag"], "active_conditions" => [true]}, "offset" => "2_0", "value" => %{"id" => "1111"} }, @@ -1949,7 +1950,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111", "version" => "1"} }, @@ -1963,7 +1964,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", # Same tag, but no removed_tags - this is the problematic case - "tags" => ["my-tag"] + "tags" => ["my-tag"], + "active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "version" => "2"} @@ -2025,15 +2027,20 @@ defmodule Electric.ClientTest do "Expected 1 synthetic delete but got #{length(delete_msgs)} - duplicate entries in tag_index" end - test "row with multiple tags - partial move-out should not delete if other tags remain", + test "row with multiple disjuncts - partial move-out should not delete if another disjunct satisfied", ctx do - # Edge case: row has multiple tags, move-out for one tag shouldn't delete - # if the row still belongs to the shape via another tag + # Row matches via two disjuncts at different positions. + # Move-out for one position shouldn't delete if the other disjunct is still satisfied. + # Tags: ["tag-a/", "/tag-b"] means disjunct 0 uses pos 0, disjunct 1 uses pos 1 body1 = Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-a", "tag-b"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-a/", "/tag-b"], + "active_conditions" => [true, true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2045,7 +2052,7 @@ defmodule Electric.ClientTest do %{ "headers" => %{ "event" => "move-out", - # Only moving out tag-a, row still has tag-b + # Only moving out pos 0, disjunct 1 (pos 1) still satisfied "patterns" => [%{"pos" => 0, "value" => "tag-a"}] } }, @@ -2078,17 +2085,13 @@ defmodule Electric.ClientTest do bypass_response(ctx, responses) - # insert, up-to-date, up-to-date - # BUG: Currently generates a synthetic delete even though row still has tag-b - # EXPECTED: No synthetic delete since row still belongs via tag-b + # insert, up-to-date, up-to-date (no synthetic delete) msgs = stream(ctx, 3) delete_msgs = Enum.filter(msgs, &match?(%ChangeMessage{headers: %{operation: :delete}}, &1)) - # This documents expected behavior - row should NOT be deleted - # If this fails, it confirms the bug that partial move-out incorrectly deletes assert delete_msgs == [], - "Row with multiple tags should not be deleted when only one tag is moved out" + "Row should not be deleted when another disjunct is still satisfied" end test "synthetic delete uses latest value after update", ctx do @@ -2097,7 +2100,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "original"} }, @@ -2111,7 +2114,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", "tags" => ["my-tag"], - "removed_tags" => ["my-tag"] + "removed_tags" => ["my-tag"], + "active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -2188,7 +2192,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"]}, + "headers" => %{"operation" => "insert", "tags" => ["tag-A"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2202,8 +2206,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", # Remove the old tag but add NO new tags - "removed_tags" => ["tag-A"] - # Note: no "tags" field, meaning this row now has zero tags + "removed_tags" => ["tag-A"], + "active_conditions" => [false] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -2276,7 +2280,7 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, "offset" => "1_0", "value" => %{"id" => "1111"} }, From 667156755995e076fd3aad0551bf6f96fe2deff4 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 14:38:44 +0000 Subject: [PATCH 51/63] Client - format tests --- .../test/electric/client_test.exs | 66 +++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/packages/elixir-client/test/electric/client_test.exs b/packages/elixir-client/test/electric/client_test.exs index e4c37f3696..ba93e42b98 100644 --- a/packages/elixir-client/test/electric/client_test.exs +++ b/packages/elixir-client/test/electric/client_test.exs @@ -1601,7 +1601,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-abc"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-abc"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "test"} }, @@ -1664,13 +1668,21 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["shared-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["shared-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, %{ "key" => "row-2", - "headers" => %{"operation" => "insert", "tags" => ["shared-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["shared-tag"], + "active_conditions" => [true] + }, "offset" => "1_1", "value" => %{"id" => "2222"} }, @@ -1732,7 +1744,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-A"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1794,7 +1810,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["old-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["old-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1872,7 +1892,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1883,7 +1907,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "delete", "tags" => ["my-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "delete", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "2_0", "value" => %{"id" => "1111"} }, @@ -1950,7 +1978,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "version" => "1"} }, @@ -2100,7 +2132,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "original"} }, @@ -2192,7 +2228,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-A"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2280,7 +2320,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"], "active_conditions" => [true]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, From 4738538ac403ad77682a34cf29f52cc70ecc20fb Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 24 Mar 2026 14:23:23 +0000 Subject: [PATCH 52/63] REMOVE: add reviews --- packages/sync-service/review1.md | 22 ++++++ packages/sync-service/review2.md | 122 +++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 packages/sync-service/review1.md create mode 100644 packages/sync-service/review2.md diff --git a/packages/sync-service/review1.md b/packages/sync-service/review1.md new file mode 100644 index 0000000000..ea8bd85142 --- /dev/null +++ b/packages/sync-service/review1.md @@ -0,0 +1,22 @@ +**Findings** +- High: legacy move-out handling is broken in [tag_tracker.ex:194](/Users/rob/src/electric-sql/worktrees/alt1/packages/elixir-client/lib/electric/client/tag_tracker.ex#L194). `generate_synthetic_deletes/5` now unconditionally walks `data.active_conditions`, but legacy flat-tag messages still store `active_conditions: nil`. That crashes on the first move-out instead of applying the old “remove only when no tags remain” behavior. I reproduced this with the targeted Elixir client suite: 6 product-test failures all hit this path (`receives move-out`, `multiple matching rows`, `partial move-out`, `resume preserves move-out state`, etc.). +- Medium: DNF compile failure does not have one consistent contract. The filter silently falls back in [filter.ex:91](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/filter.ex#L91), querying silently drops back to non-DNF metadata in [querying.ex:333](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/querying.ex#L333), while the consumer initializes `NoSubqueries` in [consumer.ex:1187](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/consumer.ex#L1187) and only invalidates later on the first dependency event in [consumer.ex:373](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/consumer.ex#L373). That means a shape can be accepted, snapshot successfully, and only fail under live traffic. +- Low: one new client test is broken on the current Elixir version: [client_test.exs:2834](/Users/rob/src/electric-sql/worktrees/alt1/packages/elixir-client/test/electric/client_test.exs#L2834) uses `String.count/2`, which is undefined here. That leaves the targeted client suite at 7 failures instead of 6. + +**Module Design** +The direction is better than before. `DnfPlan` is a worthwhile extraction, and `Subqueries.Steady` / `Buffering` / `MoveQueue` are much clearer than the older move-handling path. + +The boundaries are still not fully clean: +- `Consumer` is still doing too much. It owns GenServer lifecycle, subquery-runtime bootstrapping, reverse-index seeding/sync, action execution, and log writing; see [consumer.ex:1183](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/consumer.ex#L1183). +- `Shape` still mixes declarative shape definition with runtime projection/evaluation concerns; `convert_change/3` now needs runtime-only opts like `dnf_plan`, `extra_refs`, `stack_id`, and `shape_handle`; see [shape.ex:623](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/shape.ex#L623) and [shape.ex:720](/Users/rob/src/electric-sql/worktrees/alt1/packages/sync-service/lib/electric/shapes/shape.ex#L720). +- The two `SubqueryIndex` modules have different responsibilities but nearly identical names, which makes the interface harder to reason about. + +**Improvements** +- Compile once, consume everywhere. Introduce a single compiled artifact such as `CompiledShape` or `ShapeRuntime` that owns the `DnfPlan`, capability flags, and tag/metadata helpers. `Filter`, `Querying`, and `Consumer` should consume that instead of each calling `DnfPlan.compile/1` independently. +- Extract a `Consumer.SubqueryRuntime` boundary. Let it own dependency views, buffering state, reverse-index sync, and emitted actions; let `Consumer` remain the GenServer shell plus storage writer. +- Split `Shape` into spec vs projector. Keep `Shape` as validated request/config; move `convert_change` and row-metadata projection into a dedicated projector module. +- On the client, do not encode “legacy tags” vs “DNF tags” with `active_conditions: nil`. Use separate modes or separate modules. The regression above came directly from that ambiguous interface. +- Rename the index pair to something role-based, e.g. `Filter.SubqueryMembershipIndex` and `Filter.Indexes.SubqueryWhereIndex`. + +**Verification** +Targeted `sync-service` suites passed: DNF, filter, consumer, querying, router, and the new subquery integrations. Targeted `elixir-client` suites failed with 7 failures: 6 real regressions from `TagTracker`, 1 broken test helper. diff --git a/packages/sync-service/review2.md b/packages/sync-service/review2.md new file mode 100644 index 0000000000..8e8dafe082 --- /dev/null +++ b/packages/sync-service/review2.md @@ -0,0 +1,122 @@ +# Module Design Review: `rob/simple-subqueries-with-dnf` + +## Summary + +This branch adds DNF-based subquery support: decomposing WHERE clauses into Disjunctive Normal Form, tracking per-position active conditions, and handling move-in/move-out via a state machine. The module structure is generally well-thought-out. Below are observations and suggestions focused on interfaces and responsibilities. + +--- + +## What's Working Well + +1. **StateMachine protocol** (`Subqueries.StateMachine`) — Using a protocol to dispatch `handle_event` across `Steady`, `Buffering`, and `NoSubqueries` is clean. The state machine pattern keeps the Consumer GenServer simple: it just calls `Subqueries.handle_event` and interprets the resulting actions. + +2. **DnfPlan as a pure data module** — `DnfPlan` is a well-scoped compilation unit. It takes a shape, produces an immutable plan struct, and provides pure functions to query it. No process, no side effects. This is easy to test and reason about. + +3. **MoveQueue** — Single-responsibility, well-defined interface (`new/0`, `enqueue/4`, `pop_next/1`), with deduplication/redundancy elimination contained internally. Clean design. + +4. **Decomposer** — Pure function that takes an AST and returns a decomposition. Well-documented with examples. The `@max_disjuncts` limit is a good safety valve. + +5. **SqlGenerator** — Precedence-aware SQL generation with a clear, exhaustive pattern match. The catch-all `raise` ensures unsupported nodes fail early at shape creation time. + +--- + +## Design Issues & Suggestions + +### 1. `DnfPlan` has mixed responsibilities + +**Current:** `DnfPlan` does three distinct things: +- **Compilation** (decompose WHERE → plan struct) +- **Runtime evaluation** (`get_row_metadata`, `compute_active_conditions`, `compute_tags`) +- **SQL generation** (`move_in_where_clause`, `active_conditions_sql`, `tags_sql`, `active_conditions_sql_for_views`) + +At 661 lines, it's the largest new module. The runtime evaluation functions (`get_row_metadata`) reach into `Subqueries` for hashing, and the SQL generation functions have complex parameterized query building with `position_to_sql`. + +**Suggestion:** Consider splitting into: +- `DnfPlan` — the struct + `compile/1` (pure data) +- `DnfPlan.Evaluator` or similar — runtime row evaluation (`get_row_metadata`, `compute_active_conditions`, `compute_tags`) +- `DnfPlan.SqlBuilder` — SQL generation for move-in queries, active_conditions, tags + +This would make each piece independently testable and keep `DnfPlan` focused on the compilation step. The current `DnfPlan` module is doing work at three different stages of the pipeline (compile-time, query-time, and runtime evaluation), which makes it harder to reason about when each function is called. + +### 2. `Subqueries` module is a hybrid facade + utility bag + +**Current:** `Subqueries` serves as: +- A facade that wraps `StateMachine.handle_event` and adds flush/query-start logic +- A constructor (`new/1` returns `Steady.t()`) +- A utility module for hashing (`make_value_hash`, `namespace_value`, `null_sentinel`, `value_prefix`) +- A helper module for the Consumer (`query_move_in_async`, `convert_transaction`, `validate_dependency_handle!`) + +**Issue:** The facade role is good, but it's doing too many things. The hashing utilities are referenced from `DnfPlan` (creating a circular dependency at the concept level — `DnfPlan` → `Subqueries` for hashing, `Subqueries` → `DnfPlan` for compilation). The `query_move_in_async` function takes a `consumer_state` map and reaches into its fields (`consumer_state.shape`, `consumer_state.stack_id`, etc.), coupling it to the Consumer's internal state shape. + +**Suggestion:** +- Extract hashing to a small `Subqueries.Hashing` or `Subqueries.Tags` module. Both `DnfPlan` and `Subqueries` import from it. No circular dependency. +- Make `query_move_in_async` accept explicit parameters rather than reaching into the consumer state map. Or move it into the Consumer module where it already lives conceptually (it's called from `start_move_in_query/1` in Consumer). + +### 3. Duplicated code between `Steady` and `Buffering` + +`remove_move_values/2` and `add_move_values/2` (or `add_move_in_values/2`) are duplicated across `Steady` (lines 123-133) and `Buffering` (lines 218-236). These are private functions doing identical work. + +**Suggestion:** Move them to `Subqueries` or `MoveQueue` as shared helpers. + +### 4. `Buffering` struct + protocol impl in one file + +`Buffering` defines the struct + its public functions AND the `StateMachine` protocol implementation in the same file (lines 1-237 for the module, 239-306 for the protocol impl). This is fine for small protocols, but the protocol impl here handles 5 event types with non-trivial logic. + +The pattern of defining the struct in the module and the protocol impl at the bottom of the same file is reasonable for Elixir, but consider whether the protocol impl for `Buffering` is growing complex enough to warrant its own file for readability. + +### 5. Consumer's `apply_subquery_event` is the most complex junction point + +**Current:** `apply_subquery_event` (Consumer lines 897-956) is the bridge between the GenServer and the state machine. It: +1. Calls `Subqueries.handle_event` +2. Updates the subquery index from state changes +3. Iterates over actions (`store`, `control`, `start_move_in_query`, `truncate`, `shutdown`, `flush`) +4. Manages offset tracking and txn_offset_mapping + +This is where all the complexity concentrates. The action types are defined as types in `Subqueries` (`sm_action`, `action`) but interpreted in Consumer. If you add a new action type, you modify `Subqueries` types AND `Consumer` reducer — classic shotgun surgery. + +**Suggestion:** Consider making action interpretation explicit. Either: +- A function in `Subqueries` that converts actions into a simpler set of commands the Consumer can execute, or +- A behavior/callback that the Consumer implements for each action type + +This would make the contract between the state machine and the Consumer more explicit. + +### 6. `SubqueryIndex` (Filter module) vs `SubqueryIndex` (Filter.Indexes module) naming + +There are two `SubqueryIndex` modules: +- `Electric.Shapes.Filter.SubqueryIndex` — The ETS-backed reverse index for routing +- `Electric.Shapes.Filter.Indexes.SubqueryIndex` — The WhereCondition tree node for subquery predicates + +Having two modules named `SubqueryIndex` in adjacent namespaces is confusing. The `Filter.Indexes.SubqueryIndex` is really a `SubqueryConditionNode` or similar — it's a node type in the WhereCondition tree, not an "index" in the same sense. + +**Suggestion:** Rename `Filter.Indexes.SubqueryIndex` to `Filter.Indexes.SubqueryCondition` or `Filter.Indexes.SubqueryNode` to distinguish it from the actual reverse index. + +### 7. `persistent_term` for SubqueryIndex discovery + +`SubqueryIndex.new/1` stores the ETS table ref in `persistent_term` keyed by `{__MODULE__, stack_id}`, and consumers look it up via `for_stack/1`. This works but has no cleanup path — if the Filter process dies and restarts, the old ETS table ref stays in persistent_term (pointing to a dead table). The `new/1` call will overwrite it, but there's a window where consumers could get a stale ref. + +This might be fine in practice given the supervision tree, but worth noting. + +### 8. The `dep_index` derivation pattern is fragile + +Throughout the code, dependency indices are derived from subquery ref paths: +```elixir +dep_index = ref |> List.last() |> String.to_integer() +``` + +This appears in `DnfPlan.extract_subquery_info`, `Consumer.seed_subquery_index`, `Consumer.do_update_subquery_index`, and `Steady.dep_ref_for_index` (reverse lookup). The coupling between the string ref path format (`["$sublink", "0"]`) and integer indices is implicit and spread across modules. + +**Suggestion:** Make this a single function in one place (e.g., `Subqueries.dep_index_from_ref/1`) and use it consistently. + +### 9. `NoSubqueries` handles truncation but `Steady`/`Buffering` don't + +`NoSubqueries.handle_event` checks for `TruncatedRelation` and returns `{:truncate, xid}`. The `Steady` and `Buffering` implementations don't — `convert_transaction` in `Subqueries` flat-maps through `Shape.convert_change` which silently drops truncations. If a truncation arrives while in `Steady` or `Buffering` state for a subquery shape, it won't be handled. + +This might be intentional (subquery shapes may not need truncation handling?), but it's an asymmetry in the StateMachine contract that should at least be documented. + +--- + +## Overall Assessment + +The module decomposition is solid for a first cut of a complex feature. The state machine approach with protocol dispatch is the right pattern. The main theme of the suggestions is **tightening interfaces**: reducing the surface area of `DnfPlan`, making the Subqueries facade less of a utility bag, and making implicit contracts (dep_index derivation, action types) more explicit. + +The code is well-typed with `@spec` annotations throughout, which is great. Test coverage from the diff looks comprehensive. The `Decomposer` and `SqlGenerator` are particularly clean — pure functions with clear inputs/outputs. From 25faa4d381a33a8ce17647488a2831211217d68a Mon Sep 17 00:00:00 2001 From: rob Date: Tue, 24 Mar 2026 17:20:32 +0000 Subject: [PATCH 53/63] Consumer redesign part 1 --- packages/sync-service/consumer-design.md | 723 +++++++++++++++ .../lib/electric/shapes/consumer.ex | 360 +++----- .../lib/electric/shapes/consumer/effect.ex | 8 + .../electric/shapes/consumer/event_handler.ex | 22 + .../default.ex} | 58 +- .../subqueries/buffering.ex | 316 ++++--- .../{ => event_handler}/subqueries/steady.ex | 156 ++-- .../event_handler/unsupported_subquery.ex | 52 ++ .../lib/electric/shapes/consumer/log_op.ex | 18 + .../lib/electric/shapes/consumer/plan.ex | 19 + .../electric/shapes/consumer/plan_executor.ex | 186 ++++ .../lib/electric/shapes/consumer/state.ex | 2 +- .../electric/shapes/consumer/subqueries.ex | 98 +-- .../consumer/subqueries/state_machine.ex | 4 - .../shapes/filter/indexes/equality_index.ex | 4 +- .../shapes/filter/indexes/subquery_index.ex | 18 +- .../electric/shapes/filter/subquery_index.ex | 3 +- .../electric/shapes/filter/where_condition.ex | 13 +- .../shapes/consumer/event_handler_test.exs | 766 ++++++++++++++++ .../shapes/consumer/subqueries_test.exs | 829 ------------------ .../shapes/filter/subquery_index_test.exs | 23 +- .../test/electric/shapes/filter_test.exs | 41 +- .../subquery_dependency_update_test.exs | 5 +- 23 files changed, 2317 insertions(+), 1407 deletions(-) create mode 100644 packages/sync-service/consumer-design.md create mode 100644 packages/sync-service/lib/electric/shapes/consumer/effect.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/event_handler.ex rename packages/sync-service/lib/electric/shapes/consumer/{subqueries/no_subqueries.ex => event_handler/default.ex} (65%) rename packages/sync-service/lib/electric/shapes/consumer/{ => event_handler}/subqueries/buffering.ex (57%) rename packages/sync-service/lib/electric/shapes/consumer/{ => event_handler}/subqueries/steady.ex (66%) create mode 100644 packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/log_op.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/plan.ex create mode 100644 packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex delete mode 100644 packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex create mode 100644 packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs delete mode 100644 packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs diff --git a/packages/sync-service/consumer-design.md b/packages/sync-service/consumer-design.md new file mode 100644 index 0000000000..23f3d78acc --- /dev/null +++ b/packages/sync-service/consumer-design.md @@ -0,0 +1,723 @@ +# Consumer Event Handling Design + +## Goal + +Simplify `Electric.Shapes.Consumer` by separating: + +- process and lifecycle concerns +- event interpretation +- log writing +- imperative side effects + +The main target is the current subquery runtime work from +`simple-subqueries-with-dnf-plan.md`, but the design should also leave the +consumer in a cleaner shape for the non-subquery path. + +This document is a design for a refactor, not a description of current code. + +## Current Problems + +### 1. `Consumer` owns too many responsibilities + +Today `lib/electric/shapes/consumer.ex` is doing all of the following: + +- GenServer lifecycle and supervision behavior +- initialization of storage, snapshotter, and subscriptions +- initial snapshot buffering and transaction fragment assembly +- subquery runtime initialization +- event interpretation for complete transactions and dependency events +- log writing +- control-message writing +- flush bookkeeping +- move-in query startup +- subquery index synchronization +- client and materializer notifications + +That makes the main event path hard to follow and hard to change safely. + +### 2. `NoSubqueries` has the wrong abstraction boundary + +`NoSubqueries` currently implements the subquery state-machine protocol, but it +is not really "a subquery thing". It is the default event behavior for a shape +that does not need the subquery runtime. + +It is also overloaded with another meaning: when DNF compilation fails or the +shape is unsupported, the consumer also falls back to `NoSubqueries`, and then +special-cases dependency events to invalidate the shape. That means one module +is standing for both: + +- "plain/default event handling" +- "unsupported subquery runtime" + +Those are different concerns and should be represented explicitly. + +### 3. `apply_subquery_event/3` is doing too many jobs + +The current `apply_subquery_event/3` function in `consumer.ex`: + +- calls the subquery state machine +- swaps the runtime state +- updates the subquery routing index +- executes low-level actions +- computes notification bounds +- infers flush behavior from action sequences +- handles termination side effects + +This is the main source of complexity in the current design. + +### 4. Low-level action tuples leak storage details into the state machine + +The current actions: + +- `{:store, items}` +- `{:control, message}` +- `:start_move_in_query` +- `{:flush, source_offset}` +- `{:truncate, xid}` +- `:shutdown` + +push too much low-level detail into the state machine. In particular: + +- `{:control, message}` exposes storage encoding concerns +- move-in splice behavior is represented as a sequence of tiny low-level steps +- `{:flush, source_offset}` is not really a write action at all + +This forces the consumer and the state machine to coordinate through implicit +action ordering instead of through clearer semantic operations. + +## Design Goals + +### Primary goals + +- `Consumer` should own process/lifecycle behavior, not semantic event logic. +- A dedicated event handler should interpret high-level events. +- Log writes and imperative side effects should be planned first, then executed. +- Unsupported subquery behavior should be explicit. +- The default path and the subquery path should share the same outer contract. + +### Non-goals for the first refactor + +- Do not refactor transaction fragment assembly into the new framework yet. +- Do not force all consumer logic into event handlers in one step. +- Do not introduce a deep polymorphic framework unless it materially simplifies + implementation. + +The first cut should cover the event path that currently flows through +`apply_subquery_event/3`, especially: + +- complete transactions +- dependency materializer events +- move-in query callbacks +- global LSN updates + +## Proposed Core Abstractions + +## 1. Event handler + +Replace `state.subquery_state` with `state.event_handler`. + +The event handler owns semantic interpretation of high-level events and carries +the runtime state needed for that interpretation. + +### Proposed module family + +- `Electric.Shapes.Consumer.EventHandler` +- `Electric.Shapes.Consumer.EventHandler.Default` +- `Electric.Shapes.Consumer.EventHandler.UnsupportedSubquery` +- `Electric.Shapes.Consumer.EventHandler.Subqueries.Steady` +- `Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering` + +### Proposed responsibilities + +`Consumer.EventHandler` should: + +- build the initial handler from consumer state +- dispatch `handle_event/2` +- expose routing-relevant views for subquery index synchronization + +The concrete handler modules should: + +- hold runtime state +- interpret events +- return a plan for what should happen + +### Proposed public contract + +```elixir +defmodule Electric.Shapes.Consumer.EventHandler do + alias Electric.Shapes.Consumer.Plan + + @type t() :: term() + + @spec build(Electric.Shapes.Consumer.State.t()) :: t() + @spec handle_event(t(), term()) :: + {:ok, t(), Plan.t()} | {:stop, term()} + @spec routing_views(t()) :: %{[String.t()] => MapSet.t()} +end +``` + +### Why this is the right boundary + +- The consumer does not need to know whether it is handling the default path or + a subquery path. +- The consumer only needs a uniform contract: handle event, get plan, execute + plan. +- The handler becomes the single place where move queue, buffering, and splice + semantics live. + +## 2. Plan + +Use a small `Plan` struct as the output of event interpretation. + +```elixir +defmodule Electric.Shapes.Consumer.Plan do + defstruct log_ops: [], effects: [], ack_source_offset: nil +end +``` + +### Fields + +- `log_ops` + - semantic append operations that will write to the shape log +- `effects` + - imperative operations that do not directly write to the log +- `ack_source_offset` + - source offset that should be considered flushed after this event + +### Why `ack_source_offset` belongs at the plan level + +It should not be a log op because it is not itself a write. + +It should not be buried inside an effect because it is part of the consumer's +core bookkeeping and directly affects flush behavior. + +It must be able to exist even when there are no log writes. For example, a +transaction may produce no relevant changes for a shape, but its source offset +still needs to be acknowledged. + +### Why a struct is useful here + +- It makes the handler contract explicit. +- It keeps the executor interface stable. +- It avoids encoding meaning into positional tuples. + +The `Plan` struct should stay small. It is not intended to become a generic +workflow engine. + +## 3. Log operations + +`log_ops` should represent semantic append operations, not low-level storage +calls. + +### Proposed modules + +- `Electric.Shapes.Consumer.LogOp.AppendChanges` +- `Electric.Shapes.Consumer.LogOp.AppendControl` +- `Electric.Shapes.Consumer.LogOp.AppendMoveInSnapshot` + +These should be simple structs, not protocols. + +### Why plain modules, not protocols + +For now this is a closed internal set of operations. + +The consumer does not need open-world extensibility here. The important thing +is that one executor can see and handle the full set of operations in one +place. + +That has practical benefits: + +- easier to grep and review +- easier to keep accumulator logic together +- easier to reason about write ordering and notification bounds + +### Proposed operation shapes + +```elixir +defmodule Electric.Shapes.Consumer.LogOp.AppendChanges do + defstruct changes: [], default_xid: nil +end + +defmodule Electric.Shapes.Consumer.LogOp.AppendControl do + defstruct message: nil +end + +defmodule Electric.Shapes.Consumer.LogOp.AppendMoveInSnapshot do + defstruct rows: [] +end +``` + +### Why `AppendMoveInSnapshot` + +This is intentionally named as an append operation, not a splice operation. +The state machine decides splice ordering. The executor only performs the +append in the order the plan specifies. + +`AppendMoveInSnapshot` should mean: + +- append the move-in query rows +- append the trailing `snapshot-end` control message + +That keeps the "write the snapshot and its trailing control message" behavior +as one semantic unit. + +### Why `AppendMoveInSnapshot` is better than exposing raw control messages + +The current splice logic has to emit: + +- move-in control broadcast +- query rows +- snapshot-end control message + +The state machine should care about the splice boundary and ordering, but it +should not have to micromanage storage-specific details like "emit this special +control record after those rows". The executor can own that lower-level +encoding rule. + +## 4. Effects + +`effects` should represent imperative actions that are not log writes. + +### Proposed modules + +- `Electric.Shapes.Consumer.Effect.StartMoveInQuery` + +That may be the only effect needed in the first cut. + +Possible later additions could include: + +- `InvalidateShape` +- `EmitTelemetry` + +but those should only be added if they make the code cleaner. + +### Why plain modules, not protocols + +For the same reason as log ops: this is an internal closed set right now. + +If the effect set grows substantially later, switching to protocols would still +be easy. It is better to keep the initial refactor simple. + +## 5. Plan executor + +Add a dedicated executor module: + +- `Electric.Shapes.Consumer.PlanExecutor` + +This module should execute a `Plan` against consumer state and return the +updated state plus execution metadata. + +### Proposed contract + +```elixir +defmodule Electric.Shapes.Consumer.PlanExecutor do + alias Electric.Shapes.Consumer.Plan + + @type execution_result() :: %{ + state: Electric.Shapes.Consumer.State.t(), + num_changes: non_neg_integer(), + total_size: non_neg_integer(), + latest_written: Electric.Replication.LogOffset.t() | nil + } + + @spec execute( + Plan.t(), + Electric.Shapes.Consumer.State.t(), + keyword() + ) :: execution_result() +end +``` + +### Responsibilities + +The executor should: + +- execute log ops in order +- update `latest_offset` +- accumulate bytes and counts +- track the latest written offset +- execute effects +- apply `ack_source_offset` + +The executor should not decide what to do. It should only perform the planned +operations. + +## Proposed Event Flow + +The consumer event path should become: + +1. receive a high-level event +2. call `EventHandler.handle_event(handler, event)` +3. if `{:stop, reason}`, stop the shape +4. otherwise swap in the new handler +5. diff `EventHandler.routing_views(old)` vs `routing_views(new)` and update the + subquery routing index +6. execute the returned plan with `PlanExecutor` +7. notify materializer and clients if the log advanced + +In pseudocode: + +```elixir +defp apply_event(state, event, opts \\ []) do + old_handler = state.event_handler + + case EventHandler.handle_event(old_handler, event) do + {:stop, reason} -> + stop_consumer(state, reason) + + {:ok, new_handler, plan} -> + state = %{state | event_handler: new_handler} + sync_subquery_index(state, old_handler, new_handler) + + result = PlanExecutor.execute(plan, state, opts) + + notification = + if result.state.latest_offset != state.latest_offset do + {{state.latest_offset, result.state.latest_offset}, result.state.latest_offset} + end + + {result.state, notification, result.num_changes, result.total_size} + end +end +``` + +This is intentionally close to the current `apply_subquery_event/3` shape, but +with much clearer separation of concerns. + +## Proposed Handler Semantics + +## 1. `EventHandler.Default` + +This is the replacement for `NoSubqueries`. + +### Responsibilities + +- handle complete transactions for the default shape path +- resolve dependency refs lazily if needed for already-filled dependencies +- emit append operations for converted changes +- acknowledge the source offset for processed transactions +- ignore `:global_last_seen_lsn` +- reject subquery-only callback events + +### Why it should be called `Default` + +This module is not a degenerate subquery runtime. It is the normal event path +for shapes that do not need buffering/splicing behavior. + +Naming it `Default` makes its role clearer and avoids teaching the codebase +that "everything is a subquery state machine even when it is not". + +## 2. `EventHandler.UnsupportedSubquery` + +This should explicitly represent shapes whose subquery runtime cannot be +supported. + +### Responsibilities + +- reject dependency and query callback events by stopping the shape +- handle other events conservatively, likely as no-ops or explicit stop paths + +### Why this module matters + +It removes the current overload where the default handler is also being used as +an invalidation sentinel. + +That makes the consumer logic clearer: + +- default handler means the default event path +- unsupported handler means the shape should stop if subquery runtime behavior + is required + +## 3. `EventHandler.Subqueries.Steady` + +This is the steady-state subquery runtime. + +### Responsibilities + +- keep current subquery views +- keep the dependency-handle mapping +- track the global LSN already seen +- queue dependency move events +- decide whether a move is an outer move-in or outer move-out +- either emit broadcast appends or enter buffering state + +### Output behavior + +Examples: + +- normal matching transaction + - `AppendChanges` + - `ack_source_offset` +- dependency move that becomes immediate outer move-out + - `AppendControl` +- dependency move that requires outer move-in + - transition to buffering + - `StartMoveInQuery` + +## 4. `EventHandler.Subqueries.Buffering` + +This is the buffering runtime while a move-in query is in flight. + +### Responsibilities + +- hold: + - `views_before_move` + - `views_after_move` + - triggering dependency index + - move-in values + - snapshot + - move-in rows + - move-in LSN + - buffered transactions + - queue of later dependency moves + - splice boundary +- compute when the splice is ready +- when ready, emit the ordered append plan and return to steady state + +### Splice ordering + +The state machine is responsible for deciding this order: + +1. append pre-boundary converted changes +2. append move-in control broadcast +3. append move-in snapshot rows and trailing `snapshot-end` +4. append post-boundary converted changes + +The executor should not infer this ordering. It should simply execute the plan. + +### Important naming point + +The state machine performs the splice decision. + +The log operation `AppendMoveInSnapshot` is not a splice operation. It is just +the semantic append unit for the move-in rows plus trailing snapshot-end +control message. + +## Why `apply_subquery_event/3` feels complicated today + +It is worth stating the exact reason, because this is what the refactor is +fixing. + +The current design splits one conceptual decision across two levels: + +- the state machine decides part of the semantic behavior +- the consumer reconstructs the rest from low-level action tuples + +This is why the current implementation needs: + +- `flush_actions/3` +- `maybe_start_move_in_query/2` +- logic that treats `{:flush, source_offset}` differently depending on whether + anything was written +- logic that knows control messages affect size/count but not `latest_written` + +Those are not isolated bugs. They are consequences of using an action format +that is too low-level for the boundary. + +The proposed handler/plan split fixes this by making the handler return a more +complete semantic plan: + +- what to append +- what side effects to run +- what source offset to acknowledge + +## Initialization Design + +Initialization should move from `initialize_subquery_runtime/1` to +`EventHandler.build/1`. + +### Proposed behavior + +`EventHandler.build/1` should: + +- inspect the shape +- compile DNF if needed +- wait for dependency materializers if needed +- fetch current dependency views +- seed the subquery index if needed +- return one of: + - `EventHandler.Default` + - `EventHandler.Subqueries.Steady` + - `EventHandler.UnsupportedSubquery` + +### Why this is better + +It keeps "what handler should this shape use?" as one explicit decision rather +than scattering it between: + +- `initialize_subquery_runtime/1` +- `initialize_no_subqueries/1` +- special cases later in `handle_info` + +## Subquery Index Synchronization + +The consumer should still own synchronization of the reverse routing index, +because that is process-level shared state and not part of semantic event +interpretation. + +However, it should not inspect handler internals directly except through: + +```elixir +EventHandler.routing_views(handler) +``` + +### Why this is the right split + +- the handler knows how to project its internal state into routing views +- the consumer owns stack-global side effects like index mutation + +This keeps the boundary clean. + +## Why log ops and effects are not protocols right now + +Protocols are not forbidden, but they are not the best default for the first +refactor. + +### Reasons + +- The operation set is closed and internal. +- Log-op execution has shared accumulator state. +- The main simplification comes from moving routing logic out of `consumer.ex`, + not from type-based polymorphism. +- A single executor is easier to review and reason about. + +If the operation set grows large or needs open extension later, moving from +plain structs to protocols would still be straightforward. + +## Migration Plan + +Implement this incrementally. + +### Step 1. Introduce the new outer contract + +- add `Consumer.Plan` +- add `Consumer.EventHandler` +- add `Consumer.PlanExecutor` + +Do not change behavior yet. + +### Step 2. Move the default path first + +- rename `NoSubqueries` to `EventHandler.Default` +- make it return `{:ok, next_handler, %Plan{...}}` +- keep behavior identical + +This gives the non-subquery path the new contract first. + +### Step 3. Add `UnsupportedSubquery` + +- stop using the default handler as the invalidation sentinel +- initialize unsupported shapes explicitly into `UnsupportedSubquery` + +### Step 4. Port steady and buffering subquery handlers + +- move current `Steady` and `Buffering` into the new handler namespace +- replace low-level action tuples with `Plan` + log ops + effects +- delete `flush_actions/3` +- delete `maybe_start_move_in_query/2` + +### Step 5. Collapse `apply_subquery_event/3` + +Replace it with a smaller helper that: + +- calls the handler +- updates routing views +- executes the plan +- computes notification bounds + +### Step 6. Clean up naming in `State` + +- rename `subquery_state` to `event_handler` + +At that point the remaining consumer code should read much more directly. + +## Explicit Decisions + +These are the design choices this document recommends. + +### Chosen + +- Use an `EventHandler` abstraction. +- Use `{:ok, next_handler, plan}` and `{:stop, reason}` as the handler return + contract. +- Use a `Plan` struct. +- Keep `ack_source_offset` at the plan level. +- Use semantic log operations, not raw action tuples. +- Use `AppendMoveInSnapshot` to represent "append move-in rows plus trailing + snapshot-end control message". +- Keep log ops and effects as plain structs and plain executor modules for now. +- Add `UnsupportedSubquery` explicitly. + +### Not chosen + +- Do not use `{:error, reason}` for normal shape invalidation/termination. +- Do not make `ack_source_offset` a kind of effect. +- Do not attach `ack_source_offset` only to write operations. +- Do not use protocols for log ops or effects in the first cut. +- Do not refactor transaction fragment assembly into the new framework yet. + +## Implementation Notes For The Agent Doing The Refactor + +### Keep the first refactor narrow + +Only move the complete-event path behind the new abstractions. Leave the +transaction-fragment assembly path mostly as-is until the new handler/executor +split is working. + +### Preserve current ordering semantics + +The move-in path is sensitive to ordering. The refactor must preserve: + +1. pre-boundary changes +2. move-in control broadcast +3. move-in query rows +4. snapshot-end control +5. post-boundary changes + +The design is changing the representation of that behavior, not the behavior +itself. + +### Preserve current flush behavior + +The current flush behavior is subtle and must not regress. + +In particular: + +- transactions with no written rows may still need source-offset acknowledgement +- plans with writes should map the latest written offset to the source offset +- plans without writes should still be able to call `consider_flushed/2` + +### Preserve materializer/client notification semantics + +Only log advancement should trigger new-change notification bounds. + +Effects that do not append to the log must not accidentally produce a client +notification range. + +### Keep executor logic centralized + +Do not scatter append logic across multiple modules on the first pass. + +It is better for `PlanExecutor` to own: + +- log-op execution +- metrics/count accumulation +- `latest_written` tracking +- ack application + +than to split that logic prematurely. + +## Summary + +The intended end state is: + +- `Consumer` owns process behavior +- `EventHandler` owns event semantics +- `Plan` is the handoff object +- `PlanExecutor` owns execution +- log ops are semantic append operations +- effects are imperative non-log actions + +The main simplification is not "more modules". The main simplification is that +the state machine stops emitting low-level tuples and the consumer stops +reconstructing semantics from them. diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 7e40234170..57119dbbf0 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1,12 +1,11 @@ defmodule Electric.Shapes.Consumer do use GenServer, restart: :temporary + alias Electric.Shapes.Consumer.EventHandler alias Electric.Shapes.Consumer.InitialSnapshot alias Electric.Shapes.Consumer.PendingTxn + alias Electric.Shapes.Consumer.PlanExecutor alias Electric.Shapes.Consumer.State - alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.NoSubqueries - alias Electric.Shapes.Consumer.Subqueries.QueryRow alias Electric.Shapes.DnfPlan alias Electric.Shapes.Filter.SubqueryIndex @@ -367,37 +366,18 @@ defmodule Electric.Shapes.Consumer do "Consumer reacting to #{length(move_in)} move ins and #{length(move_out)} move outs from its #{dep_handle} dependency" end) - # Invalidate if subquery runtime was not initialized because the DNF plan - # failed to compile. All supported subquery shapes, including negated ones, - # should have an active subquery runtime. - if match?(%NoSubqueries{}, state.subquery_state) do - stop_and_clean(state) - else - {state, notification, _num_changes, _total_size} = - apply_subquery_event( - state, - {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}} - ) - - if notification do - :ok = notify_new_changes(state, notification) - end - - {:noreply, state, state.hibernate_after} - end + handle_apply_event_result( + state, + apply_event( + state, + {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}} + ) + ) end def handle_info({:pg_snapshot_known, snapshot}, state) do Logger.debug(fn -> "Snapshot known for active move-in" end) - - {state, notification, _num_changes, _total_size} = - apply_subquery_event(state, {:pg_snapshot_known, snapshot}) - - if notification do - :ok = notify_new_changes(state, notification) - end - - {:noreply, state, state.hibernate_after} + handle_apply_event_result(state, apply_event(state, {:pg_snapshot_known, snapshot})) end def handle_info({:query_move_in_complete, rows, move_in_lsn}, state) do @@ -405,14 +385,10 @@ defmodule Electric.Shapes.Consumer do "Consumer query move in complete for #{state.shape_handle} with #{length(rows)} rows" end) - {state, notification, _num_changes, _total_size} = - apply_subquery_event(state, {:query_move_in_complete, rows, move_in_lsn}) - - if notification do - :ok = notify_new_changes(state, notification) - end - - {:noreply, state, state.hibernate_after} + handle_apply_event_result( + state, + apply_event(state, {:query_move_in_complete, rows, move_in_lsn}) + ) end def handle_info({:query_move_in_error, error, stacktrace}, state) do @@ -534,13 +510,17 @@ defmodule Electric.Shapes.Consumer do end defp handle_event({:global_last_seen_lsn, _lsn} = event, state) do - {state, notification, _num_changes, _total_size} = apply_subquery_event(state, event) + case apply_event(state, event) do + {:stop, reason} -> + handle_event_stop(state, reason) - if notification do - :ok = notify_new_changes(state, notification) - end + {state, notification, _num_changes, _total_size} -> + if notification do + :ok = notify_new_changes(state, notification) + end - state + state + end end defp handle_event(%TransactionFragment{} = txn_fragment, state) do @@ -862,157 +842,94 @@ defmodule Electric.Shapes.Consumer do defp do_handle_txn(%Transaction{} = txn, state) do timestamp = System.monotonic_time() - {state, notification, num_changes, total_size} = - apply_subquery_event(state, txn, default_xid: txn.xid) + case apply_event(state, txn) do + {:stop, reason} -> + handle_event_stop(state, reason) - if notification do - :ok = notify_new_changes(state, notification) + {state, notification, num_changes, total_size} -> + if notification do + :ok = notify_new_changes(state, notification) - OpenTelemetry.add_span_attributes(%{ - num_bytes: total_size, - actual_num_changes: num_changes - }) + OpenTelemetry.add_span_attributes(%{ + num_bytes: total_size, + actual_num_changes: num_changes + }) - lag = calculate_replication_lag(txn.commit_timestamp) - OpenTelemetry.add_span_attributes(replication_lag: lag) + lag = calculate_replication_lag(txn.commit_timestamp) + OpenTelemetry.add_span_attributes(replication_lag: lag) - Electric.Telemetry.OpenTelemetry.execute( - [:electric, :storage, :transaction_stored], - %{ - duration: System.monotonic_time() - timestamp, - bytes: total_size, - count: 1, - operations: num_changes, - replication_lag: lag - }, - Map.new(State.telemetry_attrs(state)) - ) + Electric.Telemetry.OpenTelemetry.execute( + [:electric, :storage, :transaction_stored], + %{ + duration: System.monotonic_time() - timestamp, + bytes: total_size, + count: 1, + operations: num_changes, + replication_lag: lag + }, + Map.new(State.telemetry_attrs(state)) + ) - state - else - state + state + else + state + end end end - defp apply_subquery_event(state, event, opts \\ []) do - old_subquery_state = state.subquery_state - {actions, subquery_state} = Subqueries.handle_event(state.subquery_state, event) - - state = %{state | subquery_state: subquery_state} - update_subquery_index_from_state_change(state, old_subquery_state, subquery_state) - previous_offset = state.latest_offset - - {state, num_changes, total_size, _latest_written} = - Enum.reduce(actions, {state, 0, 0, nil}, fn - {:store, items}, {state, num_changes, total_size, _latest_written} -> - case write_items_to_log(state, items, opts) do - {state, nil, 0, 0} -> - {state, num_changes, total_size, nil} - - {state, _range, new_changes, new_size} -> - {state, num_changes + new_changes, total_size + new_size, state.latest_offset} - end - - {:control, message}, {state, num_changes, total_size, latest_written} -> - {state, size, count} = append_control_message_output(state, 0, 0, message) - {state, num_changes + count, total_size + size, latest_written} - - :start_move_in_query, {state, num_changes, total_size, latest_written} -> - start_move_in_query(state) - {state, num_changes, total_size, latest_written} - - {:truncate, xid}, {state, _num_changes, _total_size, _latest_written} -> - state = handle_txn_with_truncate(xid, state) - {state, 0, 0, nil} - - :shutdown, {state, _num_changes, _total_size, _latest_written} -> - Logger.warning( - "Subquery buffer overflow for #{state.shape_handle} - terminating shape" - ) + defp handle_apply_event_result(state, {:stop, reason}) do + state = handle_event_stop(state, reason) - {mark_for_removal(state), 0, 0, nil} - - {:flush, source_offset}, {state, num_changes, total_size, latest_written} -> - state = - if latest_written do - %{ - state - | txn_offset_mapping: - state.txn_offset_mapping ++ [{latest_written, source_offset}] - } - else - consider_flushed(state, source_offset) - end - - {state, num_changes, total_size, latest_written} - end) - - notification = - if state.latest_offset != previous_offset do - {{previous_offset, state.latest_offset}, state.latest_offset} - end - - {state, notification, num_changes, total_size} + if state.terminating? do + {:noreply, state, {:continue, :stop_and_clean}} + else + stop_and_clean(state) + end end - defp start_move_in_query(state) do - supervisor = Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) - Subqueries.query_move_in_async(supervisor, state, state.subquery_state, self()) + defp handle_apply_event_result(_old_state, {state, notification, _num_changes, _total_size}) do + if notification do + :ok = notify_new_changes(state, notification) + end + + {:noreply, state, state.hibernate_after} end - defp write_items_to_log(state, [], _opts), do: {state, nil, 0, 0} + defp apply_event(state, event) do + old_handler = state.event_handler - defp write_items_to_log(state, items, opts) do - previous_offset = state.latest_offset + case EventHandler.handle_event(old_handler, event) do + {:stop, reason} -> + {:stop, reason} - {lines, total_size, state} = - Enum.reduce(items, {[], 0, state}, fn item, {lines, size, state} -> - case item do - %QueryRow{key: key, json: json} -> - json = IO.iodata_to_binary(json) - offset = LogOffset.increment(state.latest_offset) - line = {offset, key, :insert, json} - {lines ++ [line], size + byte_size(json), %{state | latest_offset: offset}} + {:ok, new_handler, plan} -> + state = %{state | event_handler: new_handler} + sync_subquery_index(state, old_handler, new_handler) + previous_offset = state.latest_offset - change - when is_struct(change, Changes.NewRecord) or - is_struct(change, Changes.UpdatedRecord) or - is_struct(change, Changes.DeletedRecord) -> - {new_lines, line_size} = change_to_log_lines(change, opts[:default_xid], state.shape) - last_offset = new_lines |> List.last() |> elem(0) + result = PlanExecutor.execute(plan, state) - {lines ++ new_lines, size + line_size, %{state | latest_offset: last_offset}} - end - end) + notification = + if result.state.latest_offset != previous_offset do + {{previous_offset, result.state.latest_offset}, result.state.latest_offset} + end - writer = ShapeCache.Storage.append_to_log!(lines, state.writer) - state = %{state | writer: writer} - {state, {previous_offset, state.latest_offset}, length(lines), total_size} + {result.state, notification, result.num_changes, result.total_size} + end end - defp append_control_message_output(state, size, count, control_message) do - encoded = Jason.encode!(control_message) - - {{_, offset}, writer} = - ShapeCache.Storage.append_control_message!(encoded, state.writer) + defp handle_event_stop(state, {:truncate, xid}) do + handle_txn_with_truncate(xid, state) + end - {%{state | writer: writer, latest_offset: offset}, size + byte_size(encoded), count + 1} + defp handle_event_stop(state, :unsupported_subquery) do + mark_for_removal(state) end - defp change_to_log_lines(change, xid, shape) do - lines = - change - |> LogItems.from_change( - xid, - Shape.pk(shape, change.relation), - shape.replica - ) - |> Enum.map(fn {offset, %{key: key} = log_item} -> - {offset, key, log_item.headers.operation, Jason.encode!(log_item)} - end) + defp handle_event_stop(state, :buffer_overflow) do + Logger.warning("Subquery buffer overflow for #{state.shape_handle} - terminating shape") - size = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) - {lines, size} + mark_for_removal(state) end defp handle_txn_with_truncate(xid, state) do @@ -1201,37 +1118,47 @@ defmodule Electric.Shapes.Consumer do # Seed the reverse index with initial membership seed_subquery_index(state, views) - %{ - state - | subquery_state: - Subqueries.new( - shape: state.shape, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - dnf_plan: dnf_plan, - views: views, - dependency_handle_to_ref: dep_handle_to_ref, - buffer_max_transactions: - Electric.StackConfig.lookup( - state.stack_id, - :subquery_buffer_max_transactions, - Electric.Config.default(:subquery_buffer_max_transactions) - ) + handler = + %EventHandler.Subqueries.Steady{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: dnf_plan, + views: views, + dependency_handle_to_ref: dep_handle_to_ref, + buffer_max_transactions: + Electric.StackConfig.lookup( + state.stack_id, + :subquery_buffer_max_transactions, + Electric.Config.default(:subquery_buffer_max_transactions) ) - } + } + + %{state | event_handler: handler} _other -> - # :no_subqueries or {:error, _} - no subquery runtime needed - initialize_no_subqueries(state) + # :no_subqueries or {:error, _} - use unsupported handler since deps exist but plan failed + initialize_unsupported_subquery(state) end end - defp initialize_subquery_runtime(state), do: initialize_no_subqueries(state) + defp initialize_subquery_runtime(state), do: initialize_default_handler(state) + + defp initialize_default_handler(state) do + %{ + state + | event_handler: %EventHandler.Default{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle + } + } + end - defp initialize_no_subqueries(state) do + defp initialize_unsupported_subquery(state) do %{ state - | subquery_state: %NoSubqueries{ + | event_handler: %EventHandler.UnsupportedSubquery{ shape: state.shape, stack_id: state.stack_id, shape_handle: state.shape_handle @@ -1264,22 +1191,16 @@ defmodule Electric.Shapes.Consumer do end end - # Compare old and new subquery state views and update the index accordingly. - # For Steady states, we diff the exact views. - # For Steady->Buffering transitions, we use the conservative projection - # (union for positive, intersection for negated). - # For Buffering->Steady (splice), we diff from the buffering projection - # to the new steady exact view. - defp update_subquery_index_from_state_change(state, old_state, new_state) do + defp sync_subquery_index(state, old_handler, new_handler) do case SubqueryIndex.for_stack(state.stack_id) do nil -> :ok - index -> do_update_subquery_index(index, state, old_state, new_state) + index -> do_sync_subquery_index(index, state, old_handler, new_handler) end end - defp do_update_subquery_index(index, state, old_state, new_state) do - old_views = get_routing_views(old_state) - new_views = get_routing_views(new_state) + defp do_sync_subquery_index(index, state, old_handler, new_handler) do + old_views = EventHandler.routing_views(old_handler) + new_views = EventHandler.routing_views(new_handler) if old_views != new_views do root_table = state.shape.root_table @@ -1305,39 +1226,6 @@ defmodule Electric.Shapes.Consumer do :ok end - # Get the routing-relevant views from a subquery state. - # In steady state: exact views. - # In buffering: conservative projection (union for positive deps). - defp get_routing_views(%Subqueries.Steady{views: views}), do: views - - defp get_routing_views(%Subqueries.Buffering{ - views_before_move: before, - views_after_move: after_move, - dnf_plan: plan, - trigger_dep_index: trigger_dep - }) do - # For the triggering dependency, use union (conservative for positive) - # or intersection (conservative for negated) - polarity = Map.get(plan.dependency_polarities, trigger_dep, :positive) - - Map.merge(before, after_move, fn ref, before_view, after_view -> - dep_index = ref |> List.last() |> String.to_integer() - - if dep_index == trigger_dep do - case polarity do - :positive -> MapSet.union(before_view, after_view) - :negated -> MapSet.intersection(before_view, after_view) - end - else - # Non-triggering dependencies: use the current view (same in both) - after_view - end - end) - end - - defp get_routing_views(%NoSubqueries{}), do: %{} - defp get_routing_views(_), do: %{} - defp all_materializers_alive?(state) do Enum.all?(state.shape.shape_dependencies_handles, fn shape_handle -> name = Materializer.name(state.stack_id, shape_handle) diff --git a/packages/sync-service/lib/electric/shapes/consumer/effect.ex b/packages/sync-service/lib/electric/shapes/consumer/effect.ex new file mode 100644 index 0000000000..beb9a467e2 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/effect.ex @@ -0,0 +1,8 @@ +defmodule Electric.Shapes.Consumer.Effect do + @moduledoc false + + defmodule StartMoveInQuery do + @moduledoc false + defstruct [] + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex new file mode 100644 index 0000000000..012ae5beac --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex @@ -0,0 +1,22 @@ +defmodule Electric.Shapes.Consumer.EventHandler do + @moduledoc false + + alias Electric.Shapes.Consumer.Plan + + @type t() :: term() + + @callback handle_event(t(), term()) :: + {:ok, t(), Plan.t()} | {:stop, term()} + + @callback routing_views(t()) :: %{[String.t()] => MapSet.t()} + + @spec handle_event(t(), term()) :: {:ok, t(), Plan.t()} | {:stop, term()} + def handle_event(handler, event) do + handler.__struct__.handle_event(handler, event) + end + + @spec routing_views(t()) :: %{[String.t()] => MapSet.t()} + def routing_views(handler) do + handler.__struct__.routing_views(handler) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex similarity index 65% rename from packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex rename to packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex index 65d11ee0a1..d95cfb00e5 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/no_subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex @@ -1,26 +1,32 @@ -defmodule Electric.Shapes.Consumer.Subqueries.NoSubqueries do +defmodule Electric.Shapes.Consumer.EventHandler.Default do @moduledoc false - @enforce_keys [:shape, :stack_id, :shape_handle] - defstruct [:shape, :stack_id, :shape_handle] - - @type t() :: %__MODULE__{ - shape: Electric.Shapes.Shape.t(), - stack_id: String.t(), - shape_handle: String.t() - } -end + @behaviour Electric.Shapes.Consumer.EventHandler -defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, - for: Electric.Shapes.Consumer.Subqueries.NoSubqueries do alias Electric.Replication.Changes alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.LogOp alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Shape require Shape - def handle_event(state, %Transaction{xid: xid, changes: changes}) do + @enforce_keys [:shape, :stack_id, :shape_handle] + defstruct [:shape, :stack_id, :shape_handle] + + @type t() :: %__MODULE__{ + shape: Shape.t(), + stack_id: String.t(), + shape_handle: String.t() + } + + @impl true + def handle_event(state, %Transaction{ + xid: xid, + last_log_offset: last_log_offset, + changes: changes + }) do extra_refs = if Shape.are_deps_filled(state.shape) do refs = Materializer.get_all_as_refs(state.shape, state.stack_id) @@ -46,31 +52,41 @@ defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, end) case result do - {:truncate, xid} -> - {[{:truncate, xid}], state} + {:truncate, _xid} -> + {:stop, {:truncate, xid}} [] -> - {[], state} + {:ok, state, %Plan{ack_source_offset: last_log_offset}} changes -> - {[{:store, mark_last_change(changes)}], state} + plan = %Plan{ + log_ops: [%LogOp.AppendChanges{changes: mark_last_change(changes), default_xid: xid}], + ack_source_offset: last_log_offset + } + + {:ok, state, plan} end end - def handle_event(state, {:global_last_seen_lsn, _lsn}), do: {[], state} + def handle_event(state, {:global_last_seen_lsn, _lsn}) do + {:ok, state, %Plan{}} + end def handle_event(_state, {:pg_snapshot_known, _snapshot}) do - raise ArgumentError, "received {:pg_snapshot_known, snapshot} in NoSubqueries state" + raise ArgumentError, "received {:pg_snapshot_known, snapshot} in Default handler" end def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do - raise ArgumentError, "received {:query_move_in_complete, ...} in NoSubqueries state" + raise ArgumentError, "received {:query_move_in_complete, ...} in Default handler" end def handle_event(_state, {:materializer_changes, _dep_handle, _payload}) do - raise ArgumentError, "received {:materializer_changes, ...} in NoSubqueries state" + raise ArgumentError, "received {:materializer_changes, ...} in Default handler" end + @impl true + def routing_views(_state), do: %{} + defp mark_last_change([]), do: [] defp mark_last_change(changes) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex similarity index 57% rename from packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex rename to packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index b3be36fcbc..09e98da55e 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -1,13 +1,19 @@ -defmodule Electric.Shapes.Consumer.Subqueries.Buffering do +defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do @moduledoc false + @behaviour Electric.Shapes.Consumer.EventHandler + alias Electric.Postgres.Lsn alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Steady + alias Electric.Shapes.Consumer.LogOp + alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Consumer.Subqueries alias Electric.Shapes.Consumer.Subqueries.MoveQueue - alias Electric.Shapes.Consumer.Subqueries.Steady alias Electric.Shapes.DnfPlan + require Logger + @enforce_keys [ :shape, :stack_id, @@ -36,7 +42,6 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do boundary_txn_count: nil, buffered_txns: [], queue: MoveQueue.new(), - query_started?: false, buffer_max_transactions: 1000 ] @@ -44,7 +49,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do shape: Electric.Shapes.Shape.t(), stack_id: String.t(), shape_handle: String.t(), - dnf_plan: Electric.Shapes.DnfPlan.t(), + dnf_plan: DnfPlan.t(), trigger_dep_index: non_neg_integer(), move_in_values: [Subqueries.move_value()], views_before_move: %{[String.t()] => MapSet.t()}, @@ -57,7 +62,6 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do boundary_txn_count: non_neg_integer() | nil, buffered_txns: [Transaction.t()], queue: MoveQueue.t(), - query_started?: boolean(), buffer_max_transactions: pos_integer() } @@ -100,97 +104,156 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do } end - @spec maybe_buffer_boundary_from_txn(t(), Transaction.t()) :: t() - def maybe_buffer_boundary_from_txn(%__MODULE__{boundary_txn_count: boundary} = state, _txn) - when not is_nil(boundary), - do: state - - def maybe_buffer_boundary_from_txn(%__MODULE__{snapshot: nil} = state, _txn), do: state - - def maybe_buffer_boundary_from_txn(%__MODULE__{} = state, %Transaction{} = txn) do - if Transaction.visible_in_snapshot?(txn, state.snapshot) do + @impl true + def handle_event(state, %Transaction{} = txn) do + next_state = state + |> maybe_buffer_boundary_from_txn(txn) + |> Map.update!(:buffered_txns, &[txn | &1]) + + if length(next_state.buffered_txns) > next_state.buffer_max_transactions do + {:stop, :buffer_overflow} else - %{state | boundary_txn_count: length(state.buffered_txns)} + maybe_splice(next_state, txn.last_log_offset) end end - @spec maybe_buffer_boundary_from_snapshot(t()) :: t() - def maybe_buffer_boundary_from_snapshot(%__MODULE__{boundary_txn_count: boundary} = state) - when not is_nil(boundary), - do: state + def handle_event(state, {:global_last_seen_lsn, lsn}) do + lsn = Subqueries.normalize_global_lsn(lsn) - def maybe_buffer_boundary_from_snapshot(%__MODULE__{snapshot: nil} = state), do: state + state + |> Map.put(:latest_seen_lsn, lsn) + |> maybe_buffer_boundary_from_lsn(lsn) + |> maybe_splice(nil) + end - def maybe_buffer_boundary_from_snapshot(%__MODULE__{} = state) do - case state.buffered_txns - |> Enum.reverse() - |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, state.snapshot))) do - nil -> state - index -> %{state | boundary_txn_count: index} - end + def handle_event(state, {:materializer_changes, dep_handle, payload}) do + :ok = Subqueries.validate_dependency_handle!(state, dep_handle) + {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) + dep_view = Map.get(state.views_after_move, subquery_ref, MapSet.new()) + + next_state = + Map.update!( + state, + :queue, + &MoveQueue.enqueue(&1, dep_index, payload, dep_view) + ) + + {:ok, next_state, %Plan{}} end - @spec maybe_buffer_boundary_from_lsn(t(), Lsn.t()) :: t() - def maybe_buffer_boundary_from_lsn(%__MODULE__{boundary_txn_count: boundary} = state, _lsn) - when not is_nil(boundary), - do: state + def handle_event(%{snapshot: snapshot}, {:pg_snapshot_known, _new_snapshot}) + when not is_nil(snapshot) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} more than once for one move-in" + end - def maybe_buffer_boundary_from_lsn(%__MODULE__{move_in_lsn: nil} = state, _lsn), do: state + def handle_event(state, {:pg_snapshot_known, snapshot}) do + state + |> Map.put(:snapshot, snapshot) + |> maybe_buffer_boundary_from_snapshot() + |> maybe_splice(nil) + end - def maybe_buffer_boundary_from_lsn(%__MODULE__{} = state, %Lsn{} = lsn) do - case Lsn.compare(lsn, state.move_in_lsn) do - :lt -> state - _ -> %{state | boundary_txn_count: length(state.buffered_txns)} - end + def handle_event(%{move_in_rows: rows}, {:query_move_in_complete, _new_rows, _move_in_lsn}) + when not is_nil(rows) do + raise ArgumentError, + "received {:query_move_in_complete, rows, move_in_lsn} more than once for one move-in" end - @spec maybe_buffer_boundary_from_seen_lsn(t()) :: t() - def maybe_buffer_boundary_from_seen_lsn(%__MODULE__{latest_seen_lsn: nil} = state), do: state + def handle_event(state, {:query_move_in_complete, rows, move_in_lsn}) do + state + |> Map.put(:move_in_rows, rows) + |> Map.put(:move_in_lsn, move_in_lsn) + |> maybe_buffer_boundary_from_seen_lsn() + |> maybe_splice(nil) + end - def maybe_buffer_boundary_from_seen_lsn(%__MODULE__{} = state) do - maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) + @impl true + def routing_views(%__MODULE__{ + views_before_move: before, + views_after_move: after_move, + dnf_plan: plan, + trigger_dep_index: trigger_dep + }) do + polarity = Map.get(plan.dependency_polarities, trigger_dep, :positive) + + Map.merge(before, after_move, fn ref, before_view, after_view -> + dep_index = ref |> List.last() |> String.to_integer() + + if dep_index == trigger_dep do + case polarity do + :positive -> MapSet.union(before_view, after_view) + :negated -> MapSet.intersection(before_view, after_view) + end + else + after_view + end + end) end - @spec maybe_splice(t()) :: {[Subqueries.sm_action()], Subqueries.StateMachine.t()} - def maybe_splice(%__MODULE__{} = state) do - if ready_to_splice?(state) do - {pre_txns, post_txns} = - state.buffered_txns |> Enum.reverse() |> Enum.split(state.boundary_txn_count) - - move_in_broadcast = - DnfPlan.make_move_in_broadcast( - state.dnf_plan, - state.trigger_dep_index, - state.move_in_values, - state.stack_id, - state.shape_handle - ) - - actions = - store_action(pre_txns, state, state.views_before_move) ++ - [{:control, move_in_broadcast}] ++ - store_rows_action(state.move_in_rows) ++ - [{:control, snapshot_end_control_message()}] ++ - store_action(post_txns, state, state.views_after_move) + # -- Splice logic -- - state - |> to_steady_state() - |> Steady.drain_queue(actions) + defp maybe_splice(state, last_txn_offset) do + if ready_to_splice?(state) do + do_splice(state, last_txn_offset) else - {[], state} + ack = + if last_txn_offset do + # Buffered txns still need their source offsets acknowledged + # (the ack will be deferred until splice via the buffered txn offsets) + nil + end + + {:ok, state, %Plan{ack_source_offset: ack}} end end - defp store_action(txns, state, views) do + defp do_splice(state, _last_txn_offset) do + {pre_txns, post_txns} = + state.buffered_txns |> Enum.reverse() |> Enum.split(state.boundary_txn_count) + + move_in_broadcast = + DnfPlan.make_move_in_broadcast( + state.dnf_plan, + state.trigger_dep_index, + state.move_in_values, + state.stack_id, + state.shape_handle + ) + + log_ops = + store_op(pre_txns, state, state.views_before_move) ++ + [%LogOp.AppendControl{message: move_in_broadcast}] ++ + move_in_snapshot_op(state.move_in_rows) ++ + store_op(post_txns, state, state.views_after_move) + + # The ack_source_offset should be the last buffered transaction's offset + # (or the last_txn_offset if there were txns) + all_txns = pre_txns ++ post_txns + + ack_offset = + case all_txns do + [] -> nil + txns -> txns |> List.last() |> Map.fetch!(:last_log_offset) + end + + plan = %Plan{log_ops: log_ops, ack_source_offset: ack_offset} + + # Transition back to steady state, then drain any queued moves + state + |> to_steady_state() + |> Steady.drain_queue(plan) + end + + defp store_op(txns, state, views) do case Enum.flat_map(txns, &Subqueries.convert_transaction(&1, state, views)) do [] -> [] - changes -> [{:store, changes}] + changes -> [%LogOp.AppendChanges{changes: changes}] end end - defp store_rows_action([]), do: [] - defp store_rows_action(rows), do: [{:store, rows}] + defp move_in_snapshot_op([]), do: [] + defp move_in_snapshot_op(rows), do: [%LogOp.AppendMoveInSnapshot{rows: rows}] defp ready_to_splice?(%__MODULE__{} = state) do not is_nil(state.snapshot) and not is_nil(state.move_in_rows) and @@ -211,96 +274,65 @@ defmodule Electric.Shapes.Consumer.Subqueries.Buffering do } end - defp snapshot_end_control_message do - %{headers: %{control: "snapshot-end"}} - end - - defp apply_dependency_move(subquery_view, move_in_values, :move_in) do - add_move_in_values(subquery_view, move_in_values) - end - - defp apply_dependency_move(subquery_view, move_in_values, :move_out) do - remove_move_values(subquery_view, move_in_values) - end + # -- Boundary computation -- - defp add_move_in_values(subquery_view, move_in_values) do - Enum.reduce(move_in_values, subquery_view, fn {value, _original_value}, view -> - MapSet.put(view, value) - end) - end + defp maybe_buffer_boundary_from_txn(%{boundary_txn_count: boundary} = state, _txn) + when not is_nil(boundary), + do: state - defp remove_move_values(subquery_view, move_values) do - Enum.reduce(move_values, subquery_view, fn {value, _original_value}, view -> - MapSet.delete(view, value) - end) - end -end - -defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, - for: Electric.Shapes.Consumer.Subqueries.Buffering do - alias Electric.Replication.Changes.Transaction - alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.Buffering - alias Electric.Shapes.Consumer.Subqueries.MoveQueue + defp maybe_buffer_boundary_from_txn(%{snapshot: nil} = state, _txn), do: state - def handle_event(state, %Transaction{} = txn) do - next_state = + defp maybe_buffer_boundary_from_txn(state, %Transaction{} = txn) do + if Transaction.visible_in_snapshot?(txn, state.snapshot) do state - |> Buffering.maybe_buffer_boundary_from_txn(txn) - |> Map.update!(:buffered_txns, &[txn | &1]) - - if length(next_state.buffered_txns) > next_state.buffer_max_transactions do - {[:shutdown], next_state} else - Buffering.maybe_splice(next_state) + %{state | boundary_txn_count: length(state.buffered_txns)} end end - def handle_event(state, {:global_last_seen_lsn, lsn}) do - lsn = Subqueries.normalize_global_lsn(lsn) + defp maybe_buffer_boundary_from_snapshot(%{boundary_txn_count: boundary} = state) + when not is_nil(boundary), + do: state - state - |> Map.put(:latest_seen_lsn, lsn) - |> Buffering.maybe_buffer_boundary_from_lsn(lsn) - |> Buffering.maybe_splice() + defp maybe_buffer_boundary_from_snapshot(%{snapshot: nil} = state), do: state + + defp maybe_buffer_boundary_from_snapshot(state) do + case state.buffered_txns + |> Enum.reverse() + |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, state.snapshot))) do + nil -> state + index -> %{state | boundary_txn_count: index} + end end - def handle_event(state, {:materializer_changes, dep_handle, payload}) do - :ok = Subqueries.validate_dependency_handle!(state, dep_handle) - {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) - dep_view = Map.get(state.views_after_move, subquery_ref, MapSet.new()) + defp maybe_buffer_boundary_from_lsn(%{boundary_txn_count: boundary} = state, _lsn) + when not is_nil(boundary), + do: state - {[], - Map.update!( - state, - :queue, - &MoveQueue.enqueue(&1, dep_index, payload, dep_view) - )} - end + defp maybe_buffer_boundary_from_lsn(%{move_in_lsn: nil} = state, _lsn), do: state - def handle_event(%{snapshot: snapshot}, {:pg_snapshot_known, _new_snapshot}) - when not is_nil(snapshot) do - raise ArgumentError, "received {:pg_snapshot_known, snapshot} more than once for one move-in" + defp maybe_buffer_boundary_from_lsn(state, %Lsn{} = lsn) do + case Lsn.compare(lsn, state.move_in_lsn) do + :lt -> state + _ -> %{state | boundary_txn_count: length(state.buffered_txns)} + end end - def handle_event(state, {:pg_snapshot_known, snapshot}) do - state - |> Map.put(:snapshot, snapshot) - |> Buffering.maybe_buffer_boundary_from_snapshot() - |> Buffering.maybe_splice() + defp maybe_buffer_boundary_from_seen_lsn(%{latest_seen_lsn: nil} = state), do: state + + defp maybe_buffer_boundary_from_seen_lsn(state) do + maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) end - def handle_event(%{move_in_rows: rows}, {:query_move_in_complete, _new_rows, _move_in_lsn}) - when not is_nil(rows) do - raise ArgumentError, - "received {:query_move_in_complete, rows, move_in_lsn} more than once for one move-in" + defp apply_dependency_move(subquery_view, move_in_values, :move_in) do + Enum.reduce(move_in_values, subquery_view, fn {value, _original_value}, view -> + MapSet.put(view, value) + end) end - def handle_event(state, {:query_move_in_complete, rows, move_in_lsn}) do - state - |> Map.put(:move_in_rows, rows) - |> Map.put(:move_in_lsn, move_in_lsn) - |> Buffering.maybe_buffer_boundary_from_seen_lsn() - |> Buffering.maybe_splice() + defp apply_dependency_move(subquery_view, move_in_values, :move_out) do + Enum.reduce(move_in_values, subquery_view, fn {value, _original_value}, view -> + MapSet.delete(view, value) + end) end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex similarity index 66% rename from packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex rename to packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex index a8e3d8a8a5..5af13b4645 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex @@ -1,10 +1,14 @@ -defmodule Electric.Shapes.Consumer.Subqueries.Steady do +defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do @moduledoc false + @behaviour Electric.Shapes.Consumer.EventHandler + + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering + alias Electric.Shapes.Consumer.LogOp + alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.Buffering alias Electric.Shapes.Consumer.Subqueries.MoveQueue - alias Electric.Shapes.Consumer.Subqueries.StateMachine alias Electric.Shapes.DnfPlan @enforce_keys [:shape, :stack_id, :shape_handle, :dnf_plan, :dependency_handle_to_ref] @@ -24,7 +28,7 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do shape: Electric.Shapes.Shape.t(), stack_id: String.t(), shape_handle: String.t(), - dnf_plan: Electric.Shapes.DnfPlan.t(), + dnf_plan: DnfPlan.t(), views: %{[String.t()] => MapSet.t()}, dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, @@ -32,12 +36,54 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do buffer_max_transactions: pos_integer() } - @spec drain_queue(t(), [Subqueries.sm_action()]) :: - {[Subqueries.sm_action()], StateMachine.t()} - def drain_queue(%__MODULE__{} = state, actions \\ []) do + @impl true + def handle_event(state, %Transaction{} = txn) do + case Subqueries.convert_transaction(txn, state, state.views) do + [] -> + {:ok, state, %Plan{ack_source_offset: txn.last_log_offset}} + + changes -> + plan = %Plan{ + log_ops: [%LogOp.AppendChanges{changes: changes, default_xid: txn.xid}], + ack_source_offset: txn.last_log_offset + } + + {:ok, state, plan} + end + end + + def handle_event(state, {:global_last_seen_lsn, lsn}) do + {:ok, %{state | latest_seen_lsn: Subqueries.normalize_global_lsn(lsn)}, %Plan{}} + end + + def handle_event(state, {:materializer_changes, dep_handle, payload}) do + :ok = Subqueries.validate_dependency_handle!(state, dep_handle) + {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) + dep_view = Map.get(state.views, subquery_ref, MapSet.new()) + + state + |> Map.update!(:queue, &MoveQueue.enqueue(&1, dep_index, payload, dep_view)) + |> drain_queue(%Plan{}) + end + + def handle_event(_state, {:pg_snapshot_known, _snapshot}) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} while no move-in is buffering" + end + + def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do + raise ArgumentError, + "received {:query_move_in_complete, rows, move_in_lsn} while no move-in is buffering" + end + + @impl true + def routing_views(%__MODULE__{views: views}), do: views + + @spec drain_queue(t(), Plan.t()) :: + {:ok, t() | Buffering.t(), Plan.t()} | {:stop, term()} + def drain_queue(%__MODULE__{} = state, %Plan{} = plan) do case MoveQueue.pop_next(state.queue) do nil -> - {actions, state} + {:ok, state, plan} {{:move_out, dep_index, move_out_values}, queue} -> subquery_ref = dep_ref_for_index(state, dep_index) @@ -61,18 +107,26 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do state.shape_handle ) - drain_queue(next_state, actions ++ [{:control, broadcast}]) + plan = %{plan | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}]} + drain_queue(next_state, plan) :move_in -> - {actions, - Buffering.from_steady( - state, - dep_index, - subquery_ref, - move_out_values, - queue, - :move_out - )} + buffering = + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_out_values, + queue, + :move_out + ) + + plan = %{ + plan + | effects: plan.effects ++ [%Electric.Shapes.Consumer.Effect.StartMoveInQuery{}] + } + + {:ok, buffering, plan} end {{:move_in, dep_index, move_in_values}, queue} -> @@ -81,15 +135,22 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do case effect do :move_in -> - {actions, - Buffering.from_steady( - state, - dep_index, - subquery_ref, - move_in_values, - queue, - :move_in - )} + buffering = + Buffering.from_steady( + state, + dep_index, + subquery_ref, + move_in_values, + queue, + :move_in + ) + + plan = %{ + plan + | effects: plan.effects ++ [%Electric.Shapes.Consumer.Effect.StartMoveInQuery{}] + } + + {:ok, buffering, plan} :move_out -> next_state = %{ @@ -108,7 +169,8 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do state.shape_handle ) - drain_queue(next_state, actions ++ [{:control, broadcast}]) + plan = %{plan | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}]} + drain_queue(next_state, plan) end end end @@ -132,41 +194,3 @@ defmodule Electric.Shapes.Consumer.Subqueries.Steady do end) end end - -defimpl Electric.Shapes.Consumer.Subqueries.StateMachine, - for: Electric.Shapes.Consumer.Subqueries.Steady do - alias Electric.Replication.Changes.Transaction - alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.MoveQueue - alias Electric.Shapes.Consumer.Subqueries.Steady - - def handle_event(state, %Transaction{} = txn) do - case Subqueries.convert_transaction(txn, state, state.views) do - [] -> {[], state} - changes -> {[{:store, changes}], state} - end - end - - def handle_event(state, {:global_last_seen_lsn, lsn}) do - {[], %{state | latest_seen_lsn: Subqueries.normalize_global_lsn(lsn)}} - end - - def handle_event(state, {:materializer_changes, dep_handle, payload}) do - :ok = Subqueries.validate_dependency_handle!(state, dep_handle) - {dep_index, subquery_ref} = Map.fetch!(state.dependency_handle_to_ref, dep_handle) - dep_view = Map.get(state.views, subquery_ref, MapSet.new()) - - state - |> Map.update!(:queue, &MoveQueue.enqueue(&1, dep_index, payload, dep_view)) - |> Steady.drain_queue() - end - - def handle_event(_state, {:pg_snapshot_known, _snapshot}) do - raise ArgumentError, "received {:pg_snapshot_known, snapshot} while no move-in is buffering" - end - - def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do - raise ArgumentError, - "received {:query_move_in_complete, rows, move_in_lsn} while no move-in is buffering" - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex new file mode 100644 index 0000000000..d7c2e2f7ca --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex @@ -0,0 +1,52 @@ +defmodule Electric.Shapes.Consumer.EventHandler.UnsupportedSubquery do + @moduledoc false + + @behaviour Electric.Shapes.Consumer.EventHandler + + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Plan + + @enforce_keys [:shape, :stack_id, :shape_handle] + defstruct [:shape, :stack_id, :shape_handle] + + @type t() :: %__MODULE__{ + shape: Electric.Shapes.Shape.t(), + stack_id: String.t(), + shape_handle: String.t() + } + + @impl true + def handle_event(state, %Transaction{} = txn) do + # Unsupported subquery shapes can still process transactions via the + # default path - they just can't handle dependency events. + delegate = %Electric.Shapes.Consumer.EventHandler.Default{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle + } + + case Electric.Shapes.Consumer.EventHandler.Default.handle_event(delegate, txn) do + {:ok, _default_handler, plan} -> {:ok, state, plan} + {:stop, reason} -> {:stop, reason} + end + end + + def handle_event(_state, {:materializer_changes, _dep_handle, _payload}) do + {:stop, :unsupported_subquery} + end + + def handle_event(_state, {:pg_snapshot_known, _snapshot}) do + {:stop, :unsupported_subquery} + end + + def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do + {:stop, :unsupported_subquery} + end + + def handle_event(state, {:global_last_seen_lsn, _lsn}) do + {:ok, state, %Plan{}} + end + + @impl true + def routing_views(_state), do: %{} +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/log_op.ex b/packages/sync-service/lib/electric/shapes/consumer/log_op.ex new file mode 100644 index 0000000000..0f272ecadb --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/log_op.ex @@ -0,0 +1,18 @@ +defmodule Electric.Shapes.Consumer.LogOp do + @moduledoc false + + defmodule AppendChanges do + @moduledoc false + defstruct changes: [], default_xid: nil + end + + defmodule AppendControl do + @moduledoc false + defstruct message: nil + end + + defmodule AppendMoveInSnapshot do + @moduledoc false + defstruct rows: [] + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan.ex b/packages/sync-service/lib/electric/shapes/consumer/plan.ex new file mode 100644 index 0000000000..90817257e6 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/plan.ex @@ -0,0 +1,19 @@ +defmodule Electric.Shapes.Consumer.Plan do + @moduledoc false + + defstruct log_ops: [], effects: [], ack_source_offset: nil + + @type t() :: %__MODULE__{ + log_ops: [log_op()], + effects: [effect()], + ack_source_offset: Electric.Replication.LogOffset.t() | nil + } + + @type log_op() :: + %Electric.Shapes.Consumer.LogOp.AppendChanges{} + | %Electric.Shapes.Consumer.LogOp.AppendControl{} + | %Electric.Shapes.Consumer.LogOp.AppendMoveInSnapshot{} + + @type effect() :: + %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex new file mode 100644 index 0000000000..2e90b71382 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex @@ -0,0 +1,186 @@ +defmodule Electric.Shapes.Consumer.PlanExecutor do + @moduledoc false + + alias Electric.Shapes.Consumer.Effect + alias Electric.Shapes.Consumer.LogOp + alias Electric.Shapes.Consumer.Plan + alias Electric.Shapes.Consumer.Subqueries + alias Electric.Shapes.Consumer.Subqueries.QueryRow + alias Electric.LogItems + alias Electric.Replication.LogOffset + alias Electric.ShapeCache + alias Electric.Shapes.Shape + + require Logger + + @type execution_result() :: %{ + state: term(), + num_changes: non_neg_integer(), + total_size: non_neg_integer(), + latest_written: LogOffset.t() | nil + } + + @spec execute(Plan.t(), term(), keyword()) :: execution_result() + def execute(%Plan{} = plan, state, _opts \\ []) do + acc = %{state: state, num_changes: 0, total_size: 0, latest_written: nil} + + acc = + Enum.reduce(plan.log_ops, acc, fn log_op, acc -> + execute_log_op(log_op, acc) + end) + + acc = + Enum.reduce(plan.effects, acc, fn effect, acc -> + execute_effect(effect, acc) + end) + + apply_ack(acc, plan.ack_source_offset) + end + + # -- Log ops -- + + defp execute_log_op(%LogOp.AppendChanges{changes: [], default_xid: _}, acc), do: acc + + defp execute_log_op(%LogOp.AppendChanges{changes: changes, default_xid: xid}, acc) do + state = acc.state + + {lines, total_size, state} = + Enum.reduce(changes, {[], 0, state}, fn change, {lines, size, state} -> + {new_lines, line_size} = change_to_log_lines(change, xid, state.shape) + last_offset = new_lines |> List.last() |> elem(0) + {lines ++ new_lines, size + line_size, %{state | latest_offset: last_offset}} + end) + + writer = ShapeCache.Storage.append_to_log!(lines, state.writer) + state = %{state | writer: writer} + + %{ + acc + | state: state, + num_changes: acc.num_changes + length(lines), + total_size: acc.total_size + total_size, + latest_written: state.latest_offset + } + end + + defp execute_log_op(%LogOp.AppendControl{message: message}, acc) do + state = acc.state + encoded = Jason.encode!(message) + + {{_, offset}, writer} = + ShapeCache.Storage.append_control_message!(encoded, state.writer) + + state = %{state | writer: writer, latest_offset: offset} + + %{ + acc + | state: state, + num_changes: acc.num_changes + 1, + total_size: acc.total_size + byte_size(encoded) + } + end + + defp execute_log_op(%LogOp.AppendMoveInSnapshot{rows: []}, acc), do: acc + + defp execute_log_op(%LogOp.AppendMoveInSnapshot{rows: rows}, acc) do + state = acc.state + + {lines, total_size, state} = + Enum.reduce(rows, {[], 0, state}, fn %QueryRow{key: key, json: json}, + {lines, size, state} -> + json = IO.iodata_to_binary(json) + offset = LogOffset.increment(state.latest_offset) + line = {offset, key, :insert, json} + {lines ++ [line], size + byte_size(json), %{state | latest_offset: offset}} + end) + + writer = ShapeCache.Storage.append_to_log!(lines, state.writer) + state = %{state | writer: writer} + + # Append the trailing snapshot-end control message + snapshot_end = Jason.encode!(%{headers: %{control: "snapshot-end"}}) + + {{_, offset}, writer} = + ShapeCache.Storage.append_control_message!(snapshot_end, state.writer) + + state = %{state | writer: writer, latest_offset: offset} + + %{ + acc + | state: state, + num_changes: acc.num_changes + length(lines) + 1, + total_size: acc.total_size + total_size + byte_size(snapshot_end), + latest_written: state.latest_offset + } + end + + # -- Effects -- + + defp execute_effect(%Effect.StartMoveInQuery{}, acc) do + state = acc.state + supervisor = Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) + Subqueries.query_move_in_async(supervisor, state, state.event_handler, self()) + acc + end + + # -- Ack -- + + defp apply_ack(acc, nil), do: acc + + defp apply_ack(acc, source_offset) do + state = acc.state + + state = + if acc.latest_written do + %{ + state + | txn_offset_mapping: state.txn_offset_mapping ++ [{acc.latest_written, source_offset}] + } + else + consider_flushed(state, source_offset) + end + + %{acc | state: state} + end + + defp consider_flushed(state, log_offset) do + alias Electric.Replication.ShapeLogCollector + + if state.txn_offset_mapping == [] do + ShapeLogCollector.notify_flushed(state.stack_id, state.shape_handle, log_offset) + state + else + new_boundary = log_offset + + {head, tail} = + Enum.split_while( + state.txn_offset_mapping, + &(LogOffset.compare(elem(&1, 1), new_boundary) == :lt) + ) + + case Enum.reverse(head) do + [] -> + state + + [{offset, _} | rest] -> + %{state | txn_offset_mapping: Enum.reverse([{offset, new_boundary} | rest], tail)} + end + end + end + + defp change_to_log_lines(change, xid, shape) do + lines = + change + |> LogItems.from_change( + xid, + Shape.pk(shape, change.relation), + shape.replica + ) + |> Enum.map(fn {offset, %{key: key} = log_item} -> + {offset, key, log_item.headers.operation, Jason.encode!(log_item)} + end) + + size = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) + {lines, size} + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/state.ex b/packages/sync-service/lib/electric/shapes/consumer/state.ex index cca4e83a11..02df15418b 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/state.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/state.ex @@ -22,7 +22,7 @@ defmodule Electric.Shapes.Consumer.State do :storage, :writer, initial_snapshot_state: InitialSnapshot.new(nil), - subquery_state: nil, + event_handler: nil, transaction_builder: TransactionBuilder.new(), buffer: [], txn_offset_mapping: [], diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 8a669404bf..4ed9208a60 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -8,11 +8,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do alias Electric.Replication.Changes.Transaction alias Electric.Replication.Eval alias Electric.Replication.Eval.Walker - alias Electric.Shapes.Consumer.Subqueries.Buffering - alias Electric.Shapes.Consumer.Subqueries.MoveQueue alias Electric.Shapes.Consumer.Subqueries.QueryRow - alias Electric.Shapes.Consumer.Subqueries.Steady - alias Electric.Shapes.Consumer.Subqueries.StateMachine alias Electric.Shapes.DnfPlan alias Electric.Shapes.Querying alias Electric.Shapes.Shape @@ -21,94 +17,21 @@ defmodule Electric.Shapes.Consumer.Subqueries do @null_sentinel "NULL" @type move_value() :: {term(), term()} - @type queue_op() :: {:move_in, move_value()} | {:move_out, move_value()} @type move_out_control() :: %{headers: %{event: String.t(), patterns: [map()]}} - @type sm_action() :: - {:store, [Changes.change() | QueryRow.t()]} - | {:control, map()} - | {:truncate, term()} - | :shutdown - @type action() :: - sm_action() | :start_move_in_query | {:flush, term()} def value_prefix, do: @value_prefix def null_sentinel, do: @null_sentinel - @spec new(keyword() | map()) :: Steady.t() - def new(opts) when is_list(opts) or is_map(opts) do - opts = Map.new(opts) - - stack_id = fetch_opt!(opts, :stack_id) - - %Steady{ - shape: fetch_opt!(opts, :shape), - stack_id: stack_id, - shape_handle: fetch_opt!(opts, :shape_handle), - dnf_plan: fetch_opt!(opts, :dnf_plan), - views: Map.get(opts, :views, %{}), - dependency_handle_to_ref: Map.get(opts, :dependency_handle_to_ref, %{}), - latest_seen_lsn: Map.get(opts, :latest_seen_lsn), - queue: MoveQueue.new(), - buffer_max_transactions: - Map.get(opts, :buffer_max_transactions, - Electric.Config.default(:subquery_buffer_max_transactions) - ) - } - end - - @spec handle_event(StateMachine.t(), term()) :: - {[action()], StateMachine.t()} - def handle_event(state, event) do - {actions, new_state} = StateMachine.handle_event(state, event) - actions = actions ++ flush_actions(state, new_state, event) - maybe_start_move_in_query(actions, new_state) - end - - # Same Buffering state (same move_in_values before and after) → no flush needed - defp flush_actions( - %Buffering{move_in_values: move_in_values}, - %Buffering{move_in_values: move_in_values}, - _event - ), - do: [] - - # Was Buffering, now different (splice happened or new move-in) → flush based on buffered txns - defp flush_actions(%Buffering{} = prev_state, _new_state, event) do - buffered_txns = - case event do - %Transaction{} = txn -> prev_state.buffered_txns ++ [txn] - _ -> prev_state.buffered_txns - end - - case buffered_txns do - [] -> [] - txns -> [{:flush, txns |> List.last() |> Map.fetch!(:last_log_offset)}] - end - end - - # Steady + Transaction → flush with txn offset - defp flush_actions(_prev_state, _new_state, %Transaction{last_log_offset: last_log_offset}), - do: [{:flush, last_log_offset}] - - # Everything else → no flush - defp flush_actions(_prev_state, _new_state, _event), do: [] - - defp maybe_start_move_in_query(actions, %Buffering{query_started?: false} = state) do - {actions ++ [:start_move_in_query], %{state | query_started?: true}} - end - - defp maybe_start_move_in_query(actions, state), do: {actions, state} - @spec normalize_global_lsn(Electric.Postgres.Lsn.t() | non_neg_integer()) :: Electric.Postgres.Lsn.t() def normalize_global_lsn(%Lsn{} = lsn), do: lsn def normalize_global_lsn(lsn) when is_integer(lsn), do: Lsn.from_integer(lsn) - @spec query_move_in_async(pid() | atom(), map(), StateMachine.t(), pid()) :: :ok + @spec query_move_in_async(pid() | atom(), map(), term(), pid()) :: :ok def query_move_in_async( supervisor, consumer_state, - %Buffering{} = buffering_state, + buffering_state, consumer_pid ) do {where, params} = @@ -199,9 +122,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> then(&{tag_structure, &1}) end - @spec convert_transaction(Transaction.t(), StateMachine.t(), map()) :: [ - Changes.change() - ] + @spec convert_transaction(Transaction.t(), term(), map()) :: [Changes.change()] def convert_transaction(%Transaction{changes: changes}, %{shape: shape} = state, views) do changes |> Enum.flat_map(fn change -> @@ -215,7 +136,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> mark_last_change() end - @spec validate_dependency_handle!(StateMachine.t(), term()) :: :ok + @spec validate_dependency_handle!(term(), term()) :: :ok def validate_dependency_handle!(%{dependency_handle_to_ref: mapping}, dep_handle) do unless Map.has_key?(mapping, dep_handle) do raise ArgumentError, @@ -226,9 +147,7 @@ defmodule Electric.Shapes.Consumer.Subqueries do :ok end - @spec make_move_out_control_message(StateMachine.t(), non_neg_integer(), [ - move_value() - ]) :: + @spec make_move_out_control_message(term(), non_neg_integer(), [move_value()]) :: move_out_control() def make_move_out_control_message( %{dnf_plan: dnf_plan, stack_id: stack_id, shape_handle: shape_handle}, @@ -274,11 +193,4 @@ defmodule Electric.Shapes.Consumer.Subqueries do {last, rest} = List.pop_at(changes, -1) rest ++ [%{last | last?: true}] end - - defp fetch_opt!(opts, key) do - case Map.fetch(opts, key) do - {:ok, value} -> value - :error -> raise ArgumentError, "missing required option #{inspect(key)}" - end - end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex deleted file mode 100644 index 360ccdc7a9..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries/state_machine.ex +++ /dev/null @@ -1,4 +0,0 @@ -defprotocol Electric.Shapes.Consumer.Subqueries.StateMachine do - @spec handle_event(t(), term()) :: {list(term()), t()} - def handle_event(state, event) -end diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex index 90e0e43953..c9a44de6b6 100644 --- a/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/equality_index.ex @@ -76,7 +76,9 @@ defmodule Electric.Shapes.Filter.Indexes.EqualityIndex do record ) do case :ets.lookup(table, {:type, condition_id, field}) do - [] -> MapSet.new() + [] -> + MapSet.new() + [{_, type}] -> affected_shapes_for_type(filter, table, condition_id, field, table_name, record, type) end diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex index 5721dbdc4c..3c43b0023e 100644 --- a/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex @@ -17,7 +17,12 @@ defmodule Electric.Shapes.Filter.Indexes.SubqueryIndex do alias Electric.Shapes.Filter.WhereCondition @spec add_shape(Filter.t(), reference(), term(), map()) :: :ok - def add_shape(%Filter{subquery_cond_table: table} = filter, condition_id, shape_id, optimisation) do + def add_shape( + %Filter{subquery_cond_table: table} = filter, + condition_id, + shape_id, + optimisation + ) do next_condition_id = make_ref() WhereCondition.init(filter, next_condition_id) WhereCondition.add_shape(filter, next_condition_id, shape_id, optimisation.and_where) @@ -28,13 +33,20 @@ defmodule Electric.Shapes.Filter.Indexes.SubqueryIndex do end @spec remove_shape(Filter.t(), reference(), term(), map()) :: :deleted | :ok - def remove_shape(%Filter{subquery_cond_table: table} = filter, condition_id, shape_id, optimisation) do + def remove_shape( + %Filter{subquery_cond_table: table} = filter, + condition_id, + shape_id, + optimisation + ) do case :ets.lookup(table, {condition_id, shape_id}) do [] -> :deleted [{_, next_condition_id}] -> - _ = WhereCondition.remove_shape(filter, next_condition_id, shape_id, optimisation.and_where) + _ = + WhereCondition.remove_shape(filter, next_condition_id, shape_id, optimisation.and_where) + :ets.delete(table, {condition_id, shape_id}) if decrement_count(table, condition_id) == 0 do diff --git a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex index 873c04ebb7..c76c5c6dfc 100644 --- a/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/subquery_index.ex @@ -232,7 +232,8 @@ defmodule Electric.Shapes.Filter.SubqueryIndex do @spec candidates_for_record(t(), term(), map()) :: MapSet.t() def candidates_for_record(table, root_table, record) do indexed_candidates = - Enum.reduce(registered_positions(table, root_table), MapSet.new(), fn {position_id, meta}, acc -> + Enum.reduce(registered_positions(table, root_table), MapSet.new(), fn {position_id, meta}, + acc -> candidates = case evaluate_position_lhs(meta, record) do {:ok, typed_value} -> diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index d1bf500e13..0bb23c2fda 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -113,7 +113,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do end end - defp optimise_where(%Func{name: "not", args: [%Func{name: "sublink_membership_check"} = subquery]}) do + defp optimise_where(%Func{ + name: "not", + args: [%Func{name: "sublink_membership_check"} = subquery] + }) do if simple_subquery_testexpr?(subquery) do %{operation: "subquery", field: "$subquery", and_where: nil} else @@ -255,7 +258,13 @@ defmodule Electric.Shapes.Filter.WhereCondition do affected_shapes(filter, condition_id, table_name, record, true) end - def affected_shapes(%Filter{where_cond_table: table} = filter, condition_id, table_name, record, include_fallback?) do + def affected_shapes( + %Filter{where_cond_table: table} = filter, + condition_id, + table_name, + record, + include_fallback? + ) do affected = MapSet.union( indexed_shapes_affected(filter, condition_id, table_name, record), diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs new file mode 100644 index 0000000000..4a71b3c223 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -0,0 +1,766 @@ +defmodule Electric.Shapes.Consumer.EventHandlerTest do + use ExUnit.Case, async: true + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effect + alias Electric.Shapes.Consumer.EventHandler + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Steady + alias Electric.Shapes.Consumer.LogOp + alias Electric.Shapes.Consumer.Plan + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @inspector Support.StubInspector.new( + tables: ["parent", "child"], + columns: [ + %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, + %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, + %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, + %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} + ] + ) + + describe "Steady handler" do + test "converts transactions against the current subquery view" do + handler = new_handler(subquery_view: MapSet.new([1])) + + assert {:ok, %Steady{}, plan} = + EventHandler.handle_event( + handler, + txn(50, [child_insert("1", "1"), child_insert("2", "2")]) + ) + + assert %Plan{ + log_ops: [ + %LogOp.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] + } + ] + } = plan + end + + test "negated subquery turns dependency move-in into an outer move-out" do + handler = new_handler(shape: negated_shape()) + dep_handle = dep_handle(handler) + + assert {:ok, %Steady{views: %{["$sublink", "0"] => view}} = _handler, plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert view == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + } + ] + } = plan + end + + test "negated subquery turns dependency move-out into a buffered outer move-in" do + handler = new_handler(shape: negated_shape(), subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert %Plan{effects: [%Effect.StartMoveInQuery{}]} = plan + + assert %Buffering{ + views_before_move: %{["$sublink", "0"] => before_view}, + views_after_move: %{["$sublink", "0"] => after_view} + } = handler + + assert before_view == MapSet.new([1]) + assert after_view == MapSet.new() + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:ok, %Steady{views: %{["$sublink", "0"] => view}}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert view == MapSet.new() + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "splices buffered transactions around the snapshot visibility boundary" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, %Plan{effects: [%Effect.StartMoveInQuery{}]}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("10", "1")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, [child_insert("11", "1")])) + + query_row = child_insert("99", "1") + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [query_row], lsn(10)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + } + ] + } = plan + end + + test "splices move-in query rows between emitted pre and post boundary changes" do + handler = new_handler(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("10", "1")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, [child_insert("11", "2")])) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1, 2]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "10"}}] + }, + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + } + ] + } = plan + end + + test "splices updates that become a delete before the boundary and an insert after it" do + handler = new_handler(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_update("10", "1", "2")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, [child_update("11", "3", "2")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1, 2]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendChanges{ + changes: [%Changes.DeletedRecord{old_record: %{"id" => "10"}}] + }, + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + } + ] + } = plan + end + + test "uses lsn updates to splice at the current buffer tail" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(120, [child_insert("10", "1")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "waits for an lsn update even when the move-in query completes with an empty buffer" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "uses an lsn update that arrived before the move-in query completed" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "uses an lsn update that was already seen before the move-in started" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, handler, %Plan{}} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "defers queued move outs until after splice and starts the next move in" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:ok, %Buffering{} = handler, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert %Buffering{ + move_in_values: [{2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } = handler + + assert views_before[["$sublink", "0"]] == MapSet.new() + assert views_after[["$sublink", "0"]] == MapSet.new([2]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + } + ], + effects: [%Effect.StartMoveInQuery{}] + } = plan + end + + test "applies a queued move out for the active move-in value after splice" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert views[["$sublink", "0"]] == MapSet.new() + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + } + ] + } = plan + end + + test "batches consecutive move ins into a single active move in" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, + %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} + ) + + assert %Buffering{ + move_in_values: [{1, "1"}, {2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } = handler + + assert views_before[["$sublink", "0"]] == MapSet.new() + assert views_after[["$sublink", "0"]] == MapSet.new([1, 2]) + end + + test "cancels pending inverse ops while buffering" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + + test "merges queued move outs into a single control message after splice" do + handler = new_handler(subquery_view: MapSet.new([2])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert views[["$sublink", "0"]] == MapSet.new() + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]}, + %LogOp.AppendControl{ + message: %{headers: %{event: "move-out", patterns: patterns}} + } + ] + } = plan + + assert length(patterns) == 2 + end + + test "returns {:stop, :buffer_overflow} when buffered transactions exceed the limit" do + handler = new_handler(buffer_max_transactions: 3) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("1", "1")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(51, [child_insert("2", "1")])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(52, [child_insert("3", "1")])) + + assert {:stop, :buffer_overflow} = + EventHandler.handle_event(handler, txn(53, [child_insert("4", "1")])) + end + + test "raises on dependency handle mismatch" do + assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> + new_handler() + |> EventHandler.handle_event( + {:materializer_changes, "wrong", %{move_in: [], move_out: []}} + ) + end + end + + test "raises on query callbacks while steady" do + handler = new_handler() + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + end + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + EventHandler.handle_event(handler, {:query_move_in_complete, [], lsn(1)}) + end + end + end + + describe "Default handler" do + test "returns plan with ack for empty transaction" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + txn = %Transaction{ + xid: 1, + changes: [], + num_changes: 0, + lsn: lsn(1), + last_log_offset: Electric.Replication.LogOffset.new(lsn(1), 0) + } + + assert {:ok, %EventHandler.Default{}, %Plan{ack_source_offset: offset}} = + EventHandler.handle_event(handler, txn) + + assert offset != nil + end + + test "ignores global_last_seen_lsn" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:ok, %EventHandler.Default{}, %Plan{log_ops: [], effects: []}} = + EventHandler.handle_event(handler, {:global_last_seen_lsn, 42}) + end + end + + describe "UnsupportedSubquery handler" do + test "stops on materializer changes" do + handler = %EventHandler.UnsupportedSubquery{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:stop, :unsupported_subquery} = + EventHandler.handle_event( + handler, + {:materializer_changes, "dep", %{move_in: [{1, "1"}], move_out: []}} + ) + end + end + + describe "routing_views/1" do + test "returns empty map for Default handler" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "s", + shape_handle: "h" + } + + assert %{} == EventHandler.routing_views(handler) + end + + test "returns exact views for Steady handler" do + handler = new_handler(subquery_view: MapSet.new([1, 2])) + assert %{["$sublink", "0"] => view} = EventHandler.routing_views(handler) + assert view == MapSet.new([1, 2]) + end + + test "returns conservative projection for Buffering handler" do + handler = new_handler(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + views = EventHandler.routing_views(handler) + # For positive deps, buffering uses union of before and after + assert views[["$sublink", "0"]] == MapSet.new([1, 2]) + end + end + + # -- Helpers -- + + defp new_handler(opts \\ []) do + shape = Keyword.get(opts, :shape, shape()) + {:ok, dnf_plan} = DnfPlan.compile(shape) + dep_handle = hd(shape.shape_dependencies_handles) + + %Steady{ + shape: shape, + stack_id: "stack-id", + shape_handle: "shape-handle", + dnf_plan: dnf_plan, + views: %{["$sublink", "0"] => Keyword.get(opts, :subquery_view, MapSet.new())}, + dependency_handle_to_ref: %{dep_handle => {0, ["$sublink", "0"]}}, + buffer_max_transactions: Keyword.get(opts, :buffer_max_transactions, 1000) + } + end + + defp dep_handle(handler) do + handler.dependency_handle_to_ref |> Map.keys() |> hd() + end + + defp shape do + Shape.new!("child", + where: "parent_id IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp negated_shape do + Shape.new!("child", + where: "parent_id NOT IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp simple_shape do + Shape.new!("child", inspector: @inspector) + end + + defp fill_handles(shape) do + filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) + handles = Enum.map(filled_deps, &Shape.generate_id/1) + %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} + end + + defp txn(xid, changes) do + %Transaction{xid: xid, changes: changes, num_changes: length(changes), lsn: lsn(xid)} + end + + defp lsn(value), do: Lsn.from_integer(value) + defp global_last_seen_lsn(value), do: {:global_last_seen_lsn, value} + + defp child_insert(id, parent_id) do + %Changes.NewRecord{ + relation: {"public", "child"}, + record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} + } + |> Changes.fill_key(["id"]) + end + + defp child_update(id, old_parent_id, new_parent_id) do + Changes.UpdatedRecord.new( + relation: {"public", "child"}, + old_record: %{"id" => id, "parent_id" => old_parent_id, "name" => "child-#{id}-old"}, + record: %{"id" => id, "parent_id" => new_parent_id, "name" => "child-#{id}-new"} + ) + |> Changes.fill_key(["id"]) + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs deleted file mode 100644 index 5442495e5e..0000000000 --- a/packages/sync-service/test/electric/shapes/consumer/subqueries_test.exs +++ /dev/null @@ -1,829 +0,0 @@ -defmodule Electric.Shapes.Consumer.SubqueriesTest do - use ExUnit.Case, async: true - - alias Electric.Postgres.Lsn - alias Electric.Replication.Changes - alias Electric.Replication.Changes.Transaction - alias Electric.Shapes.Consumer.Subqueries - alias Electric.Shapes.Consumer.Subqueries.Buffering - alias Electric.Shapes.Consumer.Subqueries.Steady - alias Electric.Shapes.DnfPlan - alias Electric.Shapes.Shape - - @inspector Support.StubInspector.new( - tables: ["parent", "child"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, - %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} - ] - ) - - test "converts steady transactions against the current subquery view" do - state = new_state(subquery_view: MapSet.new([1])) - - {actions, state} = - Subqueries.handle_event( - state, - txn(50, [child_insert("1", "1"), child_insert("2", "2")]) - ) - - assert %Steady{} = state - - assert [store: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}], flush: nil] = - actions - end - - test "negated subquery turns dependency move-in into an outer move-out" do - state = new_negated_state() - dep_handle = dep_handle(state) - - {actions, state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - assert %Steady{views: %{["$sublink", "0"] => view}} = state - assert view == MapSet.new([1]) - - assert [ - control: %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: _value}] - } - } - ] = actions - end - - test "negated subquery turns dependency move-out into a buffered outer move-in" do - state = new_negated_state(subquery_view: MapSet.new([1])) - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} - ) - - assert %Buffering{ - views_before_move: %{["$sublink", "0"] => before_view}, - views_after_move: %{["$sublink", "0"] => after_view} - } = state - - assert before_view == MapSet.new([1]) - assert after_view == MapSet.new() - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) - - assert %Steady{views: %{["$sublink", "0"] => view}} = state - assert view == MapSet.new() - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}} - ] = actions - end - - test "splices buffered transactions around the snapshot visibility boundary" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - assert %Buffering{} = state - - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - - query_row = child_insert("99", "1") - - {actions, state} = - Subqueries.handle_event(state, {:query_move_in_complete, [query_row], lsn(10)}) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, - {:flush, nil} - ] = actions - end - - test "splices move-in query rows between emitted pre and post boundary changes" do - state = new_state(subquery_view: MapSet.new([1])) - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "2")])) - - {actions, state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} - ) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1, 2]) - - assert [ - {:store, [%Changes.NewRecord{record: %{"id" => "10"}}]}, - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, - {:flush, nil} - ] = actions - end - - test "splices updates that become a delete before the boundary and an insert after it" do - state = new_state(subquery_view: MapSet.new([1])) - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} - ) - - # Before the splice we still evaluate against the old view {1}, so moving - # from parent 1 to parent 2 means the row leaves the shape and becomes a delete. - {[], state} = Subqueries.handle_event(state, txn(50, [child_update("10", "1", "2")])) - - # After the splice we evaluate against the new view {1, 2}, so moving from - # parent 3 to parent 2 means the row enters the shape and becomes a new record. - {[], state} = Subqueries.handle_event(state, txn(150, [child_update("11", "3", "2")])) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - - {actions, state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "2")], lsn(10)} - ) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1, 2]) - - assert [ - {:store, [%Changes.DeletedRecord{old_record: %{"id" => "10"}}]}, - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}]}, - {:flush, nil} - ] = actions - end - - test "uses lsn updates to splice at the current buffer tail" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, txn(120, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:flush, nil} - ] = actions - end - - test "splices buffered inserts, updates, and deletes around an lsn boundary" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - txn(120, [ - child_insert("10", "1"), - child_update("20", "1"), - child_delete("30", "1") - ]) - ) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - {[], state} = - Subqueries.handle_event( - state, - txn(150, [ - child_insert("11", "1"), - child_update("21", "1"), - child_delete("31", "1") - ]) - ) - - {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, - [ - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.UpdatedRecord{record: %{"id" => "21"}}, - %Changes.DeletedRecord{old_record: %{"id" => "31"}, last?: true} - ]}, - {:flush, nil} - ] = actions - end - - test "keeps the transaction splice boundary when a later lsn update arrives" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - {[], state} = Subqueries.handle_event(state, txn(160, [child_insert("12", "1")])) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - {actions, state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, - [ - %Changes.NewRecord{record: %{"id" => "11"}}, - %Changes.NewRecord{record: %{"id" => "12"}, last?: true} - ]}, - {:flush, nil} - ] = actions - end - - test "keeps the lsn splice boundary when the snapshot later reveals invisible txns" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("10", "1")])) - {[], state} = Subqueries.handle_event(state, txn(150, [child_insert("11", "1")])) - - {actions, state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 150, []}}) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:store, - [ - %Changes.NewRecord{record: %{"id" => "10"}}, - %Changes.NewRecord{record: %{"id" => "11"}, last?: true} - ]}, - {:flush, nil} - ] = actions - end - - test "waits for an lsn update even when the move-in query completes with an empty buffer" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - assert %Buffering{} = state - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}} - ] = actions - end - - test "uses an lsn update that arrived before the move-in query completed" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - {actions, state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}} - ] = actions - end - - test "uses an lsn update that was already seen before the move-in started" do - state = new_state() - dep_handle = dep_handle(state) - - {[], state} = Subqueries.handle_event(state, global_last_seen_lsn(20)) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 300, []}}) - - {actions, state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}} - ] = actions - end - - test "defers queued move outs until after splice and starts the next move in" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) - - assert %Buffering{ - move_in_values: [{2, "2"}], - views_before_move: views_before, - views_after_move: views_after - } = state - - assert views_before[["$sublink", "0"]] == MapSet.new() - assert views_after[["$sublink", "0"]] == MapSet.new([2]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:control, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}}}, - :start_move_in_query - ] = actions - end - - test "applies a queued move out for the active move-in value after splice" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new() - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:control, %{headers: %{event: "move-out", patterns: [%{pos: 0}]}}} - ] = actions - end - - test "batches consecutive move ins into a single active move in" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} - ) - - assert %Buffering{ - move_in_values: [{1, "1"}, {2, "2"}], - views_before_move: views_before, - views_after_move: views_after - } = state - - assert views_before[["$sublink", "0"]] == MapSet.new() - assert views_after[["$sublink", "0"]] == MapSet.new([1, 2]) - end - - test "cancels pending inverse ops while buffering" do - state = new_state() - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new([1]) - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}} - ] = actions - end - - test "merges queued move outs into a single control message after splice" do - state = new_state(subquery_view: MapSet.new([2])) - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} - ) - - {[], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} - ) - - {[], state} = Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - - {[], state} = - Subqueries.handle_event( - state, - {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} - ) - - {actions, state} = Subqueries.handle_event(state, global_last_seen_lsn(10)) - - assert %Steady{views: views} = state - view = views[["$sublink", "0"]] - assert view == MapSet.new() - - assert [ - {:control, %{headers: %{event: "move-in"}}}, - {:store, [%Changes.NewRecord{record: %{"id" => "99"}}]}, - {:control, %{headers: %{control: "snapshot-end"}}}, - {:control, %{headers: %{event: "move-out", patterns: patterns}}} - ] = actions - - assert length(patterns) == 2 - end - - test "emits :shutdown when buffered transactions exceed the limit" do - state = new_state(buffer_max_transactions: 3) - dep_handle = dep_handle(state) - - {[:start_move_in_query], state} = - Subqueries.handle_event( - state, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - assert %Buffering{} = state - - {[], state} = Subqueries.handle_event(state, txn(50, [child_insert("1", "1")])) - {[], state} = Subqueries.handle_event(state, txn(51, [child_insert("2", "1")])) - {[], state} = Subqueries.handle_event(state, txn(52, [child_insert("3", "1")])) - - {actions, _state} = Subqueries.handle_event(state, txn(53, [child_insert("4", "1")])) - - assert [:shutdown] = actions - end - - test "raises on dependency handle mismatch" do - assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> - new_state() - |> Subqueries.handle_event({:materializer_changes, "wrong", %{move_in: [], move_out: []}}) - end - end - - test "raises on query callbacks while steady" do - state = new_state() - - assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> - Subqueries.handle_event(state, {:pg_snapshot_known, {100, 200, []}}) - end - - assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> - Subqueries.handle_event(state, {:query_move_in_complete, [], lsn(1)}) - end - end - - test "builds a move-in where clause that excludes the current view" do - shape = shape() - {:ok, dnf_plan} = DnfPlan.compile(shape) - - assert {where, _params} = - DnfPlan.move_in_where_clause( - dnf_plan, - 0, - Enum.map([{1, "1"}, {2, "2"}], &elem(&1, 0)), - %{["$sublink", "0"] => MapSet.new([3])}, - shape.where.used_refs - ) - - assert is_binary(where) - end - - test "builds move-out control messages with the current hashing scheme" do - state = new_state() - - assert %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: value}] - } - } = Subqueries.make_move_out_control_message(state, 0, [{1, "1"}]) - - assert value == - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:1") - |> Base.encode16(case: :lower) - end - - test "extracts tag structure for the direct subquery predicate" do - shape = shape() - - assert {[["parent_id"]], %{["$sublink", "0"] => _comparison_expr}} = - Subqueries.move_in_tag_structure(shape) - end - - defp new_state(opts \\ []) do - shape = Keyword.get(opts, :shape, shape()) - {:ok, dnf_plan} = DnfPlan.compile(shape) - dep_handle = hd(shape.shape_dependencies_handles) - - new_opts = [ - shape: shape, - stack_id: "stack-id", - shape_handle: "shape-handle", - dnf_plan: dnf_plan, - views: %{["$sublink", "0"] => Keyword.get(opts, :subquery_view, MapSet.new())}, - dependency_handle_to_ref: %{dep_handle => {0, ["$sublink", "0"]}} - ] - - new_opts = - case Keyword.fetch(opts, :buffer_max_transactions) do - {:ok, max} -> Keyword.put(new_opts, :buffer_max_transactions, max) - :error -> new_opts - end - - Subqueries.new(new_opts) - end - - defp new_negated_state(opts \\ []) do - new_state(Keyword.put(opts, :shape, negated_shape())) - end - - defp dep_handle(state) do - state.dependency_handle_to_ref |> Map.keys() |> hd() - end - - defp shape do - Shape.new!("child", - where: "parent_id IN (SELECT id FROM public.parent WHERE value = 'keep')", - inspector: @inspector, - feature_flags: ["allow_subqueries"] - ) - |> fill_handles() - end - - defp negated_shape do - Shape.new!("child", - where: "parent_id NOT IN (SELECT id FROM public.parent WHERE value = 'keep')", - inspector: @inspector, - feature_flags: ["allow_subqueries"] - ) - |> fill_handles() - end - - defp fill_handles(shape) do - filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) - handles = Enum.map(filled_deps, &Shape.generate_id/1) - %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} - end - - defp txn(xid, changes) do - %Transaction{xid: xid, changes: changes, num_changes: length(changes), lsn: lsn(xid)} - end - - defp lsn(value), do: Lsn.from_integer(value) - defp global_last_seen_lsn(value), do: {:global_last_seen_lsn, value} - - defp child_insert(id, parent_id) do - %Changes.NewRecord{ - relation: {"public", "child"}, - record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} - } - |> Changes.fill_key(["id"]) - end - - defp child_update(id, parent_id) do - child_update(id, parent_id, parent_id) - end - - defp child_update(id, old_parent_id, new_parent_id) do - Changes.UpdatedRecord.new( - relation: {"public", "child"}, - old_record: %{"id" => id, "parent_id" => old_parent_id, "name" => "child-#{id}-old"}, - record: %{"id" => id, "parent_id" => new_parent_id, "name" => "child-#{id}-new"} - ) - |> Changes.fill_key(["id"]) - end - - defp child_delete(id, parent_id) do - %Changes.DeletedRecord{ - relation: {"public", "child"}, - old_record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} - } - |> Changes.fill_key(["id"]) - end -end diff --git a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs index e51c571d15..0082c92739 100644 --- a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs +++ b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs @@ -55,8 +55,17 @@ defmodule Electric.Shapes.Filter.SubqueryIndexTest do testexpr_par = %Ref{path: ["par_id"], type: :int8} sublink_ref = %Ref{path: @subquery_ref, type: {:array, :int8}} - ast_id = %Func{name: "sublink_membership_check", args: [testexpr_id, sublink_ref], type: :bool} - ast_par = %Func{name: "sublink_membership_check", args: [testexpr_par, sublink_ref], type: :bool} + ast_id = %Func{ + name: "sublink_membership_check", + args: [testexpr_id, sublink_ref], + type: :bool + } + + ast_par = %Func{ + name: "sublink_membership_check", + args: [testexpr_par, sublink_ref], + type: :bool + } %Electric.Shapes.DnfPlan{ disjuncts: [], @@ -218,7 +227,15 @@ defmodule Electric.Shapes.Filter.SubqueryIndexTest do test "removes candidate rows, membership, position metadata, and fallback", %{table: table} do plan = make_plan() SubqueryIndex.register_shape(table, "s1", @root_table, plan) - SubqueryIndex.seed_membership(table, "s1", @root_table, @subquery_ref, 0, MapSet.new([1, 2])) + + SubqueryIndex.seed_membership( + table, + "s1", + @root_table, + @subquery_ref, + 0, + MapSet.new([1, 2]) + ) # Verify data exists before unregister assert SubqueryIndex.has_positions?(table, "s1") diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index f1cc5fcc68..2e4367b1a7 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -1145,7 +1145,15 @@ defmodule Electric.Shapes.FilterTest do root_table = shape.root_table # Seed membership with value 1 (parent id 1 matches the subquery "WHERE value = 'keep'") - Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1])) + Filter.SubqueryIndex.seed_membership( + index, + "shape1", + root_table, + subquery_ref, + 0, + MapSet.new([1]) + ) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) # parent_id=1 is in the subquery view, so NOT IN means this should NOT route @@ -1196,7 +1204,15 @@ defmodule Electric.Shapes.FilterTest do root_table = shape.root_table # Seed the membership view with values {1, 2} - Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1, 2])) + Filter.SubqueryIndex.seed_membership( + index, + "shape1", + root_table, + subquery_ref, + 0, + MapSet.new([1, 2]) + ) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) # Only id matches (id=1, par_id=99) -> should route @@ -1295,7 +1311,15 @@ defmodule Electric.Shapes.FilterTest do subquery_ref = ["$sublink", "0"] root_table = shape.root_table - Filter.SubqueryIndex.seed_membership(index, "shape1", root_table, subquery_ref, 0, MapSet.new([1, 2])) + Filter.SubqueryIndex.seed_membership( + index, + "shape1", + root_table, + subquery_ref, + 0, + MapSet.new([1, 2]) + ) + Filter.SubqueryIndex.mark_ready(index, "shape1", root_table) # Both conditions match -> route @@ -1357,7 +1381,16 @@ defmodule Electric.Shapes.FilterTest do # Manually force fallback_s to stay on fallback (simulating unsupported) # by not calling mark_ready. indexed_s gets seeded and marked ready. subquery_ref = ["$sublink", "0"] - Filter.SubqueryIndex.seed_membership(index, "indexed_s", root_table, subquery_ref, 0, MapSet.new([1])) + + Filter.SubqueryIndex.seed_membership( + index, + "indexed_s", + root_table, + subquery_ref, + 0, + MapSet.new([1]) + ) + Filter.SubqueryIndex.mark_ready(index, "indexed_s", root_table) assert Filter.SubqueryIndex.fallback?(index, "fallback_s", root_table) diff --git a/packages/sync-service/test/integration/subquery_dependency_update_test.exs b/packages/sync-service/test/integration/subquery_dependency_update_test.exs index a7c5468199..b713ee44ae 100644 --- a/packages/sync-service/test/integration/subquery_dependency_update_test.exs +++ b/packages/sync-service/test/integration/subquery_dependency_update_test.exs @@ -310,7 +310,10 @@ defmodule Electric.Integration.SubqueryDependencyUpdateTest do item3_inserts = Enum.filter(change_messages, fn msg -> - match?(%ChangeMessage{headers: %{operation: :insert}, value: %{"id" => "item-3"}}, msg) + match?( + %ChangeMessage{headers: %{operation: :insert}, value: %{"id" => "item-3"}}, + msg + ) end) assert length(item3_inserts) == 1, From e4b6ba9c406cd928906af17ca35d7c24336a1cbb Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 09:40:41 +0000 Subject: [PATCH 54/63] Introduce ResultStream --- .../shapes/consumer/event_handler/default.ex | 34 ++-- .../lib/electric/utils/result_stream.ex | 51 ++++++ .../electric/utils/result_stream_test.exs | 158 ++++++++++++++++++ 3 files changed, 225 insertions(+), 18 deletions(-) create mode 100644 packages/sync-service/lib/electric/utils/result_stream.ex create mode 100644 packages/sync-service/test/electric/utils/result_stream_test.exs diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex index d95cfb00e5..8cd6783ce0 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex @@ -9,6 +9,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do alias Electric.Shapes.Consumer.Materializer alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Shape + alias Electric.Utils.ResultStream require Shape @@ -34,31 +35,28 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do end result = - Enum.reduce_while(changes, [], fn change, acc -> - case change do - %Changes.TruncatedRelation{} -> - {:halt, {:truncate, xid}} - - _ -> - converted = - Shape.convert_change(state.shape, change, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - extra_refs: extra_refs - ) - - {:cont, acc ++ converted} - end + changes + |> Stream.map(fn + %Changes.TruncatedRelation{} -> {:error, :truncate} + change -> {:ok, change} end) + |> ResultStream.flat_map( + &Shape.convert_change(state.shape, &1, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + extra_refs: extra_refs + ) + ) + |> ResultStream.to_list() case result do - {:truncate, _xid} -> + {:error, :truncate} -> {:stop, {:truncate, xid}} - [] -> + {:ok, []} -> {:ok, state, %Plan{ack_source_offset: last_log_offset}} - changes -> + {:ok, changes} -> plan = %Plan{ log_ops: [%LogOp.AppendChanges{changes: mark_last_change(changes), default_xid: xid}], ack_source_offset: last_log_offset diff --git a/packages/sync-service/lib/electric/utils/result_stream.ex b/packages/sync-service/lib/electric/utils/result_stream.ex new file mode 100644 index 0000000000..cb3996f37c --- /dev/null +++ b/packages/sync-service/lib/electric/utils/result_stream.ex @@ -0,0 +1,51 @@ +defmodule Electric.Utils.ResultStream do + @moduledoc """ + Composable operations on streams of `{:ok, value} | {:error, reason}` tuples. + + Short-circuits on the first error encountered, avoiding unnecessary work. + """ + + @type result(ok, err) :: {:ok, ok} | {:error, err} + + @doc """ + Lazily flat-maps over a stream of result tuples. + + For `{:ok, value}` items, applies `fun` to the unwrapped value. `fun` must + return a list of unwrapped values which are emitted individually. + + `{:error, reason}` items pass through unchanged and halt further processing. + """ + @spec flat_map(Enumerable.t(), (term() -> [term()])) :: Enumerable.t() + def flat_map(enumerable, fun) when is_function(fun, 1) do + Stream.transform(enumerable, :cont, fn + _item, :halt -> + {:halt, :halt} + + {:ok, value}, :cont -> + results = fun.(value) + {Enum.map(results, &{:ok, &1}), :cont} + + {:error, _reason} = error, :cont -> + {[error], :halt} + end) + end + + @doc """ + Consumes a stream of result tuples into a single result. + + Returns `{:ok, list}` if all items are `{:ok, value}`, collecting the + unwrapped values. Returns `{:error, reason}` on the first error encountered. + """ + @spec to_list(Enumerable.t()) :: {:ok, [term()]} | {:error, term()} + def to_list(enumerable) do + enumerable + |> Enum.reduce_while([], fn + {:ok, value}, acc -> {:cont, [value | acc]} + {:error, reason}, _acc -> {:halt, {:error, reason}} + end) + |> case do + {:error, _reason} = error -> error + acc -> {:ok, Enum.reverse(acc)} + end + end +end diff --git a/packages/sync-service/test/electric/utils/result_stream_test.exs b/packages/sync-service/test/electric/utils/result_stream_test.exs new file mode 100644 index 0000000000..215ceac91f --- /dev/null +++ b/packages/sync-service/test/electric/utils/result_stream_test.exs @@ -0,0 +1,158 @@ +defmodule Electric.Utils.ResultStreamTest do + use ExUnit.Case, async: true + + alias Electric.Utils.ResultStream + + describe "to_list/1" do + test "returns {:ok, []} for empty enumerable" do + assert {:ok, []} = ResultStream.to_list([]) + end + + test "collects all ok values" do + assert {:ok, [1, 2, 3]} = ResultStream.to_list([{:ok, 1}, {:ok, 2}, {:ok, 3}]) + end + + test "returns single ok value" do + assert {:ok, [:a]} = ResultStream.to_list([{:ok, :a}]) + end + + test "returns error at start" do + assert {:error, :boom} = ResultStream.to_list([{:error, :boom}, {:ok, 1}, {:ok, 2}]) + end + + test "returns error in middle" do + assert {:error, :boom} = ResultStream.to_list([{:ok, 1}, {:error, :boom}, {:ok, 3}]) + end + + test "returns first error when multiple exist" do + assert {:error, :first} = + ResultStream.to_list([{:ok, 1}, {:error, :first}, {:error, :second}]) + end + + test "returns single error" do + assert {:error, :only} = ResultStream.to_list([{:error, :only}]) + end + + test "works with streams" do + stream = Stream.map(1..3, &{:ok, &1}) + assert {:ok, [1, 2, 3]} = ResultStream.to_list(stream) + end + + test "short-circuits on error (does not consume rest of stream)" do + test_pid = self() + + stream = + Stream.map([{:ok, 1}, {:error, :stop}, {:ok, 3}], fn item -> + send(test_pid, {:visited, item}) + item + end) + + assert {:error, :stop} = ResultStream.to_list(stream) + + assert_received {:visited, {:ok, 1}} + assert_received {:visited, {:error, :stop}} + refute_received {:visited, {:ok, 3}} + end + end + + describe "flat_map/2" do + test "maps and flattens ok values" do + result = + [{:ok, 1}, {:ok, 2}, {:ok, 3}] + |> ResultStream.flat_map(&[&1, &1 * 10]) + |> ResultStream.to_list() + + assert {:ok, [1, 10, 2, 20, 3, 30]} = result + end + + test "handles fun returning empty list" do + result = + [{:ok, 1}, {:ok, 2}, {:ok, 3}] + |> ResultStream.flat_map(fn + 2 -> [] + x -> [x] + end) + |> ResultStream.to_list() + + assert {:ok, [1, 3]} = result + end + + test "passes through error and halts" do + result = + [{:ok, 1}, {:error, :boom}, {:ok, 3}] + |> ResultStream.flat_map(&[&1]) + |> ResultStream.to_list() + + assert {:error, :boom} = result + end + + test "does not call fun after error" do + test_pid = self() + + [{:ok, 1}, {:error, :stop}, {:ok, 3}] + |> ResultStream.flat_map(fn value -> + send(test_pid, {:mapped, value}) + [value] + end) + |> ResultStream.to_list() + + assert_received {:mapped, 1} + refute_received {:mapped, 3} + end + + test "works with empty input" do + assert {:ok, []} = + [] + |> ResultStream.flat_map(&[&1]) + |> ResultStream.to_list() + end + + test "error at start skips all mapping" do + test_pid = self() + + [{:error, :immediate}] + |> ResultStream.flat_map(fn value -> + send(test_pid, {:mapped, value}) + [value] + end) + |> ResultStream.to_list() + + refute_received {:mapped, _} + end + end + + describe "composition" do + test "flat_map pipelines compose" do + result = + [{:ok, 1}, {:ok, 2}] + |> ResultStream.flat_map(&[&1 * 2]) + |> ResultStream.flat_map(&[&1, &1 + 1]) + |> ResultStream.to_list() + + assert {:ok, [2, 3, 4, 5]} = result + end + + test "error propagates through chained flat_maps" do + result = + [{:ok, 1}, {:error, :mid}, {:ok, 3}] + |> ResultStream.flat_map(&[&1]) + |> ResultStream.flat_map(&[&1 * 10]) + |> ResultStream.to_list() + + assert {:error, :mid} = result + end + + test "works with Stream.map feeding into flat_map" do + result = + [1, 2, 3] + |> Stream.map(fn + 2 -> {:error, :bad} + n -> {:ok, n} + end) + |> ResultStream.flat_map(&[&1 * 10]) + |> ResultStream.to_list() + + assert {:error, :bad} = result + end + end +end From a85aba2dd0408be0ed8d7522c551f6b10115fe18 Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 13:54:28 +0000 Subject: [PATCH 55/63] Support UUID --- .../lib/electric/replication/eval.ex | 16 ++++++++++++++++ .../lib/electric/shapes/dnf_plan.ex | 19 ++++++++++++++++--- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/packages/sync-service/lib/electric/replication/eval.ex b/packages/sync-service/lib/electric/replication/eval.ex index 989735f9d7..cee5da0ae4 100644 --- a/packages/sync-service/lib/electric/replication/eval.ex +++ b/packages/sync-service/lib/electric/replication/eval.ex @@ -66,4 +66,20 @@ defmodule Electric.Replication.Eval do def type_to_pg_cast({:row, _}, _), do: raise("Unsupported type: row") def type_to_pg_cast({:internal, _}, _), do: raise("Unsupported type: internal") def type_to_pg_cast(type, _) when is_atom(type), do: to_string(type) + + @doc """ + Convert a value from the eval representation to the format Postgrex expects + for binary protocol encoding. + + Most types (integers, floats, booleans, dates, times, etc.) use native Elixir + types that Postgrex handles directly. UUID is a notable exception: the eval + system stores UUIDs as human-readable strings, but Postgrex expects 16-byte + raw binaries. + """ + def value_to_postgrex(value, :uuid) when is_binary(value) do + {:ok, bin} = Ecto.UUID.dump(value) + bin + end + + def value_to_postgrex(value, _type), do: value end diff --git a/packages/sync-service/lib/electric/shapes/dnf_plan.ex b/packages/sync-service/lib/electric/shapes/dnf_plan.ex index cdfc5636c8..c866a31bc9 100644 --- a/packages/sync-service/lib/electric/shapes/dnf_plan.ex +++ b/packages/sync-service/lib/electric/shapes/dnf_plan.ex @@ -443,8 +443,17 @@ defmodule Electric.Shapes.DnfPlan do params = case values do - [] -> Enum.map(casts, fn _ -> [] end) - _ -> values |> Electric.Utils.unzip_any() |> Tuple.to_list() + [] -> + Enum.map(casts, fn _ -> [] end) + + _ -> + values + |> Electric.Utils.unzip_any() + |> Tuple.to_list() + |> Enum.zip(col_types) + |> Enum.map(fn {col_vals, col_type} -> + Enum.map(col_vals, &value_to_postgrex(&1, col_type)) + end) end sql = @@ -458,10 +467,14 @@ defmodule Electric.Shapes.DnfPlan do {:array, element_type} -> type_cast = Electric.Replication.Eval.type_to_pg_cast(element_type) sql = "#{lhs_sql} = ANY ($#{pidx}::#{type_cast}[])" - {sql, [values], pidx + 1} + {sql, [Enum.map(values, &value_to_postgrex(&1, element_type))], pidx + 1} end end + defp value_to_postgrex(value, type) do + Electric.Replication.Eval.value_to_postgrex(value, type) + end + defp lhs_sql_from_ast(%Func{name: "sublink_membership_check", args: [testexpr, _]}) do SqlGenerator.to_sql(testexpr) end From 34e5bc57a6adbb2545477fe625c491135819e3be Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 14:26:00 +0000 Subject: [PATCH 56/63] Refactor Default --- .../shapes/consumer/event_handler/default.ex | 50 ++++++------------- 1 file changed, 14 insertions(+), 36 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex index 8cd6783ce0..9d2e760fb4 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex @@ -6,7 +6,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do alias Electric.Replication.Changes alias Electric.Replication.Changes.Transaction alias Electric.Shapes.Consumer.LogOp - alias Electric.Shapes.Consumer.Materializer alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Shape alias Electric.Utils.ResultStream @@ -28,28 +27,16 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do last_log_offset: last_log_offset, changes: changes }) do - extra_refs = - if Shape.are_deps_filled(state.shape) do - refs = Materializer.get_all_as_refs(state.shape, state.stack_id) - {refs, refs} - end - - result = - changes - |> Stream.map(fn - %Changes.TruncatedRelation{} -> {:error, :truncate} - change -> {:ok, change} - end) - |> ResultStream.flat_map( - &Shape.convert_change(state.shape, &1, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - extra_refs: extra_refs - ) + changes + |> Stream.map(&error_on_truncate/1) + |> ResultStream.flat_map( + &Shape.convert_change(state.shape, &1, + stack_id: state.stack_id, + shape_handle: state.shape_handle ) - |> ResultStream.to_list() - - case result do + ) + |> ResultStream.to_list() + |> case do {:error, :truncate} -> {:stop, {:truncate, xid}} @@ -70,25 +57,16 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do {:ok, state, %Plan{}} end - def handle_event(_state, {:pg_snapshot_known, _snapshot}) do - raise ArgumentError, "received {:pg_snapshot_known, snapshot} in Default handler" - end - - def handle_event(_state, {:query_move_in_complete, _rows, _move_in_lsn}) do - raise ArgumentError, "received {:query_move_in_complete, ...} in Default handler" - end - - def handle_event(_state, {:materializer_changes, _dep_handle, _payload}) do - raise ArgumentError, "received {:materializer_changes, ...} in Default handler" - end - @impl true def routing_views(_state), do: %{} defp mark_last_change([]), do: [] defp mark_last_change(changes) do - {last, rest} = List.pop_at(changes, -1) - rest ++ [%{last | last?: true}] + [last | rest] = Enum.reverse(changes) + Enum.reverse([%{last | last?: true} | rest]) end + + defp error_on_truncate(%Changes.TruncatedRelation{}), do: {:error, :truncate} + defp error_on_truncate(change), do: {:ok, change} end From 80d6b046e18ffe648d2148f2094b181dc0873e1d Mon Sep 17 00:00:00 2001 From: rob Date: Wed, 25 Mar 2026 15:14:13 +0000 Subject: [PATCH 57/63] Fix nil event handler --- packages/sync-service/lib/electric/shapes/consumer.ex | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 57119dbbf0..c79f1ae960 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -342,6 +342,11 @@ defmodule Electric.Shapes.Consumer do {:noreply, state, state.hibernate_after} end + def handle_info({:global_last_seen_lsn, _lsn}, %{event_handler: nil} = state) do + # Consumer not yet initialized, ignore + {:noreply, state} + end + def handle_info({:global_last_seen_lsn, _lsn} = event, state) do case handle_event(event, state) do %{terminating?: true} = state -> From 847f0a05fa54193346c1b57834bbba2c7df0f2a8 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 26 Mar 2026 09:18:36 +0000 Subject: [PATCH 58/63] Support truncate --- .../event_handler/subqueries/buffering.ex | 58 ++++++++++-------- .../event_handler/subqueries/steady.ex | 7 ++- .../electric/shapes/consumer/subqueries.ex | 29 ++++++--- .../shapes/consumer/event_handler_test.exs | 59 +++++++++++++++++++ 4 files changed, 119 insertions(+), 34 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index 09e98da55e..f180348744 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -221,34 +221,46 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do state.shape_handle ) - log_ops = - store_op(pre_txns, state, state.views_before_move) ++ - [%LogOp.AppendControl{message: move_in_broadcast}] ++ - move_in_snapshot_op(state.move_in_rows) ++ - store_op(post_txns, state, state.views_after_move) - - # The ack_source_offset should be the last buffered transaction's offset - # (or the last_txn_offset if there were txns) - all_txns = pre_txns ++ post_txns - - ack_offset = - case all_txns do - [] -> nil - txns -> txns |> List.last() |> Map.fetch!(:last_log_offset) - end + with {:ok, pre_ops} <- store_op(pre_txns, state, state.views_before_move), + {:ok, post_ops} <- store_op(post_txns, state, state.views_after_move) do + log_ops = + pre_ops ++ + [%LogOp.AppendControl{message: move_in_broadcast}] ++ + move_in_snapshot_op(state.move_in_rows) ++ + post_ops + + # The ack_source_offset should be the last buffered transaction's offset + # (or the last_txn_offset if there were txns) + all_txns = pre_txns ++ post_txns + + ack_offset = + case all_txns do + [] -> nil + txns -> txns |> List.last() |> Map.fetch!(:last_log_offset) + end - plan = %Plan{log_ops: log_ops, ack_source_offset: ack_offset} + plan = %Plan{log_ops: log_ops, ack_source_offset: ack_offset} - # Transition back to steady state, then drain any queued moves - state - |> to_steady_state() - |> Steady.drain_queue(plan) + # Transition back to steady state, then drain any queued moves + state + |> to_steady_state() + |> Steady.drain_queue(plan) + else + {:error, {:truncate, xid}} -> {:stop, {:truncate, xid}} + end end defp store_op(txns, state, views) do - case Enum.flat_map(txns, &Subqueries.convert_transaction(&1, state, views)) do - [] -> [] - changes -> [%LogOp.AppendChanges{changes: changes}] + Enum.reduce_while(txns, {:ok, []}, fn txn, {:ok, acc} -> + case Subqueries.convert_transaction(txn, state, views) do + {:error, :truncate} -> {:halt, {:error, {:truncate, txn.xid}}} + {:ok, changes} -> {:cont, {:ok, acc ++ changes}} + end + end) + |> case do + {:error, _} = error -> error + {:ok, []} -> {:ok, []} + {:ok, changes} -> {:ok, [%LogOp.AppendChanges{changes: changes}]} end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex index 5af13b4645..29e3df9c31 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex @@ -39,10 +39,13 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do @impl true def handle_event(state, %Transaction{} = txn) do case Subqueries.convert_transaction(txn, state, state.views) do - [] -> + {:error, :truncate} -> + {:stop, {:truncate, txn.xid}} + + {:ok, []} -> {:ok, state, %Plan{ack_source_offset: txn.last_log_offset}} - changes -> + {:ok, changes} -> plan = %Plan{ log_ops: [%LogOp.AppendChanges{changes: changes, default_xid: txn.xid}], ack_source_offset: txn.last_log_offset diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex index 4ed9208a60..2bc96bac65 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries.ex @@ -122,18 +122,29 @@ defmodule Electric.Shapes.Consumer.Subqueries do |> then(&{tag_structure, &1}) end - @spec convert_transaction(Transaction.t(), term(), map()) :: [Changes.change()] + @spec convert_transaction(Transaction.t(), term(), map()) :: + {:ok, [Changes.change()]} | {:error, :truncate} def convert_transaction(%Transaction{changes: changes}, %{shape: shape} = state, views) do changes - |> Enum.flat_map(fn change -> - Shape.convert_change(shape, change, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - extra_refs: {views, views}, - dnf_plan: state.dnf_plan - ) + |> Enum.reduce_while([], fn + %Changes.TruncatedRelation{}, _acc -> + {:halt, {:error, :truncate}} + + change, acc -> + converted = + Shape.convert_change(shape, change, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + extra_refs: {views, views}, + dnf_plan: state.dnf_plan + ) + + {:cont, [converted | acc]} end) - |> mark_last_change() + |> case do + {:error, :truncate} -> {:error, :truncate} + acc -> {:ok, acc |> Enum.reverse() |> List.flatten() |> mark_last_change()} + end end @spec validate_dependency_handle!(term(), term()) :: :ok diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs index 4a71b3c223..185aa72a2a 100644 --- a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -585,6 +585,39 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do EventHandler.handle_event(handler, txn(53, [child_insert("4", "1")])) end + test "stops on TruncatedRelation while steady" do + handler = new_handler(subquery_view: MapSet.new([1])) + + assert {:stop, {:truncate, 1}} = + EventHandler.handle_event(handler, txn(1, [child_truncate()])) + end + + test "stops on TruncatedRelation while buffering once splice completes" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_truncate()])) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + assert {:stop, {:truncate, 50}} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + end + test "raises on dependency handle mismatch" do assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> new_handler() @@ -639,6 +672,17 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do assert {:ok, %EventHandler.Default{}, %Plan{log_ops: [], effects: []}} = EventHandler.handle_event(handler, {:global_last_seen_lsn, 42}) end + + test "stops on TruncatedRelation" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:stop, {:truncate, 1}} = + EventHandler.handle_event(handler, txn(1, [child_truncate()])) + end end describe "UnsupportedSubquery handler" do @@ -655,6 +699,17 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do {:materializer_changes, "dep", %{move_in: [{1, "1"}], move_out: []}} ) end + + test "stops on TruncatedRelation" do + handler = %EventHandler.UnsupportedSubquery{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:stop, {:truncate, 1}} = + EventHandler.handle_event(handler, txn(1, [child_truncate()])) + end end describe "routing_views/1" do @@ -755,6 +810,10 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do |> Changes.fill_key(["id"]) end + defp child_truncate do + %Changes.TruncatedRelation{relation: {"public", "child"}} + end + defp child_update(id, old_parent_id, new_parent_id) do Changes.UpdatedRecord.new( relation: {"public", "child"}, From 203f4fc677fa167bb3ae81231d858d97bd662115 Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 26 Mar 2026 11:51:44 +0000 Subject: [PATCH 59/63] Subscribe and unsubscribe to LSN updates --- .../lib/electric/shapes/consumer.ex | 3 +- .../lib/electric/shapes/consumer/effect.ex | 10 +++++ .../event_handler/subqueries/buffering.ex | 11 ++--- .../event_handler/subqueries/steady.ex | 21 ++++++--- .../lib/electric/shapes/consumer/plan.ex | 2 + .../electric/shapes/consumer/plan_executor.ex | 10 +++++ .../shapes/consumer/event_handler_test.exs | 43 ++++--------------- 7 files changed, 52 insertions(+), 48 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index c79f1ae960..56b664e20c 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -17,7 +17,7 @@ defmodule Electric.Shapes.Consumer do alias Electric.Shapes.Consumer.Materializer alias Electric.Shapes.ConsumerRegistry alias Electric.LogItems - alias Electric.LsnTracker + alias Electric.Postgres.Inspector alias Electric.Replication.Changes alias Electric.Replication.Changes.Transaction @@ -126,7 +126,6 @@ defmodule Electric.Shapes.Consumer do metadata = [shape_handle: shape_handle, stack_id: stack_id] Logger.metadata(metadata) Electric.Telemetry.Sentry.set_tags_context(metadata) - {:ok, _} = LsnTracker.subscribe_to_global_lsn_updates(stack_id) # Shape initialization will be complete when we receive a message {:initialize_shape, # , } which the ShapeCache is expected to send as soon as this process diff --git a/packages/sync-service/lib/electric/shapes/consumer/effect.ex b/packages/sync-service/lib/electric/shapes/consumer/effect.ex index beb9a467e2..a05b90664e 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/effect.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/effect.ex @@ -5,4 +5,14 @@ defmodule Electric.Shapes.Consumer.Effect do @moduledoc false defstruct [] end + + defmodule SubscribeGlobalLsn do + @moduledoc false + defstruct [] + end + + defmodule UnsubscribeGlobalLsn do + @moduledoc false + defstruct [] + end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index f180348744..602afc4d65 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -22,8 +22,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do :trigger_dep_index, :move_in_values, :views_before_move, - :views_after_move, - :latest_seen_lsn + :views_after_move ] defstruct [ :shape, @@ -98,7 +97,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do views_before_move: state.views, views_after_move: views_after, dependency_handle_to_ref: state.dependency_handle_to_ref, - latest_seen_lsn: state.latest_seen_lsn, queue: queue, buffer_max_transactions: state.buffer_max_transactions } @@ -239,7 +237,11 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do txns -> txns |> List.last() |> Map.fetch!(:last_log_offset) end - plan = %Plan{log_ops: log_ops, ack_source_offset: ack_offset} + plan = %Plan{ + log_ops: log_ops, + effects: [%Electric.Shapes.Consumer.Effect.UnsubscribeGlobalLsn{}], + ack_source_offset: ack_offset + } # Transition back to steady state, then drain any queued moves state @@ -280,7 +282,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do dnf_plan: state.dnf_plan, views: state.views_after_move, dependency_handle_to_ref: state.dependency_handle_to_ref, - latest_seen_lsn: state.latest_seen_lsn, queue: state.queue, buffer_max_transactions: state.buffer_max_transactions } diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex index 29e3df9c31..fd53050e48 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex @@ -19,7 +19,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do :dnf_plan, views: %{}, dependency_handle_to_ref: %{}, - latest_seen_lsn: nil, queue: MoveQueue.new(), buffer_max_transactions: 1000 ] @@ -31,7 +30,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do dnf_plan: DnfPlan.t(), views: %{[String.t()] => MapSet.t()}, dependency_handle_to_ref: %{String.t() => {non_neg_integer(), [String.t()]}}, - latest_seen_lsn: Electric.Postgres.Lsn.t() | nil, queue: MoveQueue.t(), buffer_max_transactions: pos_integer() } @@ -55,8 +53,9 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do end end - def handle_event(state, {:global_last_seen_lsn, lsn}) do - {:ok, %{state | latest_seen_lsn: Subqueries.normalize_global_lsn(lsn)}, %Plan{}} + def handle_event(state, {:global_last_seen_lsn, _lsn}) do + # Straggler message after unsubscribe; ignore. + {:ok, state, %Plan{}} end def handle_event(state, {:materializer_changes, dep_handle, payload}) do @@ -126,7 +125,12 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do plan = %{ plan - | effects: plan.effects ++ [%Electric.Shapes.Consumer.Effect.StartMoveInQuery{}] + | effects: + plan.effects ++ + [ + %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{}, + %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} + ] } {:ok, buffering, plan} @@ -150,7 +154,12 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do plan = %{ plan - | effects: plan.effects ++ [%Electric.Shapes.Consumer.Effect.StartMoveInQuery{}] + | effects: + plan.effects ++ + [ + %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{}, + %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} + ] } {:ok, buffering, plan} diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan.ex b/packages/sync-service/lib/electric/shapes/consumer/plan.ex index 90817257e6..1ecce88a0f 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/plan.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/plan.ex @@ -16,4 +16,6 @@ defmodule Electric.Shapes.Consumer.Plan do @type effect() :: %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} + | %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{} + | %Electric.Shapes.Consumer.Effect.UnsubscribeGlobalLsn{} end diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex index 2e90b71382..8f2c187925 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex @@ -123,6 +123,16 @@ defmodule Electric.Shapes.Consumer.PlanExecutor do acc end + defp execute_effect(%Effect.SubscribeGlobalLsn{}, acc) do + {:ok, _} = Electric.LsnTracker.subscribe_to_global_lsn_updates(acc.state.stack_id) + acc + end + + defp execute_effect(%Effect.UnsubscribeGlobalLsn{}, acc) do + :ok = Electric.LsnTracker.unsubscribe_from_global_lsn_updates(acc.state.stack_id) + acc + end + # -- Ack -- defp apply_ack(acc, nil), do: acc diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs index 185aa72a2a..3218994310 100644 --- a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -73,7 +73,7 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} ) - assert %Plan{effects: [%Effect.StartMoveInQuery{}]} = plan + assert %Plan{effects: [%Effect.SubscribeGlobalLsn{}, %Effect.StartMoveInQuery{}]} = plan assert %Buffering{ views_before_move: %{["$sublink", "0"] => before_view}, @@ -109,7 +109,8 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do handler = new_handler() dep_handle = dep_handle(handler) - assert {:ok, %Buffering{} = handler, %Plan{effects: [%Effect.StartMoveInQuery{}]}} = + assert {:ok, %Buffering{} = handler, + %Plan{effects: [%Effect.SubscribeGlobalLsn{}, %Effect.StartMoveInQuery{}]}} = EventHandler.handle_event( handler, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} @@ -326,38 +327,6 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do } = plan end - test "uses an lsn update that was already seen before the move-in started" do - handler = new_handler() - dep_handle = dep_handle(handler) - - assert {:ok, handler, %Plan{}} = - EventHandler.handle_event(handler, global_last_seen_lsn(20)) - - assert {:ok, %Buffering{} = handler, _plan} = - EventHandler.handle_event( - handler, - {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} - ) - - assert {:ok, %Buffering{} = handler, _plan} = - EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) - - assert {:ok, %Steady{views: views}, plan} = - EventHandler.handle_event( - handler, - {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} - ) - - assert views[["$sublink", "0"]] == MapSet.new([1]) - - assert %Plan{ - log_ops: [ - %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, - %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} - ] - } = plan - end - test "defers queued move outs until after splice and starts the next move in" do handler = new_handler() dep_handle = dep_handle(handler) @@ -403,7 +372,11 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} } ], - effects: [%Effect.StartMoveInQuery{}] + effects: [ + %Effect.UnsubscribeGlobalLsn{}, + %Effect.SubscribeGlobalLsn{}, + %Effect.StartMoveInQuery{} + ] } = plan end From 0dd4b75120b2759c2fe9828d132f86ab48b94ddb Mon Sep 17 00:00:00 2001 From: rob Date: Thu, 26 Mar 2026 21:14:51 +0000 Subject: [PATCH 60/63] Reduce message passing for NOT IN subquery --- .../lib/electric/shapes/consumer.ex | 40 +-------------- .../lib/electric/shapes/consumer/effect.ex | 10 ++++ .../electric/shapes/consumer/event_handler.ex | 7 --- .../shapes/consumer/event_handler/default.ex | 3 -- .../event_handler/subqueries/buffering.ex | 48 ++++++++--------- .../event_handler/subqueries/steady.ex | 51 +++++++++++++++---- .../event_handler/unsupported_subquery.ex | 2 - .../lib/electric/shapes/consumer/plan.ex | 2 + .../electric/shapes/consumer/plan_executor.ex | 21 ++++++++ .../lib/electric/shapes/dnf_plan.ex | 13 ++--- .../shapes/consumer/event_handler_test.exs | 43 ++++------------ .../test/electric/shapes/dnf_plan_test.exs | 11 ++-- .../shapes/filter/subquery_index_test.exs | 6 +-- 13 files changed, 118 insertions(+), 139 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 56b664e20c..f526ec0165 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -900,15 +900,12 @@ defmodule Electric.Shapes.Consumer do end defp apply_event(state, event) do - old_handler = state.event_handler - - case EventHandler.handle_event(old_handler, event) do + case EventHandler.handle_event(state.event_handler, event) do {:stop, reason} -> {:stop, reason} {:ok, new_handler, plan} -> state = %{state | event_handler: new_handler} - sync_subquery_index(state, old_handler, new_handler) previous_offset = state.latest_offset result = PlanExecutor.execute(plan, state) @@ -1195,41 +1192,6 @@ defmodule Electric.Shapes.Consumer do end end - defp sync_subquery_index(state, old_handler, new_handler) do - case SubqueryIndex.for_stack(state.stack_id) do - nil -> :ok - index -> do_sync_subquery_index(index, state, old_handler, new_handler) - end - end - - defp do_sync_subquery_index(index, state, old_handler, new_handler) do - old_views = EventHandler.routing_views(old_handler) - new_views = EventHandler.routing_views(new_handler) - - if old_views != new_views do - root_table = state.shape.root_table - shape_handle = state.shape_handle - - for {ref, new_view} <- new_views do - old_view = Map.get(old_views, ref, MapSet.new()) - dep_index = ref |> List.last() |> String.to_integer() - - added = MapSet.difference(new_view, old_view) - removed = MapSet.difference(old_view, new_view) - - for value <- added do - SubqueryIndex.add_value(index, shape_handle, root_table, ref, dep_index, value) - end - - for value <- removed do - SubqueryIndex.remove_value(index, shape_handle, root_table, ref, dep_index, value) - end - end - end - - :ok - end - defp all_materializers_alive?(state) do Enum.all?(state.shape.shape_dependencies_handles, fn shape_handle -> name = Materializer.name(state.stack_id, shape_handle) diff --git a/packages/sync-service/lib/electric/shapes/consumer/effect.ex b/packages/sync-service/lib/electric/shapes/consumer/effect.ex index a05b90664e..92d3b6fecd 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/effect.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/effect.ex @@ -15,4 +15,14 @@ defmodule Electric.Shapes.Consumer.Effect do @moduledoc false defstruct [] end + + defmodule AddToSubqueryIndex do + @moduledoc false + defstruct [:dep_index, :subquery_ref, :values] + end + + defmodule RemoveFromSubqueryIndex do + @moduledoc false + defstruct [:dep_index, :subquery_ref, :values] + end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex index 012ae5beac..b362489015 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex @@ -8,15 +8,8 @@ defmodule Electric.Shapes.Consumer.EventHandler do @callback handle_event(t(), term()) :: {:ok, t(), Plan.t()} | {:stop, term()} - @callback routing_views(t()) :: %{[String.t()] => MapSet.t()} - @spec handle_event(t(), term()) :: {:ok, t(), Plan.t()} | {:stop, term()} def handle_event(handler, event) do handler.__struct__.handle_event(handler, event) end - - @spec routing_views(t()) :: %{[String.t()] => MapSet.t()} - def routing_views(handler) do - handler.__struct__.routing_views(handler) - end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex index 9d2e760fb4..46613a759d 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex @@ -57,9 +57,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Default do {:ok, state, %Plan{}} end - @impl true - def routing_views(_state), do: %{} - defp mark_last_change([]), do: [] defp mark_last_change(changes) do diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index 602afc4d65..8e3722ca72 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -5,6 +5,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do alias Electric.Postgres.Lsn alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effect alias Electric.Shapes.Consumer.EventHandler.Subqueries.Steady alias Electric.Shapes.Consumer.LogOp alias Electric.Shapes.Consumer.Plan @@ -20,6 +21,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do :shape_handle, :dnf_plan, :trigger_dep_index, + :trigger_subquery_ref, :move_in_values, :views_before_move, :views_after_move @@ -30,6 +32,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do :shape_handle, :dnf_plan, :trigger_dep_index, + :trigger_subquery_ref, :move_in_values, :views_before_move, :views_after_move, @@ -50,6 +53,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do shape_handle: String.t(), dnf_plan: DnfPlan.t(), trigger_dep_index: non_neg_integer(), + trigger_subquery_ref: [String.t()], move_in_values: [Subqueries.move_value()], views_before_move: %{[String.t()] => MapSet.t()}, views_after_move: %{[String.t()] => MapSet.t()}, @@ -93,6 +97,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do shape_handle: state.shape_handle, dnf_plan: state.dnf_plan, trigger_dep_index: dep_index, + trigger_subquery_ref: subquery_ref, move_in_values: move_in_values, views_before_move: state.views, views_after_move: views_after, @@ -166,29 +171,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do |> maybe_splice(nil) end - @impl true - def routing_views(%__MODULE__{ - views_before_move: before, - views_after_move: after_move, - dnf_plan: plan, - trigger_dep_index: trigger_dep - }) do - polarity = Map.get(plan.dependency_polarities, trigger_dep, :positive) - - Map.merge(before, after_move, fn ref, before_view, after_view -> - dep_index = ref |> List.last() |> String.to_integer() - - if dep_index == trigger_dep do - case polarity do - :positive -> MapSet.union(before_view, after_view) - :negated -> MapSet.intersection(before_view, after_view) - end - else - after_view - end - end) - end - # -- Splice logic -- defp maybe_splice(state, last_txn_offset) do @@ -237,9 +219,27 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do txns -> txns |> List.last() |> Map.fetch!(:last_log_offset) end + polarity = + Map.get(state.dnf_plan.dependency_polarities, state.trigger_dep_index, :positive) + + deferred_index_effects = + case polarity do + :positive -> + [] + + :negated -> + [ + %Effect.RemoveFromSubqueryIndex{ + dep_index: state.trigger_dep_index, + subquery_ref: state.trigger_subquery_ref, + values: state.move_in_values + } + ] + end + plan = %Plan{ log_ops: log_ops, - effects: [%Electric.Shapes.Consumer.Effect.UnsubscribeGlobalLsn{}], + effects: [%Effect.UnsubscribeGlobalLsn{}] ++ deferred_index_effects, ack_source_offset: ack_offset } diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex index fd53050e48..c7b057f6dd 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex @@ -4,6 +4,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do @behaviour Electric.Shapes.Consumer.EventHandler alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effect alias Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering alias Electric.Shapes.Consumer.LogOp alias Electric.Shapes.Consumer.Plan @@ -77,9 +78,6 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do "received {:query_move_in_complete, rows, move_in_lsn} while no move-in is buffering" end - @impl true - def routing_views(%__MODULE__{views: views}), do: views - @spec drain_queue(t(), Plan.t()) :: {:ok, t() | Buffering.t(), Plan.t()} | {:stop, term()} def drain_queue(%__MODULE__{} = state, %Plan{} = plan) do @@ -109,10 +107,25 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do state.shape_handle ) - plan = %{plan | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}]} + # Case A: positive dep move-out — remove from index after broadcast + plan = %{ + plan + | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}], + effects: + plan.effects ++ + [ + %Effect.RemoveFromSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: move_out_values + } + ] + } + drain_queue(next_state, plan) :move_in -> + # Case B: negated dep move-out → Buffering — index update deferred to splice buffering = Buffering.from_steady( state, @@ -128,8 +141,8 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do | effects: plan.effects ++ [ - %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{}, - %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} + %Effect.SubscribeGlobalLsn{}, + %Effect.StartMoveInQuery{} ] } @@ -142,6 +155,7 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do case effect do :move_in -> + # Case C: positive dep move-in → Buffering — add to index immediately buffering = Buffering.from_steady( state, @@ -157,14 +171,20 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do | effects: plan.effects ++ [ - %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{}, - %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} + %Effect.AddToSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: move_in_values + }, + %Effect.SubscribeGlobalLsn{}, + %Effect.StartMoveInQuery{} ] } {:ok, buffering, plan} :move_out -> + # Case D: negated dep move-in → broadcast — add to index after broadcast next_state = %{ state | queue: queue, @@ -181,7 +201,20 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do state.shape_handle ) - plan = %{plan | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}]} + plan = %{ + plan + | log_ops: plan.log_ops ++ [%LogOp.AppendControl{message: broadcast}], + effects: + plan.effects ++ + [ + %Effect.AddToSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: move_in_values + } + ] + } + drain_queue(next_state, plan) end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex index d7c2e2f7ca..876b01f0d6 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/unsupported_subquery.ex @@ -47,6 +47,4 @@ defmodule Electric.Shapes.Consumer.EventHandler.UnsupportedSubquery do {:ok, state, %Plan{}} end - @impl true - def routing_views(_state), do: %{} end diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan.ex b/packages/sync-service/lib/electric/shapes/consumer/plan.ex index 1ecce88a0f..29f1ef209e 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/plan.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/plan.ex @@ -18,4 +18,6 @@ defmodule Electric.Shapes.Consumer.Plan do %Electric.Shapes.Consumer.Effect.StartMoveInQuery{} | %Electric.Shapes.Consumer.Effect.SubscribeGlobalLsn{} | %Electric.Shapes.Consumer.Effect.UnsubscribeGlobalLsn{} + | %Electric.Shapes.Consumer.Effect.AddToSubqueryIndex{} + | %Electric.Shapes.Consumer.Effect.RemoveFromSubqueryIndex{} end diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex index 8f2c187925..d020b5e4ad 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex @@ -6,6 +6,7 @@ defmodule Electric.Shapes.Consumer.PlanExecutor do alias Electric.Shapes.Consumer.Plan alias Electric.Shapes.Consumer.Subqueries alias Electric.Shapes.Consumer.Subqueries.QueryRow + alias Electric.Shapes.Filter.SubqueryIndex alias Electric.LogItems alias Electric.Replication.LogOffset alias Electric.ShapeCache @@ -133,6 +134,26 @@ defmodule Electric.Shapes.Consumer.PlanExecutor do acc end + defp execute_effect(%Effect.AddToSubqueryIndex{} = effect, acc) do + update_subquery_index(acc, effect.dep_index, effect.subquery_ref, effect.values, :add) + end + + defp execute_effect(%Effect.RemoveFromSubqueryIndex{} = effect, acc) do + update_subquery_index(acc, effect.dep_index, effect.subquery_ref, effect.values, :remove) + end + + defp update_subquery_index(acc, dep_index, subquery_ref, values, op) do + state = acc.state + index = SubqueryIndex.for_stack(state.stack_id) + fun = if op == :add, do: &SubqueryIndex.add_value/6, else: &SubqueryIndex.remove_value/6 + + for {value, _original} <- values do + fun.(index, state.shape_handle, state.shape.root_table, subquery_ref, dep_index, value) + end + + acc + end + # -- Ack -- defp apply_ack(acc, nil), do: acc diff --git a/packages/sync-service/lib/electric/shapes/dnf_plan.ex b/packages/sync-service/lib/electric/shapes/dnf_plan.ex index c866a31bc9..790cc4acf0 100644 --- a/packages/sync-service/lib/electric/shapes/dnf_plan.ex +++ b/packages/sync-service/lib/electric/shapes/dnf_plan.ex @@ -25,8 +25,7 @@ defmodule Electric.Shapes.DnfPlan do :positions, :dependency_positions, :dependency_disjuncts, - :dependency_polarities, - :has_negated_subquery + :dependency_polarities ] @type tag_columns :: [String.t()] | {:hash_together, [String.t()]} @@ -48,8 +47,7 @@ defmodule Electric.Shapes.DnfPlan do positions: %{Decomposer.position() => position_info()}, dependency_positions: %{non_neg_integer() => [Decomposer.position()]}, dependency_disjuncts: %{non_neg_integer() => [non_neg_integer()]}, - dependency_polarities: %{non_neg_integer() => :positive | :negated}, - has_negated_subquery: boolean() + dependency_polarities: %{non_neg_integer() => :positive | :negated} } @doc """ @@ -547,8 +545,7 @@ defmodule Electric.Shapes.DnfPlan do positions: positions, dependency_positions: build_dependency_positions(positions), dependency_disjuncts: build_dependency_disjuncts(decomposition.disjuncts, positions), - dependency_polarities: build_dependency_polarities(positions), - has_negated_subquery: has_negated_subquery?(positions) + dependency_polarities: build_dependency_polarities(positions) }} end end @@ -639,10 +636,6 @@ defmodule Electric.Shapes.DnfPlan do end) end - defp has_negated_subquery?(positions) do - Enum.any?(positions, fn {_pos, info} -> info.is_subquery and info.negated end) - end - defp ignore_polarity_for_trigger?(info, trigger_dep, opts) do Keyword.get(opts, :ignore_trigger_polarity?, false) and info.is_subquery and info.dependency_index == trigger_dep diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs index 3218994310..de80e4bc2b 100644 --- a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -110,7 +110,13 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do dep_handle = dep_handle(handler) assert {:ok, %Buffering{} = handler, - %Plan{effects: [%Effect.SubscribeGlobalLsn{}, %Effect.StartMoveInQuery{}]}} = + %Plan{ + effects: [ + %Effect.AddToSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effect.SubscribeGlobalLsn{}, + %Effect.StartMoveInQuery{} + ] + }} = EventHandler.handle_event( handler, {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} @@ -374,6 +380,8 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do ], effects: [ %Effect.UnsubscribeGlobalLsn{}, + %Effect.RemoveFromSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effect.AddToSubqueryIndex{dep_index: 0, values: [{2, "2"}]}, %Effect.SubscribeGlobalLsn{}, %Effect.StartMoveInQuery{} ] @@ -685,39 +693,6 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do end end - describe "routing_views/1" do - test "returns empty map for Default handler" do - handler = %EventHandler.Default{ - shape: simple_shape(), - stack_id: "s", - shape_handle: "h" - } - - assert %{} == EventHandler.routing_views(handler) - end - - test "returns exact views for Steady handler" do - handler = new_handler(subquery_view: MapSet.new([1, 2])) - assert %{["$sublink", "0"] => view} = EventHandler.routing_views(handler) - assert view == MapSet.new([1, 2]) - end - - test "returns conservative projection for Buffering handler" do - handler = new_handler(subquery_view: MapSet.new([1])) - dep_handle = dep_handle(handler) - - {:ok, %Buffering{} = handler, _plan} = - EventHandler.handle_event( - handler, - {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} - ) - - views = EventHandler.routing_views(handler) - # For positive deps, buffering uses union of before and after - assert views[["$sublink", "0"]] == MapSet.new([1, 2]) - end - end - # -- Helpers -- defp new_handler(opts \\ []) do diff --git a/packages/sync-service/test/electric/shapes/dnf_plan_test.exs b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs index 5836f2c3dc..dcaffcfcfc 100644 --- a/packages/sync-service/test/electric/shapes/dnf_plan_test.exs +++ b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs @@ -50,7 +50,7 @@ defmodule Electric.Shapes.DnfPlanTest do assert plan.dependency_positions == %{0 => [0]} assert plan.dependency_disjuncts == %{0 => [0]} - assert plan.has_negated_subquery == false + assert plan.dependency_polarities == %{0 => :positive} end end @@ -83,7 +83,7 @@ defmodule Electric.Shapes.DnfPlanTest do # Each dependency maps to its own position and disjunct assert plan.dependency_positions == %{0 => [0], 1 => [1]} assert plan.dependency_disjuncts == %{0 => [0], 1 => [1]} - assert plan.has_negated_subquery == false + assert plan.dependency_polarities == %{0 => :positive, 1 => :positive} end test "(x IN sq1 AND status = 'open') OR y IN sq2" do @@ -182,29 +182,26 @@ defmodule Electric.Shapes.DnfPlanTest do end describe "compile/1 - negated subqueries" do - test "NOT with subquery marks has_negated_subquery" do + test "NOT with subquery has negated polarity" do {where, deps} = parse_where_with_sublinks(~S"NOT x IN (SELECT id FROM dep1)", 1) shape = make_shape(where, deps) assert {:ok, plan} = DnfPlan.compile(shape) - assert plan.has_negated_subquery == true - pos0 = plan.positions[0] assert pos0.is_subquery == true assert pos0.negated == true assert plan.dependency_polarities == %{0 => :negated} end - test "positive subquery does not mark has_negated_subquery" do + test "positive subquery has positive polarity" do {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep1)", 1) shape = make_shape(where, deps) assert {:ok, plan} = DnfPlan.compile(shape) - assert plan.has_negated_subquery == false assert plan.dependency_polarities == %{0 => :positive} end end diff --git a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs index 0082c92739..ba1366c864 100644 --- a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs +++ b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs @@ -45,8 +45,7 @@ defmodule Electric.Shapes.Filter.SubqueryIndexTest do }, dependency_positions: %{dep_index => [0]}, dependency_disjuncts: %{}, - dependency_polarities: %{dep_index => polarity}, - has_negated_subquery: polarity == :negated + dependency_polarities: %{dep_index => polarity} } end @@ -93,8 +92,7 @@ defmodule Electric.Shapes.Filter.SubqueryIndexTest do }, dependency_positions: %{0 => [0, 1]}, dependency_disjuncts: %{}, - dependency_polarities: %{0 => :positive}, - has_negated_subquery: false + dependency_polarities: %{0 => :positive} } end From f74fb2941be8c4ff973c6568f9d74ced86698a1e Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 27 Mar 2026 10:08:18 +0000 Subject: [PATCH 61/63] Fix race condition of missed lsn update with queued moves --- .../event_handler/subqueries/buffering.ex | 24 ++++++++-- .../shapes/consumer/event_handler_test.exs | 47 +++++++++++++++++-- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index 8e3722ca72..7904111698 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -239,14 +239,28 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do plan = %Plan{ log_ops: log_ops, - effects: [%Effect.UnsubscribeGlobalLsn{}] ++ deferred_index_effects, + effects: deferred_index_effects, ack_source_offset: ack_offset } - # Transition back to steady state, then drain any queued moves - state - |> to_steady_state() - |> Steady.drain_queue(plan) + # Transition back to steady state, then drain any queued moves. + # If drain_queue starts a new move-in, it will add SubscribeGlobalLsn + # (and we skip the unsubscribe so we stay subscribed continuously). + # If it doesn't, we need to unsubscribe. + case state |> to_steady_state() |> Steady.drain_queue(plan) do + {:ok, %Steady{} = steady, plan} -> + # No new move-in started — unsubscribe now + {:ok, steady, %{plan | effects: [%Effect.UnsubscribeGlobalLsn{} | plan.effects]}} + + other -> + # New move-in started — stay subscribed, drop the redundant SubscribeGlobalLsn, + # and carry latest_seen_lsn so the new Buffering can resolve the boundary + # immediately if the move-in query's LSN matches. + {:ok, %__MODULE__{} = handler, plan} = other + + {:ok, %{handler | latest_seen_lsn: state.latest_seen_lsn}, + %{plan | effects: Enum.reject(plan.effects, &match?(%Effect.SubscribeGlobalLsn{}, &1))}} + end else {:error, {:truncate, xid}} -> {:stop, {:truncate, xid}} end diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs index de80e4bc2b..86a2e00d6a 100644 --- a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -297,7 +297,8 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do log_ops: [ %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} - ] + ], + effects: [%Effect.UnsubscribeGlobalLsn{}] } = plan end @@ -379,15 +380,55 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do } ], effects: [ - %Effect.UnsubscribeGlobalLsn{}, %Effect.RemoveFromSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, %Effect.AddToSubqueryIndex{dep_index: 0, values: [{2, "2"}]}, - %Effect.SubscribeGlobalLsn{}, %Effect.StartMoveInQuery{} ] } = plan end + test "chained move-in resolves without needing a new lsn broadcast" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, %Plan{}} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(10)} + ) + + # First splice completes, second move-in starts + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + # Second move-in resolves with no further lsn broadcasts + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {200, 300, []}}) + + assert {:ok, %Steady{views: views}, _plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("98", "2")], lsn(10)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([2]) + end + test "applies a queued move out for the active move-in value after splice" do handler = new_handler() dep_handle = dep_handle(handler) From 5129850134524238e5b7d700937f823c0b1652f7 Mon Sep 17 00:00:00 2001 From: rob Date: Fri, 27 Mar 2026 10:13:35 +0000 Subject: [PATCH 62/63] Make :materializer_changes synchronous --- .../lib/electric/shapes/consumer.ex | 53 +++++++++++++------ .../electric/shapes/consumer/materializer.ex | 2 +- .../shapes/consumer/materializer_test.exs | 10 +++- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index f526ec0165..f15d76418d 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -72,6 +72,11 @@ defmodule Electric.Shapes.Consumer do |> GenServer.call(:await_snapshot_start, timeout) end + @spec materializer_changes(pid(), Electric.shape_handle(), map()) :: :ok + def materializer_changes(consumer_pid, dep_handle, events) do + GenServer.call(consumer_pid, {:materializer_changes, dep_handle, events}) + end + @spec subscribe_materializer(Electric.stack_id(), Electric.shape_handle(), pid()) :: {:ok, LogOffset.t()} def subscribe_materializer(stack_id, shape_handle, pid) do @@ -240,6 +245,19 @@ defmodule Electric.Shapes.Consumer do state.hibernate_after} end + def handle_call( + {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out} = events}, + _from, + state + ) do + Logger.debug(fn -> + "Consumer reacting to #{length(move_in)} move ins and #{length(move_out)} move outs from its #{dep_handle} dependency" + end) + + result = apply_event(state, {:materializer_changes, dep_handle, events}) + handle_apply_call_result(state, result) + end + def handle_call({:stop, reason}, _from, state) do {reason, state} = stop_with_reason(reason, state) {:stop, reason, :ok, state} @@ -362,23 +380,6 @@ defmodule Electric.Shapes.Consumer do {:noreply, %{state | writer: writer}, state.hibernate_after} end - def handle_info( - {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}}, - state - ) do - Logger.debug(fn -> - "Consumer reacting to #{length(move_in)} move ins and #{length(move_out)} move outs from its #{dep_handle} dependency" - end) - - handle_apply_event_result( - state, - apply_event( - state, - {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}} - ) - ) - end - def handle_info({:pg_snapshot_known, snapshot}, state) do Logger.debug(fn -> "Snapshot known for active move-in" end) handle_apply_event_result(state, apply_event(state, {:pg_snapshot_known, snapshot})) @@ -881,6 +882,24 @@ defmodule Electric.Shapes.Consumer do end end + defp handle_apply_call_result(state, {:stop, reason}) do + state = handle_event_stop(state, reason) + + if state.terminating? do + {:reply, :ok, state, {:continue, :stop_and_clean}} + else + {:stop, @stop_and_clean_reason, :ok, mark_for_removal(state)} + end + end + + defp handle_apply_call_result(_old_state, {state, notification, _num_changes, _total_size}) do + if notification do + :ok = notify_new_changes(state, notification) + end + + {:reply, :ok, state, state.hibernate_after} + end + defp handle_apply_event_result(state, {:stop, reason}) do state = handle_event_stop(state, reason) diff --git a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex index d71dffc8e5..ed269667bf 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex @@ -401,7 +401,7 @@ defmodule Electric.Shapes.Consumer.Materializer do if events != %{} do for pid <- state.subscribers do - send(pid, {:materializer_changes, state.shape_handle, events}) + Consumer.materializer_changes(pid, state.shape_handle, events) end end diff --git a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs index f96182266a..7a90f1b7a6 100644 --- a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs @@ -10,6 +10,7 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do alias Electric.ShapeCache.Storage alias Electric.Shapes.ConsumerRegistry alias Electric.Replication.LogOffset + alias Electric.Shapes.Consumer alias Electric.Shapes.Consumer.Materializer @moduletag :tmp_dir @@ -1244,7 +1245,7 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do end defp with_materializer(ctx, opts \\ []) do - {:ok, _pid} = + {:ok, pid} = Materializer.start_link(%{ stack_id: ctx.stack_id, shape_handle: ctx.shape_handle, @@ -1253,6 +1254,13 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do materialized_type: Keyword.get(opts, :materialized_type, {:array, :int8}) }) + Repatch.patch(Consumer, :materializer_changes, [mode: :shared], fn pid, handle, events -> + send(pid, {:materializer_changes, handle, events}) + :ok + end) + + Repatch.allow(self(), pid) + respond_to_call(:await_snapshot_start, :started) respond_to_call( From 5999217d0292a313be3aad52c17c96b35a104600 Mon Sep 17 00:00:00 2001 From: rob Date: Mon, 30 Mar 2026 17:09:22 +0100 Subject: [PATCH 63/63] Get lastest lsn when subscribing to it --- .../sync-service/lib/electric/lsn_tracker.ex | 40 +++++++++-- .../event_handler/subqueries/buffering.ex | 14 +++- .../electric/shapes/consumer/plan_executor.ex | 1 + .../test/electric/lsn_tracker_test.exs | 14 +++- .../shapes/consumer/event_handler_test.exs | 35 +++++++++ .../test/electric/shapes/consumer_test.exs | 72 +++++++++++++++++++ 6 files changed, 167 insertions(+), 9 deletions(-) diff --git a/packages/sync-service/lib/electric/lsn_tracker.ex b/packages/sync-service/lib/electric/lsn_tracker.ex index e00649021c..dca45d8360 100644 --- a/packages/sync-service/lib/electric/lsn_tracker.ex +++ b/packages/sync-service/lib/electric/lsn_tracker.ex @@ -62,6 +62,14 @@ defmodule Electric.LsnTracker do end def broadcast_last_seen_lsn(stack_ref, lsn) when is_integer(lsn) do + # Store the broadcast LSN so newly subscribing consumers can read the + # current value without waiting for the next broadcast. + try do + stack_ref |> table() |> :ets.insert({:last_broadcast_lsn, lsn}) + rescue + ArgumentError -> :ok + end + registry = Electric.StackSupervisor.registry_name(stack_ref) if Process.whereis(registry) do @@ -73,13 +81,35 @@ defmodule Electric.LsnTracker do :ok end + @doc """ + Returns the most recently broadcast LSN, or 0 if none has been broadcast yet. + """ + @spec get_last_broadcast_lsn(stack_ref()) :: non_neg_integer() + def get_last_broadcast_lsn(stack_ref) do + case :ets.lookup(table(stack_ref), :last_broadcast_lsn) do + [{:last_broadcast_lsn, lsn}] -> lsn + [] -> 0 + end + rescue + ArgumentError -> 0 + end + @spec subscribe_to_global_lsn_updates(stack_ref(), term()) :: {:ok, pid()} | {:error, term()} def subscribe_to_global_lsn_updates(stack_ref, value \\ []) do - Registry.register( - Electric.StackSupervisor.registry_name(stack_ref), - @global_lsn_updates_topic, - value - ) + with {:ok, _} <- + Registry.register( + Electric.StackSupervisor.registry_name(stack_ref), + @global_lsn_updates_topic, + value + ) do + last_lsn = get_last_broadcast_lsn(stack_ref) + + if last_lsn > 0 do + send(self(), {:global_last_seen_lsn, last_lsn}) + end + + {:ok, self()} + end end @spec unsubscribe_from_global_lsn_updates(stack_ref()) :: :ok diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex index 7904111698..d519f204e6 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -123,10 +123,11 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do def handle_event(state, {:global_last_seen_lsn, lsn}) do lsn = Subqueries.normalize_global_lsn(lsn) + latest_seen_lsn = newer_lsn(state.latest_seen_lsn, lsn) state - |> Map.put(:latest_seen_lsn, lsn) - |> maybe_buffer_boundary_from_lsn(lsn) + |> Map.put(:latest_seen_lsn, latest_seen_lsn) + |> maybe_buffer_boundary_from_lsn(latest_seen_lsn) |> maybe_splice(nil) end @@ -351,6 +352,15 @@ defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do maybe_buffer_boundary_from_lsn(state, state.latest_seen_lsn) end + defp newer_lsn(nil, %Lsn{} = lsn), do: lsn + + defp newer_lsn(%Lsn{} = current, %Lsn{} = candidate) do + case Lsn.compare(current, candidate) do + :lt -> candidate + _ -> current + end + end + defp apply_dependency_move(subquery_view, move_in_values, :move_in) do Enum.reduce(move_in_values, subquery_view, fn {value, _original_value}, view -> MapSet.put(view, value) diff --git a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex index d020b5e4ad..733293ec65 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/plan_executor.ex @@ -126,6 +126,7 @@ defmodule Electric.Shapes.Consumer.PlanExecutor do defp execute_effect(%Effect.SubscribeGlobalLsn{}, acc) do {:ok, _} = Electric.LsnTracker.subscribe_to_global_lsn_updates(acc.state.stack_id) + acc end diff --git a/packages/sync-service/test/electric/lsn_tracker_test.exs b/packages/sync-service/test/electric/lsn_tracker_test.exs index 22c529ae0d..b9d4a44730 100644 --- a/packages/sync-service/test/electric/lsn_tracker_test.exs +++ b/packages/sync-service/test/electric/lsn_tracker_test.exs @@ -1,7 +1,9 @@ defmodule Electric.LsnTrackerTest do use ExUnit.Case, async: true - import Support.ComponentSetup, only: [with_registry: 1, with_stack_id_from_test: 1] + import Support.ComponentSetup, + only: [with_lsn_tracker: 1, with_registry: 1, with_stack_id_from_test: 1] + alias Electric.LsnTracker alias Electric.Postgres.Lsn @@ -75,7 +77,7 @@ defmodule Electric.LsnTrackerTest do end describe "broadcast_last_seen_lsn/2" do - setup [:with_registry] + setup [:with_registry, :with_lsn_tracker] test "delivers messages to processes registered for global_lsn_updates", ctx do LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) @@ -85,6 +87,14 @@ defmodule Electric.LsnTrackerTest do assert_receive {:global_last_seen_lsn, 42} end + test "replays the most recent broadcast to newly registered processes", ctx do + :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 42) + + assert {:ok, _} = LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) + + assert_receive {:global_last_seen_lsn, 42} + end + test "delivers to multiple registered processes", ctx do test_pid = self() LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs index 86a2e00d6a..808f6984c6 100644 --- a/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler_test.exs @@ -334,6 +334,41 @@ defmodule Electric.Shapes.Consumer.EventHandlerTest do } = plan end + test "keeps the newest seen lsn when an older update arrives later" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert {:ok, %Buffering{} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + {:query_move_in_complete, [child_insert("99", "1")], lsn(20)} + ) + + assert views[["$sublink", "0"]] == MapSet.new([1]) + + assert %Plan{ + log_ops: [ + %LogOp.AppendControl{message: %{headers: %{event: "move-in"}}}, + %LogOp.AppendMoveInSnapshot{rows: [%Changes.NewRecord{record: %{"id" => "99"}}]} + ] + } = plan + end + test "defers queued move outs until after splice and starts the next move in" do handler = new_handler() dep_handle = dep_handle(handler) diff --git a/packages/sync-service/test/electric/shapes/consumer_test.exs b/packages/sync-service/test/electric/shapes/consumer_test.exs index 8d7a31fd18..a677ae06c7 100644 --- a/packages/sync-service/test/electric/shapes/consumer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer_test.exs @@ -2061,6 +2061,78 @@ defmodule Electric.Shapes.ConsumerTest do ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) end + test "consumer replays the latest broadcast when subscribing for a move-in", ctx do + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Subqueries, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + {:ok, shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + [dep_handle] = shape.shape_dependencies_handles + + consumer_pid = Consumer.whereis(ctx.stack_id, shape_handle) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) + + assert :ok = + Consumer.materializer_changes(consumer_pid, dep_handle, %{ + move_in: [{1, "1"}], + move_out: [] + }) + + assert_receive {:query_requested, ^consumer_pid} + + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + + send( + consumer_pid, + {:query_move_in_complete, + [ + %Electric.Shapes.Consumer.Subqueries.QueryRow{ + key: ~s'"public"."test_table"/"1"', + json: + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + } + ], Lsn.from_integer(100)} + ) + + assert_receive {^ref, :new_changes, _offset}, @receive_timeout + + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + + assert [ + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert"}, + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} + }, + %{"headers" => %{"control" => "snapshot-end"}} + ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) + end + test "consumer startup seeds the stack-scoped subquery index", ctx do alias Electric.Shapes.Filter.SubqueryIndex