diff --git a/.changeset/green-ladybugs-deliver.md b/.changeset/green-ladybugs-deliver.md new file mode 100644 index 0000000000..809b6fe079 --- /dev/null +++ b/.changeset/green-ladybugs-deliver.md @@ -0,0 +1,8 @@ +--- +'@core/elixir-client': minor +'@core/sync-service': minor +--- + +Add Move-in/out support for subqueries combined using `AND`, `OR`, `NOT`, and other compound `WHERE` expressions. Previously these shapes would return `409` on a subquery move, forcing clients to discard the shape and resync it from scratch. The sync service now reconciles those changes in-stream. + +This release also changes the wire protocol. Older `@core/elixir-client` versions are not compatible with the sync service from this release. TanStack DB clients need `@tanstack/db >= 0.6.2` and `@tanstack/electric-db-collection >= 0.3.0`. diff --git a/packages/elixir-client/lib/electric/client/message.ex b/packages/elixir-client/lib/electric/client/message.ex index 1be05998aa..3d96a809cf 100644 --- a/packages/elixir-client/lib/electric/client/message.ex +++ b/packages/elixir-client/lib/electric/client/message.ex @@ -13,7 +13,8 @@ defmodule Electric.Client.Message do txids: [], op_position: 0, tags: [], - removed_tags: [] + removed_tags: [], + active_conditions: [] ] @type operation :: :insert | :update | :delete @@ -29,7 +30,8 @@ defmodule Electric.Client.Message do txids: txids(), op_position: non_neg_integer(), tags: [tag()], - removed_tags: [tag()] + removed_tags: [tag()], + active_conditions: [boolean()] } @doc false @@ -44,7 +46,8 @@ defmodule Electric.Client.Message do lsn: Map.get(msg, "lsn", nil), op_position: Map.get(msg, "op_position", 0), tags: Map.get(msg, "tags", []), - removed_tags: Map.get(msg, "removed_tags", []) + removed_tags: Map.get(msg, "removed_tags", []), + active_conditions: Map.get(msg, "active_conditions", []) } end @@ -187,14 +190,22 @@ defmodule Electric.Client.Message do @enforce_keys [:shape_handle, :offset, :schema] - defstruct [:shape_handle, :offset, 
:schema, tag_to_keys: %{}, key_data: %{}] + defstruct [ + :shape_handle, + :offset, + :schema, + tag_to_keys: %{}, + key_data: %{}, + disjunct_positions: nil + ] @type t :: %__MODULE__{ shape_handle: Client.shape_handle(), offset: Offset.t(), schema: Client.schema(), - tag_to_keys: %{String.t() => MapSet.t(String.t())}, - key_data: %{String.t() => %{tags: MapSet.t(String.t()), msg: ChangeMessage.t()}} + tag_to_keys: %{optional(term()) => MapSet.t(String.t())}, + key_data: %{optional(String.t()) => map()}, + disjunct_positions: [[non_neg_integer()]] | nil } end @@ -251,6 +262,57 @@ defmodule Electric.Client.Message do end end + defmodule MoveInMessage do + @moduledoc """ + Represents a move-in event from the server. + + Move-in events are sent when the server's subquery filter has changed and + rows may now be included in the shape. The `patterns` field contains position + and hash information that the client uses to update `active_conditions` on + tracked rows. + """ + + defstruct [:patterns, :handle, :request_timestamp] + + @type pattern :: %{pos: non_neg_integer(), value: String.t()} + @type t :: %__MODULE__{ + patterns: [pattern()], + handle: Client.shape_handle(), + request_timestamp: DateTime.t() + } + + def from_message( + %{"headers" => %{"event" => "move-in", "patterns" => patterns}}, + handle, + request_timestamp + ) do + %__MODULE__{ + patterns: normalize_patterns(patterns), + handle: handle, + request_timestamp: request_timestamp + } + end + + def from_message( + %{headers: %{event: "move-in", patterns: patterns}}, + handle, + request_timestamp + ) do + %__MODULE__{ + patterns: normalize_patterns(patterns), + handle: handle, + request_timestamp: request_timestamp + } + end + + defp normalize_patterns(patterns) do + Enum.map(patterns, fn + %{"pos" => pos, "value" => value} -> %{pos: pos, value: value} + %{pos: _, value: _} = pattern -> pattern + end) + end + end + defguard is_insert(msg) when is_struct(msg, ChangeMessage) and msg.headers.operation == 
:insert def parse(%{"value" => _} = msg, shape_handle, value_mapper_fun, request_timestamp) do @@ -288,6 +350,24 @@ defmodule Electric.Client.Message do [MoveOutMessage.from_message(msg, shape_handle, request_timestamp)] end + def parse( + %{"headers" => %{"event" => "move-in"}} = msg, + shape_handle, + _value_mapper_fun, + request_timestamp + ) do + [MoveInMessage.from_message(msg, shape_handle, request_timestamp)] + end + + def parse( + %{headers: %{event: "move-in"}} = msg, + shape_handle, + _value_mapper_fun, + request_timestamp + ) do + [MoveInMessage.from_message(msg, shape_handle, request_timestamp)] + end + def parse("", _handle, _value_mapper_fun, _request_timestamp) do [] end diff --git a/packages/elixir-client/lib/electric/client/poll.ex b/packages/elixir-client/lib/electric/client/poll.ex index 5051facaba..59b3751c9e 100644 --- a/packages/elixir-client/lib/electric/client/poll.ex +++ b/packages/elixir-client/lib/electric/client/poll.ex @@ -234,10 +234,21 @@ defmodule Electric.Client.Poll do end defp handle_message(%Message.ChangeMessage{} = msg, state) do - {tag_to_keys, key_data} = - TagTracker.update_tag_index(state.tag_to_keys, state.key_data, msg) + {tag_to_keys, key_data, disjunct_positions} = + TagTracker.update_tag_index( + state.tag_to_keys, + state.key_data, + state.disjunct_positions, + msg + ) - {:message, msg, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} + {:message, msg, + %{ + state + | tag_to_keys: tag_to_keys, + key_data: key_data, + disjunct_positions: disjunct_positions + }} end defp handle_message( @@ -248,6 +259,7 @@ defmodule Electric.Client.Poll do TagTracker.generate_synthetic_deletes( state.tag_to_keys, state.key_data, + state.disjunct_positions, patterns, request_timestamp ) @@ -255,6 +267,20 @@ defmodule Electric.Client.Poll do {:messages, synthetic_deletes, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} end + defp handle_message( + %Message.MoveInMessage{patterns: patterns}, + state + ) do + {tag_to_keys, 
key_data} = + TagTracker.handle_move_in( + state.tag_to_keys, + state.key_data, + patterns + ) + + {:skip, %{state | tag_to_keys: tag_to_keys, key_data: key_data}} + end + defp handle_schema(%Fetch.Response{schema: schema}, client, %{value_mapper_fun: nil} = state) when is_map(schema) do {parser_module, parser_opts} = client.parser diff --git a/packages/elixir-client/lib/electric/client/shape_state.ex b/packages/elixir-client/lib/electric/client/shape_state.ex index 1f7f457bf9..f76961291a 100644 --- a/packages/elixir-client/lib/electric/client/shape_state.ex +++ b/packages/elixir-client/lib/electric/client/shape_state.ex @@ -46,6 +46,7 @@ defmodule Electric.Client.ShapeState do tag_to_keys: %{}, key_data: %{}, stale_cache_retry_count: 0, + disjunct_positions: nil, recent_requests: [], fast_loop_consecutive_count: 0 ] @@ -59,6 +60,7 @@ defmodule Electric.Client.ShapeState do up_to_date?: boolean(), tag_to_keys: %{optional(term()) => MapSet.t()}, key_data: %{optional(term()) => %{tags: MapSet.t(), msg: term()}}, + disjunct_positions: [[non_neg_integer()]] | nil, stale_cache_buster: String.t() | nil, stale_cache_retry_count: non_neg_integer(), recent_requests: [{integer(), Offset.t()}], @@ -95,7 +97,8 @@ defmodule Electric.Client.ShapeState do schema: resume.schema, up_to_date?: true, tag_to_keys: Map.get(resume, :tag_to_keys, %{}), - key_data: Map.get(resume, :key_data, %{}) + key_data: Map.get(resume, :key_data, %{}), + disjunct_positions: Map.get(resume, :disjunct_positions) } end @@ -116,7 +119,8 @@ defmodule Electric.Client.ShapeState do tag_to_keys: %{}, key_data: %{}, recent_requests: [], - fast_loop_consecutive_count: 0 + fast_loop_consecutive_count: 0, + disjunct_positions: nil } end @@ -130,7 +134,8 @@ defmodule Electric.Client.ShapeState do offset: state.offset, schema: state.schema, tag_to_keys: state.tag_to_keys, - key_data: state.key_data + key_data: state.key_data, + disjunct_positions: state.disjunct_positions } end diff --git 
a/packages/elixir-client/lib/electric/client/tag_tracker.ex b/packages/elixir-client/lib/electric/client/tag_tracker.ex index 56f0efddf7..5353a28cf3 100644 --- a/packages/elixir-client/lib/electric/client/tag_tracker.ex +++ b/packages/elixir-client/lib/electric/client/tag_tracker.ex @@ -8,81 +8,128 @@ defmodule Electric.Client.TagTracker do ## Data Structures - Two maps are maintained: - - `tag_to_keys`: `%{tag_value => MapSet}` - which keys have each tag - - `key_data`: `%{key => %{tags: MapSet, msg: msg}}` - each key's current tags and latest message + Three structures are maintained: + - `tag_to_keys`: `%{{position, hash} => MapSet}` - which keys have each position-hash pair + - `key_data`: `%{key => %{tags: MapSet<{pos, hash}>, active_conditions: [boolean()] | nil, msg: msg}}` - each key's current state + - `disjunct_positions`: `[[integer()]] | nil` - shared across all keys, derived once from the first tagged message - This allows: - 1. Avoiding duplicate entries when a row is updated (we update the msg, not add a new entry) - 2. Checking if a row still has other tags before generating a synthetic delete + Tags arrive as slash-delimited strings per disjunct (e.g., `"hash1/hash2/"`, `"//hash3"`). + They are normalized into 2D arrays and indexed by `{position, hash_value}` tuples. + + For shapes with `active_conditions`, visibility is evaluated using DNF (Disjunctive Normal Form): + a row is visible if at least one disjunct is satisfied (OR of ANDs over positions). 
""" alias Electric.Client.Message.ChangeMessage alias Electric.Client.Message.Headers - @type tag :: String.t() + @type position_hash :: {non_neg_integer(), String.t()} @type key :: String.t() - @type tag_to_keys :: %{optional(tag()) => MapSet.t(key())} - @type key_data :: %{optional(key()) => %{tags: MapSet.t(tag()), msg: ChangeMessage.t()}} + @type tag_to_keys :: %{optional(position_hash()) => MapSet.t(key())} + @type key_data :: %{ + optional(key()) => %{ + tags: MapSet.t(position_hash()), + active_conditions: [boolean()] | nil, + msg: ChangeMessage.t() + } + } + @type disjunct_positions :: [[non_neg_integer()]] | nil @doc """ Update the tag index when a change message is received. - Returns `{updated_tag_to_keys, updated_key_data}`. + Tags are normalized from slash-delimited wire format to position-indexed entries. + `disjunct_positions` is derived once from the first tagged message and reused for all + subsequent messages, since it is determined by the shape's WHERE clause structure. + + Returns `{updated_tag_to_keys, updated_key_data, disjunct_positions}`. 
""" - @spec update_tag_index(tag_to_keys(), key_data(), ChangeMessage.t()) :: - {tag_to_keys(), key_data()} - def update_tag_index(tag_to_keys, key_data, %ChangeMessage{headers: headers, key: key} = msg) do - new_tags = headers.tags || [] - removed_tags = headers.removed_tags || [] + @spec update_tag_index(tag_to_keys(), key_data(), disjunct_positions(), ChangeMessage.t()) :: + {tag_to_keys(), key_data(), disjunct_positions()} + def update_tag_index( + tag_to_keys, + key_data, + disjunct_positions, + %ChangeMessage{headers: headers, key: key} = msg + ) do + raw_new_tags = headers.tags || [] + raw_removed_tags = headers.removed_tags || [] + + active_conditions = + case headers.active_conditions do + [] -> nil + nil -> nil + ac -> ac + end + + # Normalize tags to 2D arrays + normalized_new = normalize_tags(raw_new_tags) + normalized_removed = normalize_tags(raw_removed_tags) + + # Extract position-hash entries + new_entries = extract_position_entries(normalized_new) + removed_entries = extract_position_entries(normalized_removed) # Get current data for this key current_data = Map.get(key_data, key) - current_tags = if current_data, do: current_data.tags, else: MapSet.new() - - # Calculate the new set of tags for this key - updated_tags = - current_tags - |> MapSet.difference(MapSet.new(removed_tags)) - |> MapSet.union(MapSet.new(new_tags)) + current_entries = if current_data, do: current_data.tags, else: MapSet.new() + + # Calculate updated entries + updated_entries = + current_entries + |> MapSet.difference(removed_entries) + |> MapSet.union(new_entries) + + # Derive disjunct positions once from the first tagged message + disjunct_positions = + case disjunct_positions do + nil -> + case derive_disjunct_positions(normalized_new) do + [] -> nil + positions -> positions + end + + already_set -> + already_set + end - # For deletes, remove the key entirely case headers.operation do :delete -> - # Remove key from all its tags in tag_to_keys + # Remove key from all its 
entries in tag_to_keys updated_tag_to_keys = - Enum.reduce(updated_tags, tag_to_keys, fn tag, acc -> - remove_key_from_tag(acc, tag, key) + Enum.reduce(updated_entries, tag_to_keys, fn entry, acc -> + remove_key_from_tag(acc, entry, key) end) - # Remove key from key_data - {updated_tag_to_keys, Map.delete(key_data, key)} + {updated_tag_to_keys, Map.delete(key_data, key), disjunct_positions} _ -> - # If no tags (current or new), don't track this key - if MapSet.size(updated_tags) == 0 do - # Remove key from all its previous tags in tag_to_keys + if MapSet.size(updated_entries) == 0 do + # No entries - remove key from tracking updated_tag_to_keys = - Enum.reduce(current_tags, tag_to_keys, fn tag, acc -> - remove_key_from_tag(acc, tag, key) + Enum.reduce(current_entries, tag_to_keys, fn entry, acc -> + remove_key_from_tag(acc, entry, key) end) - # Remove key from key_data - {updated_tag_to_keys, Map.delete(key_data, key)} + {updated_tag_to_keys, Map.delete(key_data, key), disjunct_positions} else - # Update tag_to_keys: remove from old tags, add to new tags - tags_to_remove = MapSet.difference(current_tags, updated_tags) - tags_to_add = MapSet.difference(updated_tags, current_tags) + # Update tag_to_keys: remove old entries, add new entries + entries_to_remove = MapSet.difference(current_entries, updated_entries) + entries_to_add = MapSet.difference(updated_entries, current_entries) updated_tag_to_keys = tag_to_keys - |> remove_key_from_tags(tags_to_remove, key) - |> add_key_to_tags(tags_to_add, key) + |> remove_key_from_tags(entries_to_remove, key) + |> add_key_to_tags(entries_to_add, key) - # Update key_data with new tags and latest message - updated_key_data = Map.put(key_data, key, %{tags: updated_tags, msg: msg}) + updated_key_data = + Map.put(key_data, key, %{ + tags: updated_entries, + active_conditions: active_conditions, + msg: msg + }) - {updated_tag_to_keys, updated_key_data} + {updated_tag_to_keys, updated_key_data, disjunct_positions} end end end @@ 
-90,50 +137,90 @@ defmodule Electric.Client.TagTracker do @doc """ Generate synthetic delete messages for keys matching move-out patterns. + Patterns contain `%{pos: position, value: hash}` maps. For keys with + `active_conditions`, positions are deactivated and visibility is re-evaluated + using DNF with the shared `disjunct_positions`. For keys without + `active_conditions`, the old behavior applies: delete when no entries remain. + Returns `{synthetic_deletes, updated_tag_to_keys, updated_key_data}`. """ - @spec generate_synthetic_deletes(tag_to_keys(), key_data(), [map()], DateTime.t()) :: + @spec generate_synthetic_deletes( + tag_to_keys(), + key_data(), + disjunct_positions(), + [map()], + DateTime.t() + ) :: {[ChangeMessage.t()], tag_to_keys(), key_data()} - def generate_synthetic_deletes(tag_to_keys, key_data, patterns, request_timestamp) do - # Assumption: move-out patterns only include simple tag values; positional matching - # for composite tags is not needed with the current server behavior. 
- - # First pass: collect all keys that match any pattern and remove those tags - {matched_keys_with_tags, updated_tag_to_keys} = - Enum.reduce(patterns, {%{}, tag_to_keys}, fn %{value: tag_value}, {keys_acc, ttk_acc} -> - case Map.pop(ttk_acc, tag_value) do - {nil, ttk_acc} -> - {keys_acc, ttk_acc} - - {keys_in_tag, ttk_acc} -> - # Track which tags were removed for each key - updated_keys_acc = - Enum.reduce(keys_in_tag, keys_acc, fn key, acc -> - removed_tags = Map.get(acc, key, MapSet.new()) - Map.put(acc, key, MapSet.put(removed_tags, tag_value)) - end) - - {updated_keys_acc, ttk_acc} + def generate_synthetic_deletes( + tag_to_keys, + key_data, + disjunct_positions, + patterns, + request_timestamp + ) do + # First pass: collect all keys that match any pattern (without modifying tag_to_keys) + matched_keys_with_entries = + Enum.reduce(patterns, %{}, fn %{pos: pos, value: value}, keys_acc -> + tag_key = {pos, value} + + case Map.get(tag_to_keys, tag_key) do + nil -> + keys_acc + + keys_in_tag -> + Enum.reduce(keys_in_tag, keys_acc, fn key, acc -> + Map.update(acc, key, MapSet.new([tag_key]), &MapSet.put(&1, tag_key)) + end) end end) - # Second pass: for each matched key, update its tags and check if it should be deleted - {keys_to_delete, updated_key_data} = - Enum.reduce(matched_keys_with_tags, {[], key_data}, fn {key, removed_tags}, - {deletes, kd_acc} -> + # Second pass: evaluate visibility, update key_data and tag_to_keys together + {keys_to_delete, updated_key_data, updated_tag_to_keys} = + Enum.reduce(matched_keys_with_entries, {[], key_data, tag_to_keys}, fn {key, + removed_entries}, + {deletes, kd_acc, + ttk_acc} -> case Map.get(kd_acc, key) do nil -> - {deletes, kd_acc} - - %{tags: current_tags, msg: msg} -> - remaining_tags = MapSet.difference(current_tags, removed_tags) - - if MapSet.size(remaining_tags) == 0 do - # No remaining tags - key should be deleted - {[{key, msg} | deletes], Map.delete(kd_acc, key)} - else - # Still has other tags - update 
key_data but don't delete - {deletes, Map.put(kd_acc, key, %{tags: remaining_tags, msg: msg})} + {deletes, kd_acc, ttk_acc} + + %{tags: current_entries, msg: msg} = data -> + case data.active_conditions do + nil -> + # Legacy/simple shapes without active_conditions still use + # empty-tag-set deletion semantics rather than DNF visibility. + remaining_entries = MapSet.difference(current_entries, removed_entries) + ttk_acc = remove_key_from_tags(ttk_acc, removed_entries, key) + + if MapSet.size(remaining_entries) == 0 do + {[{key, msg} | deletes], Map.delete(kd_acc, key), ttk_acc} + else + updated_data = %{data | tags: remaining_entries} + {deletes, Map.put(kd_acc, key, updated_data), ttk_acc} + end + + active_conditions -> + deactivated_positions = + MapSet.new(removed_entries, fn {pos, _} -> pos end) + + updated_ac = + active_conditions + |> Enum.with_index() + |> Enum.map(fn {val, idx} -> + if MapSet.member?(deactivated_positions, idx), do: false, else: val + end) + + visible = row_visible?(updated_ac, disjunct_positions) + + if not visible do + ttk_acc = remove_key_from_tags(ttk_acc, current_entries, key) + + {[{key, msg} | deletes], Map.delete(kd_acc, key), ttk_acc} + else + updated_data = %{data | active_conditions: updated_ac} + {deletes, Map.put(kd_acc, key, updated_data), ttk_acc} + end end end end) @@ -157,7 +244,118 @@ defmodule Electric.Client.TagTracker do {synthetic_deletes, updated_tag_to_keys, updated_key_data} end - # Private helpers + @doc """ + Evaluate DNF visibility from active_conditions and disjunct structure. + + A row is visible if at least one disjunct is satisfied. + A disjunct is satisfied when all its positions have `active_conditions[pos] == true`. 
+ """ + @spec row_visible?([boolean()], [[non_neg_integer()]]) :: boolean() + def row_visible?(active_conditions, disjunct_positions) do + Enum.any?(disjunct_positions, fn positions -> + Enum.all?(positions, fn pos -> + Enum.at(active_conditions, pos, false) == true + end) + end) + end + + @doc """ + Activate positions for keys matching move-in patterns. + + Sets `active_conditions[pos]` to `true` for keys that have + matching `{pos, value}` entries in the tag index. + + Returns `{updated_tag_to_keys, updated_key_data}`. + """ + @spec handle_move_in(tag_to_keys(), key_data(), [map()]) :: + {tag_to_keys(), key_data()} + def handle_move_in(tag_to_keys, key_data, patterns) do + updated_key_data = + Enum.reduce(patterns, key_data, fn %{pos: pos, value: value}, kd_acc -> + tag_key = {pos, value} + + case Map.get(tag_to_keys, tag_key) do + nil -> + kd_acc + + keys -> + Enum.reduce(keys, kd_acc, fn key, acc -> + case Map.get(acc, key) do + %{active_conditions: ac} = data when ac != nil -> + updated_ac = List.replace_at(ac, pos, true) + Map.put(acc, key, %{data | active_conditions: updated_ac}) + + _ -> + acc + end + end) + end + end) + + {tag_to_keys, updated_key_data} + end + + @doc """ + Normalize slash-delimited wire format tags to 2D arrays. + + Each tag string represents a disjunct with "/" separating position hashes. + Empty strings are replaced with nil (position not relevant to this disjunct). 
+ + ## Examples + + iex> Electric.Client.TagTracker.normalize_tags(["hash_a/hash_b"]) + [["hash_a", "hash_b"]] + + iex> Electric.Client.TagTracker.normalize_tags(["hash_a/", "/hash_b"]) + [["hash_a", nil], [nil, "hash_b"]] + + iex> Electric.Client.TagTracker.normalize_tags(["tag_a"]) + [["tag_a"]] + """ + @spec normalize_tags([String.t()]) :: [[String.t() | nil]] + def normalize_tags([]), do: [] + + def normalize_tags(tags) do + Enum.map(tags, fn tag -> + tag + |> String.split("/") + |> Enum.map(fn + "" -> nil + hash -> hash + end) + end) + end + + # --- Private helpers --- + + # Extract {position, hash} entries from normalized 2D tags. + defp extract_position_entries(normalized_tags) do + normalized_tags + |> Enum.flat_map(fn disjunct -> + disjunct + |> Enum.with_index() + |> Enum.flat_map(fn + {nil, _pos} -> [] + {hash, pos} -> [{pos, hash}] + end) + end) + |> MapSet.new() + end + + # Derive disjunct positions from normalized tags. + # Each disjunct lists the positions that are non-nil. 
+ defp derive_disjunct_positions([]), do: [] + + defp derive_disjunct_positions(normalized_tags) do + Enum.map(normalized_tags, fn disjunct -> + disjunct + |> Enum.with_index() + |> Enum.flat_map(fn + {nil, _pos} -> [] + {_hash, pos} -> [pos] + end) + end) + end defp remove_key_from_tags(tag_to_keys, tags, key) do Enum.reduce(tags, tag_to_keys, fn tag, acc -> diff --git a/packages/elixir-client/test/electric/client/tag_tracker_test.exs b/packages/elixir-client/test/electric/client/tag_tracker_test.exs index 18e41f2071..ab1898b286 100644 --- a/packages/elixir-client/test/electric/client/tag_tracker_test.exs +++ b/packages/elixir-client/test/electric/client/tag_tracker_test.exs @@ -8,6 +8,7 @@ defmodule Electric.Client.TagTrackerTest do defp make_change_msg(key, operation, opts) do tags = Keyword.get(opts, :tags, []) removed_tags = Keyword.get(opts, :removed_tags, []) + active_conditions = Keyword.get(opts, :active_conditions, []) value = Keyword.get(opts, :value, %{"id" => key}) %ChangeMessage{ @@ -19,66 +20,67 @@ defmodule Electric.Client.TagTrackerTest do relation: ["public", "test"], handle: "test-handle", tags: tags, - removed_tags: removed_tags + removed_tags: removed_tags, + active_conditions: active_conditions }, request_timestamp: DateTime.utc_now() } end - describe "update_tag_index/3" do + describe "update_tag_index/4" do test "tracks new tags for inserts" do msg = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) assert tag_to_keys == %{ - "tag_a" => MapSet.new(["key1"]), - "tag_b" => MapSet.new(["key1"]) + {0, "tag_a"} => MapSet.new(["key1"]), + {0, "tag_b"} => MapSet.new(["key1"]) } assert Map.has_key?(key_data, "key1") - assert key_data["key1"].tags == MapSet.new(["tag_a", "tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_a"}, {0, "tag_b"}]) end test "updates tags for updates" do 
# Initial insert with tag_a msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) # Update adds tag_b msg2 = make_change_msg("key1", :update, tags: ["tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "tag_a" => MapSet.new(["key1"]), - "tag_b" => MapSet.new(["key1"]) + {0, "tag_a"} => MapSet.new(["key1"]), + {0, "tag_b"} => MapSet.new(["key1"]) } - assert key_data["key1"].tags == MapSet.new(["tag_a", "tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_a"}, {0, "tag_b"}]) end test "removes tags when removed_tags specified" do # Initial insert with tag_a and tag_b msg1 = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) # Update removes tag_a msg2 = make_change_msg("key1", :update, removed_tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "tag_b" => MapSet.new(["key1"]) + {0, "tag_b"} => MapSet.new(["key1"]) } - assert key_data["key1"].tags == MapSet.new(["tag_b"]) + assert key_data["key1"].tags == MapSet.new([{0, "tag_b"}]) end test "removes key from tracking on delete" do msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) msg2 = make_change_msg("key1", :delete, tags: []) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) 
+ {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{} assert key_data == %{} @@ -86,7 +88,7 @@ defmodule Electric.Client.TagTrackerTest do test "handles messages without tags" do msg = make_change_msg("key1", :insert, tags: []) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) assert tag_to_keys == %{} assert key_data == %{} @@ -96,11 +98,11 @@ defmodule Electric.Client.TagTrackerTest do msg1 = make_change_msg("key1", :insert, tags: ["shared_tag"]) msg2 = make_change_msg("key2", :insert, tags: ["shared_tag"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, _dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) assert tag_to_keys == %{ - "shared_tag" => MapSet.new(["key1", "key2"]) + {0, "shared_tag"} => MapSet.new(["key1", "key2"]) } assert Map.has_key?(key_data, "key1") @@ -108,21 +110,30 @@ defmodule Electric.Client.TagTrackerTest do end end - describe "generate_synthetic_deletes/4" do + describe "generate_synthetic_deletes/5" do test "generates deletes for keys matching pattern" do - # Set up: two keys with tag_a - msg1 = make_change_msg("key1", :insert, tags: ["tag_a"], value: %{"id" => "1"}) - msg2 = make_change_msg("key2", :insert, tags: ["tag_a"], value: %{"id" => "2"}) + msg1 = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true], + value: %{"id" => "1"} + ) + + msg2 = + make_change_msg("key2", :insert, + tags: ["tag_a"], + active_conditions: [true], + value: %{"id" => "2"} + ) + + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, 
key_data, dp, msg2) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) - - # Move-out for tag_a patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert length(deletes) == 2 @@ -138,38 +149,46 @@ defmodule Electric.Client.TagTrackerTest do assert new_key_data == %{} end - test "does not delete keys with remaining tags" do - # Set up: key1 has tag_a and tag_b - msg = make_change_msg("key1", :insert, tags: ["tag_a", "tag_b"], value: %{"id" => "1"}) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + test "does not delete keys still visible via another disjunct" do + # key1 has two disjuncts: pos 0 and pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["tag_a/", "/tag_b"], + active_conditions: [true, true], + value: %{"id" => "1"} + ) + + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) - # Move-out only for tag_a + # Move-out only for pos 0 patterns = [%{pos: 0, value: "tag_a"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) - # No synthetic deletes - key1 still has tag_b assert deletes == [] + assert new_key_data["key1"].active_conditions == [false, true] - # tag_a removed, tag_b remains - assert new_tag_to_keys == %{ - "tag_b" => MapSet.new(["key1"]) - } - - assert new_key_data["key1"].tags == MapSet.new(["tag_b"]) + # tag_to_keys entries preserved for move-in broadcasts + assert Map.has_key?(new_tag_to_keys, {0, "tag_a"}) + assert Map.has_key?(new_tag_to_keys, {1, "tag_b"}) end 
test "handles non-existent tag pattern" do - msg = make_change_msg("key1", :insert, tags: ["tag_a"]) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg) + msg = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true] + ) + + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) patterns = [%{pos: 0, value: "nonexistent_tag"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert deletes == [] assert new_tag_to_keys == tag_to_keys @@ -177,21 +196,463 @@ defmodule Electric.Client.TagTrackerTest do end test "handles multiple patterns in one call" do - msg1 = make_change_msg("key1", :insert, tags: ["tag_a"]) - msg2 = make_change_msg("key2", :insert, tags: ["tag_b"]) + msg1 = + make_change_msg("key1", :insert, + tags: ["tag_a"], + active_conditions: [true] + ) - {tag_to_keys, key_data} = TagTracker.update_tag_index(%{}, %{}, msg1) - {tag_to_keys, key_data} = TagTracker.update_tag_index(tag_to_keys, key_data, msg2) + msg2 = + make_change_msg("key2", :insert, + tags: ["tag_b"], + active_conditions: [true] + ) + + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(tag_to_keys, key_data, dp, msg2) patterns = [%{pos: 0, value: "tag_a"}, %{pos: 0, value: "tag_b"}] timestamp = DateTime.utc_now() {deletes, new_tag_to_keys, new_key_data} = - TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, patterns, timestamp) + TagTracker.generate_synthetic_deletes(tag_to_keys, key_data, dp, patterns, timestamp) assert length(deletes) == 2 assert new_tag_to_keys == %{} assert new_key_data == %{} end + + test "falls back to empty-tag-set deletion when active_conditions are missing" do + msg = + make_change_msg("key1", :insert, + tags: 
["tag_a", "tag_b"], + value: %{"id" => "1"} + ) + + {tag_to_keys, key_data, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + assert key_data["key1"].active_conditions == nil + + {deletes, tag_to_keys, key_data} = + TagTracker.generate_synthetic_deletes( + tag_to_keys, + key_data, + dp, + [%{pos: 0, value: "tag_a"}], + DateTime.utc_now() + ) + + assert deletes == [] + assert key_data["key1"].active_conditions == nil + assert key_data["key1"].tags == MapSet.new([{0, "tag_b"}]) + assert tag_to_keys == %{{0, "tag_b"} => MapSet.new(["key1"])} + + {deletes, tag_to_keys, key_data} = + TagTracker.generate_synthetic_deletes( + tag_to_keys, + key_data, + dp, + [%{pos: 0, value: "tag_b"}], + DateTime.utc_now() + ) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + assert tag_to_keys == %{} + assert key_data == %{} + end + end + + describe "normalize_tags/1" do + test "normalizes slash-delimited tags to 2D structure" do + assert TagTracker.normalize_tags(["hash1/hash2/", "//hash3"]) == + [["hash1", "hash2", nil], [nil, nil, "hash3"]] + + assert TagTracker.normalize_tags(["tag_a"]) == [["tag_a"]] + assert TagTracker.normalize_tags([]) == [] + end + + test "single-position tags normalize to single-element lists" do + assert TagTracker.normalize_tags(["hash_a", "hash_b"]) == + [["hash_a"], ["hash_b"]] + end + + test "multi-position tags with mixed nils" do + assert TagTracker.normalize_tags(["hash_a/", "/hash_b"]) == + [["hash_a", nil], [nil, "hash_b"]] + end + end + + describe "tag_tracker with DNF wire format" do + test "removed_tags in slash-delimited format are correctly filtered" do + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + + assert ttk == %{ + {0, "hash_a"} => MapSet.new(["key1"]), + {1, "hash_b"} => MapSet.new(["key1"]) + } + + # Remove hash_a via slash-delimited removed_tags, add new hash at pos 0 + msg2 = + 
make_change_msg("key1", :update, + tags: ["hash_c/hash_b"], + removed_tags: ["hash_a/"], + active_conditions: [true, true] + ) + + {ttk, _kd, _dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + + assert ttk == %{ + {0, "hash_c"} => MapSet.new(["key1"]), + {1, "hash_b"} => MapSet.new(["key1"]) + } + end + + test "row_visible? evaluates DNF correctly" do + # Disjunct 0 needs positions [0, 1], disjunct 1 needs positions [2] + disjunct_positions = [[0, 1], [2]] + + # All active + assert TagTracker.row_visible?([true, true, true], disjunct_positions) + + # Only disjunct 0 satisfied + assert TagTracker.row_visible?([true, true, false], disjunct_positions) + + # Only disjunct 1 satisfied + assert TagTracker.row_visible?([false, false, true], disjunct_positions) + + # No disjunct satisfied (pos 0 false means disjunct 0 fails, pos 2 false means disjunct 1 fails) + refute TagTracker.row_visible?([false, true, false], disjunct_positions) + refute TagTracker.row_visible?([false, false, false], disjunct_positions) + end + + test "generate_synthetic_deletes only deletes when all disjuncts unsatisfied" do + # Key1 has two disjuncts: disjunct 0 uses pos 0, disjunct 1 uses pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Move-out at position 0 - disjunct 1 still satisfied + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + # Still visible via disjunct 1 + assert deletes == [] + assert kd["key1"].active_conditions == [false, true] + + # Move-out at position 1 - no disjunct satisfied + patterns = [%{pos: 1, value: "hash_b"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test 
"handle_move_in activates correct positions" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + ) + + {ttk, kd, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Position 1 is inactive + refute Enum.at(kd["key1"].active_conditions, 1) + + # Move-in activates position 1 + patterns = [%{pos: 1, value: "hash_b"}] + {_ttk, kd} = TagTracker.handle_move_in(ttk, kd, patterns) + + assert kd["key1"].active_conditions == [true, true] + end + + test "position-based tag_to_keys index for multi-disjunct shapes" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b", "hash_c/hash_d"], + active_conditions: [true, true] + ) + + {ttk, _kd, _dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + assert Map.has_key?(ttk, {0, "hash_a"}) + assert Map.has_key?(ttk, {1, "hash_b"}) + assert Map.has_key?(ttk, {0, "hash_c"}) + assert Map.has_key?(ttk, {1, "hash_d"}) + end + + test "active_conditions stored from headers and disjunct_positions derived once" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, false] + ) + + {_ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + assert kd["key1"].active_conditions == [true, false] + assert dp == [[0, 1]] + end + + test "orphaned tag_to_keys entries after delete do not cause phantom deletes" do + # Shape: (A AND C) OR (B AND C) → disjuncts [[0,1], [2,3]] + # Row "r" has all 4 positions active with hash "X" + msg = + make_change_msg("r", :insert, + tags: ["X/X//", "//X/X"], + active_conditions: [true, true, true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Deactivate positions 1 and 3 (dep C moves out with hash "X") + # Both disjuncts lose their C position → row invisible → deleted from key_data + patterns = [%{pos: 1, value: "X"}, %{pos: 3, value: "X"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, 
DateTime.utc_now()) + + assert length(deletes) == 1 + assert hd(deletes).key == "r" + refute Map.has_key?(kd, "r") + + # Bug: {0, "X"} and {2, "X"} are still in tag_to_keys as orphans + # pointing to the deleted key "r" + + # Re-insert row "r" with NEW hash "Y" at all positions (move-in) + msg = + make_change_msg("r", :insert, + tags: ["Y/Y//", "//Y/Y"], + active_conditions: [true, true, true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg) + + # Deactivate position 0 with STALE hash "X" — should have NO effect + # since the row's current hash at pos 0 is "Y", not "X" + patterns = [%{pos: 0, value: "X"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert deletes == [] + # Without fix: active_conditions would be corrupted to [false, true, true, true] + assert kd["r"].active_conditions == [true, true, true, true] + + # Now a legitimate deactivation at position 2 with current hash "Y" + patterns = [%{pos: 2, value: "Y"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + # Disjunct 0 ([0,1]) is still fully active → row should remain visible + # Without fix: the corrupted pos 0 causes both disjuncts to fail → phantom delete + assert deletes == [] + end + + test "disjunct structure derived correctly from slash-delimited tags" do + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {_ttk, _kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Disjunct 0 uses position 0, disjunct 1 uses position 1 + assert dp == [[0], [1]] + end + + test "multi-disjunct: row stays when one disjunct lost, deleted when all lost" do + # Tags: ["hash_a/hash_b/", "//hash_c"] + # Disjunct 0 covers positions [0, 1], disjunct 1 covers position [2] + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b/", "//hash_c"], + active_conditions: [true, true, 
true], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + assert dp == [[0, 1], [2]] + + # Move-out at position 0 → disjunct 0 fails (needs [0,1]), disjunct 1 (pos 2) still satisfied + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert deletes == [] + assert kd["key1"].active_conditions == [false, true, true] + + # Move-out at position 2 → disjunct 1 also fails, no disjunct satisfied + patterns = [%{pos: 2, value: "hash_c"}] + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test "overwrite active_conditions when row is re-sent (move-in overwrite)" do + # Insert row with active_conditions [true, false] + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, false], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + assert kd["key1"].active_conditions == [true, false] + + # Server re-sends the same row with updated active_conditions + msg2 = + make_change_msg("key1", :update, + tags: ["hash_a/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1", "name" => "User 1 updated"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + assert kd["key1"].active_conditions == [true, true] + + # Verify the overwritten active_conditions work correctly: + # With single disjunct [0,1], move-out at pos 0 should make row invisible + patterns = [%{pos: 0, value: "hash_a"}] + timestamp = DateTime.utc_now() + + {deletes, _ttk, _kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, timestamp) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + end + + test "move-out preserves tag_to_keys so 
move-in can re-activate" do + # Row with two disjuncts: pos 0 and pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1", "name" => "User 1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Move-out at pos 0 — row stays visible via disjunct 1 + patterns = [%{pos: 0, value: "hash_a"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert deletes == [] + assert kd["key1"].active_conditions == [false, true] + + # Move-in at pos 0 — should find key1 via preserved tag_to_keys entry + patterns = [%{pos: 0, value: "hash_a"}] + {ttk, kd} = TagTracker.handle_move_in(ttk, kd, patterns) + + assert kd["key1"].active_conditions == [true, true] + + # Now both disjuncts active again; move-out at pos 1 alone should not delete + patterns = [%{pos: 1, value: "hash_b"}] + + {deletes, _ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert deletes == [] + assert kd["key1"].active_conditions == [true, false] + end + + test "deleted row cleans up all tag_to_keys entries" do + # Row with entries at pos 0 and pos 1 in a single disjunct + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + assert Map.has_key?(ttk, {0, "hash_a"}) + assert Map.has_key?(ttk, {1, "hash_b"}) + + # Move-out at pos 0 — single disjunct [0,1] fails → row deleted + patterns = [%{pos: 0, value: "hash_a"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert length(deletes) == 1 + assert kd == %{} + # Both entries cleaned, not just the matched {0, "hash_a"} + refute Map.has_key?(ttk, {0, "hash_a"}) + refute Map.has_key?(ttk, {1, "hash_b"}) + end + + test "multiple patterns 
deactivating same row in one call" do + # Row with single disjunct needing both pos 0 and pos 1 + msg = + make_change_msg("key1", :insert, + tags: ["hash_a/hash_b"], + active_conditions: [true, true], + value: %{"id" => "1"} + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg) + + # Both positions deactivated in one call + patterns = [%{pos: 0, value: "hash_a"}, %{pos: 1, value: "hash_b"}] + + {deletes, ttk, kd} = + TagTracker.generate_synthetic_deletes(ttk, kd, dp, patterns, DateTime.utc_now()) + + assert length(deletes) == 1 + assert hd(deletes).key == "key1" + assert kd == %{} + assert ttk == %{} + end + + test "disjunct_positions derived once and reused across keys" do + msg1 = + make_change_msg("key1", :insert, + tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + ) + + {ttk, kd, dp} = TagTracker.update_tag_index(%{}, %{}, nil, msg1) + assert dp == [[0], [1]] + + # Second key with different hashes but same structure + msg2 = + make_change_msg("key2", :insert, + tags: ["hash_c/", "/hash_d"], + active_conditions: [true, false] + ) + + {_ttk, _kd, dp2} = TagTracker.update_tag_index(ttk, kd, dp, msg2) + + # disjunct_positions unchanged — derived once, reused + assert dp2 == dp + end end end diff --git a/packages/elixir-client/test/electric/client_test.exs b/packages/elixir-client/test/electric/client_test.exs index d7105f32fe..91ce33ba19 100644 --- a/packages/elixir-client/test/electric/client_test.exs +++ b/packages/elixir-client/test/electric/client_test.exs @@ -1601,7 +1601,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-abc"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-abc"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "test"} }, @@ -1664,13 +1668,21 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => 
["shared-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["shared-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, %{ "key" => "row-2", - "headers" => %{"operation" => "insert", "tags" => ["shared-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["shared-tag"], + "active_conditions" => [true] + }, "offset" => "1_1", "value" => %{"id" => "2222"} }, @@ -1732,7 +1744,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-A"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1794,7 +1810,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["old-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["old-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1808,7 +1828,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", "tags" => ["new-tag"], - "removed_tags" => ["old-tag"] + "removed_tags" => ["old-tag"], + "active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -1871,7 +1892,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -1882,7 +1907,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "delete", "tags" => ["my-tag"]}, + "headers" => %{ + "operation" => "delete", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "2_0", "value" => %{"id" => "1111"} }, @@ -1949,7 +1978,11 @@ 
defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "version" => "1"} }, @@ -1963,7 +1996,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", # Same tag, but no removed_tags - this is the problematic case - "tags" => ["my-tag"] + "tags" => ["my-tag"], + "active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "version" => "2"} @@ -2025,15 +2059,20 @@ defmodule Electric.ClientTest do "Expected 1 synthetic delete but got #{length(delete_msgs)} - duplicate entries in tag_index" end - test "row with multiple tags - partial move-out should not delete if other tags remain", + test "row with multiple disjuncts - partial move-out should not delete if another disjunct satisfied", ctx do - # Edge case: row has multiple tags, move-out for one tag shouldn't delete - # if the row still belongs to the shape via another tag + # Row matches via two disjuncts at different positions. + # Move-out for one position shouldn't delete if the other disjunct is still satisfied. 
+ # Tags: ["tag-a/", "/tag-b"] means disjunct 0 uses pos 0, disjunct 1 uses pos 1 body1 = Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-a", "tag-b"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-a/", "/tag-b"], + "active_conditions" => [true, true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2045,7 +2084,7 @@ defmodule Electric.ClientTest do %{ "headers" => %{ "event" => "move-out", - # Only moving out tag-a, row still has tag-b + # Only moving out pos 0, disjunct 1 (pos 1) still satisfied "patterns" => [%{"pos" => 0, "value" => "tag-a"}] } }, @@ -2078,17 +2117,13 @@ defmodule Electric.ClientTest do bypass_response(ctx, responses) - # insert, up-to-date, up-to-date - # BUG: Currently generates a synthetic delete even though row still has tag-b - # EXPECTED: No synthetic delete since row still belongs via tag-b + # insert, up-to-date, up-to-date (no synthetic delete) msgs = stream(ctx, 3) delete_msgs = Enum.filter(msgs, &match?(%ChangeMessage{headers: %{operation: :delete}}, &1)) - # This documents expected behavior - row should NOT be deleted - # If this fails, it confirms the bug that partial move-out incorrectly deletes assert delete_msgs == [], - "Row with multiple tags should not be deleted when only one tag is moved out" + "Row should not be deleted when another disjunct is still satisfied" end test "synthetic delete uses latest value after update", ctx do @@ -2097,7 +2132,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111", "name" => "original"} }, @@ -2111,7 +2150,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", "tags" => ["my-tag"], - "removed_tags" => ["my-tag"] + "removed_tags" => ["my-tag"], + 
"active_conditions" => [true] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -2174,70 +2214,6 @@ defmodule Electric.ClientTest do "Synthetic delete should use latest value, got: #{inspect(delete.value)}" end - test "multiple patterns matching same row generates single delete", ctx do - # Edge case: move-out with multiple patterns that both match the same row - body1 = - Jason.encode!([ - %{ - "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-a", "tag-b"]}, - "offset" => "1_0", - "value" => %{"id" => "1111"} - }, - %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9998}} - ]) - - body2 = - Jason.encode!([ - %{ - "headers" => %{ - "event" => "move-out", - # Both patterns match the same row - "patterns" => [ - %{"pos" => 0, "value" => "tag-a"}, - %{"pos" => 1, "value" => "tag-b"} - ] - } - }, - %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9999}} - ]) - - schema = Jason.encode!(%{"id" => %{type: "text"}}) - - {:ok, responses} = - start_supervised( - {Agent, - fn -> - %{ - {"-1", nil} => [ - &bypass_resp(&1, body1, - shape_handle: "my-shape", - last_offset: "1_0", - schema: schema - ) - ], - {"1_0", "my-shape"} => [ - &bypass_resp(&1, body2, - shape_handle: "my-shape", - last_offset: "2_0" - ) - ] - } - end} - ) - - bypass_response(ctx, responses) - - # insert, up-to-date, synthetic delete, up-to-date - msgs = stream(ctx, 4) - - delete_msgs = Enum.filter(msgs, &match?(%ChangeMessage{headers: %{operation: :delete}}, &1)) - - # Should only generate 1 delete, not 2 - assert length(delete_msgs) == 1, - "Multiple patterns matching same row should generate single delete, got #{length(delete_msgs)}" - end - test "update removing all tags should clear tag index so move-out is a no-op", ctx do # This test demonstrates the stale tag-index entry bug: # When a row is updated to remove ALL its tags (with removed_tags but no new tags), @@ -2252,7 +2228,11 @@ defmodule 
Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["tag-A"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["tag-A"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2266,8 +2246,8 @@ defmodule Electric.ClientTest do "headers" => %{ "operation" => "update", # Remove the old tag but add NO new tags - "removed_tags" => ["tag-A"] - # Note: no "tags" field, meaning this row now has zero tags + "removed_tags" => ["tag-A"], + "active_conditions" => [false] }, "offset" => "2_0", "value" => %{"id" => "1111", "name" => "updated"} @@ -2340,7 +2320,11 @@ defmodule Electric.ClientTest do Jason.encode!([ %{ "key" => "row-1", - "headers" => %{"operation" => "insert", "tags" => ["my-tag"]}, + "headers" => %{ + "operation" => "insert", + "tags" => ["my-tag"], + "active_conditions" => [true] + }, "offset" => "1_0", "value" => %{"id" => "1111"} }, @@ -2417,6 +2401,87 @@ defmodule Electric.ClientTest do [delete] = delete_msgs assert delete.value["id"] == "1111" end + + test "resume preserves legacy move-out state when active_conditions are missing", ctx do + body1 = + Jason.encode!([ + %{ + "key" => "row-1", + "headers" => %{ + "operation" => "insert", + "tags" => ["legacy-tag"] + }, + "offset" => "1_0", + "value" => %{"id" => "1111"} + }, + %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9998}} + ]) + + schema = Jason.encode!(%{"id" => %{type: "text"}}) + + {:ok, responses} = + start_supervised( + {Agent, + fn -> + %{ + {"-1", nil} => [ + &bypass_resp(&1, body1, + shape_handle: "my-shape", + last_offset: "1_0", + schema: schema + ) + ] + } + end} + ) + + bypass_response(ctx, responses) + + msgs = stream(ctx, live: false) |> Enum.to_list() + + resume_msg = Enum.find(msgs, &match?(%ResumeMessage{}, &1)) + assert resume_msg != nil + + body2 = + Jason.encode!([ + %{ + "headers" => %{ + "event" => "move-out", + "patterns" => [%{"pos" => 0, "value" => 
"legacy-tag"}] + } + }, + %{"headers" => %{"control" => "up-to-date", "global_last_seen_lsn" => 9999}} + ]) + + {:ok, responses2} = + start_supervised( + {Agent, + fn -> + %{ + {"1_0", "my-shape"} => [ + &bypass_resp(&1, body2, + shape_handle: "my-shape", + last_offset: "2_0" + ) + ] + } + end}, + id: :legacy_responses2 + ) + + bypass_response(ctx, responses2) + + resumed_msgs = stream(ctx, resume: resume_msg, live: false) |> Enum.to_list() + + delete_msgs = + Enum.filter(resumed_msgs, &match?(%ChangeMessage{headers: %{operation: :delete}}, &1)) + + assert length(delete_msgs) == 1, + "Legacy move-out after resume should generate synthetic delete, got: #{inspect(resumed_msgs)}" + + [delete] = delete_msgs + assert delete.value["id"] == "1111" + end end defp bypass_response_endpoint(ctx, responses, opts) do diff --git a/packages/sync-service/config/runtime.exs b/packages/sync-service/config/runtime.exs index 383ed2448e..69f43a9702 100644 --- a/packages/sync-service/config/runtime.exs +++ b/packages/sync-service/config/runtime.exs @@ -260,6 +260,8 @@ config :electric, # The ELECTRIC_EXPERIMENTAL_MAX_BATCH_SIZE is undocumented and used for testing only. 
max_batch_size: env!("ELECTRIC_EXPERIMENTAL_MAX_BATCH_SIZE", :integer, nil), service_port: env!("ELECTRIC_PORT", :integer, nil), + subquery_buffer_max_transactions: + env!("ELECTRIC_SUBQUERY_BUFFER_MAX_TRANSACTIONS", :integer, nil), shape_hibernate_after: shape_hibernate_after, shape_enable_suspend?: shape_enable_suspend?, storage_dir: storage_dir, diff --git a/packages/sync-service/lib/electric/config.ex b/packages/sync-service/lib/electric/config.ex index 9fd47cf729..c33ac3c271 100644 --- a/packages/sync-service/lib/electric/config.ex +++ b/packages/sync-service/lib/electric/config.ex @@ -122,7 +122,8 @@ defmodule Electric.Config do Electric.ShapeCache.ShapeStatus.ShapeDb.Connection.default!(:synchronous), shape_db_cache_size: Electric.ShapeCache.ShapeStatus.ShapeDb.Connection.default!(:cache_size), exclude_spans: MapSet.new(), - live_dashboard_port: nil + live_dashboard_port: nil, + subquery_buffer_max_transactions: 1000 ] @installation_id_key "electric_installation_id" diff --git a/packages/sync-service/lib/electric/log_items.ex b/packages/sync-service/lib/electric/log_items.ex index e61adc973d..7af61b6219 100644 --- a/packages/sync-service/lib/electric/log_items.ex +++ b/packages/sync-service/lib/electric/log_items.ex @@ -49,6 +49,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end @@ -69,6 +74,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end @@ -90,6 +100,11 @@ defmodule Electric.LogItems do |> put_if_true(:last, change.last?) 
|> put_if_true(:tags, change.move_tags != [], change.move_tags) |> put_if_true(:removed_tags, change.move_tags != [], change.removed_move_tags) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) } |> Map.merge(put_update_values(change, pk_cols, replica))} ] @@ -117,6 +132,11 @@ defmodule Electric.LogItems do change.move_tags != [], change.move_tags ++ change.removed_move_tags ) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }}, {new_offset, %{ @@ -133,6 +153,11 @@ defmodule Electric.LogItems do } |> put_if_true(:last, change.last?) |> put_if_true(:tags, change.move_tags != [], change.move_tags) + |> put_if_true( + :active_conditions, + change.active_conditions != [], + change.active_conditions + ) }} ] end diff --git a/packages/sync-service/lib/electric/lsn_tracker.ex b/packages/sync-service/lib/electric/lsn_tracker.ex index f9aaf03050..c0f0be2b80 100644 --- a/packages/sync-service/lib/electric/lsn_tracker.ex +++ b/packages/sync-service/lib/electric/lsn_tracker.ex @@ -60,6 +60,14 @@ defmodule Electric.LsnTracker do end def broadcast_last_seen_lsn(stack_ref, lsn) when is_integer(lsn) do + # Store the broadcast LSN so newly subscribing consumers can read the + # current value without waiting for the next broadcast. + try do + stack_ref |> table() |> :ets.insert({:last_broadcast_lsn, lsn}) + rescue + ArgumentError -> :ok + end + registry = Electric.StackSupervisor.registry_name(stack_ref) if Process.whereis(registry) do @@ -71,13 +79,35 @@ defmodule Electric.LsnTracker do :ok end + @doc """ + Returns the most recently broadcast LSN, or 0 if none has been broadcast yet. 
+ """ + @spec get_last_broadcast_lsn(stack_ref()) :: non_neg_integer() + def get_last_broadcast_lsn(stack_ref) do + case :ets.lookup(table(stack_ref), :last_broadcast_lsn) do + [{:last_broadcast_lsn, lsn}] -> lsn + [] -> 0 + end + rescue + ArgumentError -> 0 + end + @spec subscribe_to_global_lsn_updates(stack_ref(), term()) :: {:ok, pid()} | {:error, term()} def subscribe_to_global_lsn_updates(stack_ref, value \\ []) do - Registry.register( - Electric.StackSupervisor.registry_name(stack_ref), - @global_lsn_updates_topic, - value - ) + with {:ok, _} <- + Registry.register( + Electric.StackSupervisor.registry_name(stack_ref), + @global_lsn_updates_topic, + value + ) do + last_lsn = get_last_broadcast_lsn(stack_ref) + + if last_lsn > 0 do + send(self(), {:global_last_seen_lsn, last_lsn}) + end + + {:ok, self()} + end end @spec unsubscribe_from_global_lsn_updates(stack_ref()) :: :ok diff --git a/packages/sync-service/lib/electric/replication/changes.ex b/packages/sync-service/lib/electric/replication/changes.ex index cd026a9e2f..717d7b7df0 100644 --- a/packages/sync-service/lib/electric/replication/changes.ex +++ b/packages/sync-service/lib/electric/replication/changes.ex @@ -182,7 +182,15 @@ defmodule Electric.Replication.Changes do end defmodule NewRecord do - defstruct [:relation, :record, :log_offset, :key, last?: false, move_tags: []] + defstruct [ + :relation, + :record, + :log_offset, + :key, + last?: false, + move_tags: [], + active_conditions: [] + ] @type t() :: %__MODULE__{ relation: Changes.relation_name(), @@ -190,7 +198,8 @@ defmodule Electric.Replication.Changes do log_offset: LogOffset.t(), key: String.t() | nil, last?: boolean(), - move_tags: [Changes.tag()] + move_tags: [Changes.tag()], + active_conditions: [boolean()] } end @@ -205,7 +214,8 @@ defmodule Electric.Replication.Changes do move_tags: [], removed_move_tags: [], changed_columns: MapSet.new(), - last?: false + last?: false, + active_conditions: [] ] @type t() :: %__MODULE__{ @@ -218,7 +228,8 
@@ defmodule Electric.Replication.Changes do move_tags: [Changes.tag()], removed_move_tags: [Changes.tag()], changed_columns: MapSet.t(), - last?: boolean() + last?: boolean(), + active_conditions: [boolean()] } def new(attrs) do @@ -254,7 +265,15 @@ defmodule Electric.Replication.Changes do end defmodule DeletedRecord do - defstruct [:relation, :old_record, :log_offset, :key, move_tags: [], last?: false] + defstruct [ + :relation, + :old_record, + :log_offset, + :key, + move_tags: [], + last?: false, + active_conditions: [] + ] @type t() :: %__MODULE__{ relation: Changes.relation_name(), @@ -262,7 +281,8 @@ defmodule Electric.Replication.Changes do log_offset: LogOffset.t(), key: String.t() | nil, move_tags: [Changes.tag()], - last?: boolean() + last?: boolean(), + active_conditions: [boolean()] } end @@ -412,7 +432,8 @@ defmodule Electric.Replication.Changes do relation: change.relation, record: change.record, key: change.key, - log_offset: change.log_offset + log_offset: change.log_offset, + active_conditions: change.active_conditions } end @@ -422,7 +443,8 @@ defmodule Electric.Replication.Changes do old_record: change.old_record, key: change.old_key || change.key, log_offset: change.log_offset, - move_tags: change.move_tags + move_tags: change.move_tags, + active_conditions: change.active_conditions } end diff --git a/packages/sync-service/lib/electric/replication/eval.ex b/packages/sync-service/lib/electric/replication/eval.ex index 989735f9d7..cee5da0ae4 100644 --- a/packages/sync-service/lib/electric/replication/eval.ex +++ b/packages/sync-service/lib/electric/replication/eval.ex @@ -66,4 +66,20 @@ defmodule Electric.Replication.Eval do def type_to_pg_cast({:row, _}, _), do: raise("Unsupported type: row") def type_to_pg_cast({:internal, _}, _), do: raise("Unsupported type: internal") def type_to_pg_cast(type, _) when is_atom(type), do: to_string(type) + + @doc """ + Convert a value from the eval representation to the format Postgrex expects + for binary 
+  protocol encoding.
+
+  Most types (integers, floats, booleans, dates, times, etc.) use native Elixir
+  types that Postgrex handles directly. UUID is a notable exception: the eval
+  system stores UUIDs as human-readable strings, but Postgrex expects 16-byte
+  raw binaries.
+  """
+  def value_to_postgrex(value, :uuid) when is_binary(value) do
+    {:ok, bin} = Ecto.UUID.dump(value)
+    bin
+  end
+
+  def value_to_postgrex(value, _type), do: value
 end
diff --git a/packages/sync-service/lib/electric/replication/eval/decomposer.ex b/packages/sync-service/lib/electric/replication/eval/decomposer.ex
new file mode 100644
index 0000000000..391ceffe33
--- /dev/null
+++ b/packages/sync-service/lib/electric/replication/eval/decomposer.ex
@@ -0,0 +1,280 @@
+defmodule Electric.Replication.Eval.Decomposer do
+  @moduledoc """
+  Decomposes a query to an expanded DNF form.
+
+  Takes a where clause part of a query and decomposes it into a list of expressions
+  in a Disjunctive Normal Form (DNF). Each expression is a conjunction of comparisons.
+
+  To avoid duplication, it returns a list of lists, where the outer list is a list of disjuncts (conjunctions),
+  and the inner list is a list of comparisons. Each comparison (i.e. the sub-expression of the original where clause)
+  is represented by an Erlang reference, which is then mentioned in the map of references to the AST of the
+  referenced subexpression.
+
+  ## NOT handling
+
+  To properly convert to DNF, NOT expressions are pushed down to leaf expressions using De Morgan's laws:
+  - `NOT (a OR b)` becomes `(NOT a) AND (NOT b)`
+  - `NOT (a AND b)` becomes `(NOT a) OR (NOT b)`
+
+  Because of this, leaf expressions in the result can be either:
+  - `ref` - a positive reference to a subexpression
+  - `{:not, ref}` - a negated reference to a subexpression
+  - `nil` - this position is not part of this disjunct
+
+  The subexpressions map always contains the base (non-negated) form of each expression.
+
+  ## Expanded format
+
+  The "expanded" part means that each inner list MUST be the same length, equal to the total count of expressions
+  across all disjuncts. Each position in the inner list corresponds to a specific expression slot from the original
+  query structure, and contains either a reference (possibly negated) to that subexpression or `nil` if that
+  expression is not part of the given disjunct.
+
+  References allow deduplication: if the same subexpression appears in multiple disjuncts, they will share the
+  same reference (but occupy different positions, since positions correspond to the original query structure).
+
+  ## Examples
+
+  For the query (already in a normalized form):
+
+  ```sql
+  WHERE (a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3)
+  ```
+
+  Has 3 disjuncts with 2 + 2 + 2 = 6 total expression slots. It will be decomposed into:
+
+  ```
+  [[r1, r2, nil, nil, nil, nil],
+   [nil, nil, r3, r4, nil, nil],
+   [nil, nil, nil, nil, r1, r3]]
+  ```
+
+  Where:
+  - Positions 0-1 correspond to disjunct 1's expressions (`a = 1`, `b = 2`)
+  - Positions 2-3 correspond to disjunct 2's expressions (`c = 3`, `d = 4`)
+  - Positions 4-5 correspond to disjunct 3's expressions (`a = 1`, `c = 3`)
+  - `r1` appears at positions 0 and 4 (same subexpression `a = 1`)
+  - `r3` appears at positions 2 and 5 (same subexpression `c = 3`)
+
+  The reference map will contain: `r1 => "a = 1"`, `r2 => "b = 2"`, `r3 => "c = 3"`, `r4 => "d = 4"`.
+
+  For a query with NOT that needs De Morgan transformation:
+
+  ```sql
+  WHERE NOT (a = 1 OR b = 2)
+  ```
+
+  Becomes `(NOT a = 1) AND (NOT b = 2)` - a single disjunct with two negated terms:
+
+  ```
+  [[{:not, r1}, {:not, r2}]]
+  ```
+
+  And for:
+
+  ```sql
+  WHERE NOT (a = 1 AND b = 2)
+  ```
+
+  Becomes `(NOT a = 1) OR (NOT b = 2)` - two disjuncts:
+
+  ```
+  [[{:not, r1}, nil],
+   [nil, {:not, r2}]]
+  ```
+  """
+
+  alias Electric.Replication.Eval.Parser
+  alias Electric.Replication.Eval.Parser.Func
+  alias Electric.Replication.Eval.SqlGenerator
+
+  @type position :: non_neg_integer()
+  @type literal :: {position(), :positive | :negated}
+  @type conjunction :: [literal()]
+  @type dnf :: [conjunction()]
+
+  @type subexpression :: %{
+          ast: Parser.tree_part(),
+          is_subquery: boolean(),
+          negated: boolean()
+        }
+
+  @type decomposition :: %{
+          disjuncts: dnf(),
+          disjuncts_positions: [[position()]],
+          subexpressions: %{position() => subexpression()},
+          position_count: non_neg_integer()
+        }
+
+  @max_disjuncts 100
+
+  @spec decompose(query :: Parser.tree_part()) :: {:ok, decomposition()} | {:error, term()}
+  def decompose(query) do
+    internal_dnf = to_dnf(query, false)
+
+    if length(internal_dnf) > @max_disjuncts do
+      {:error,
+       "WHERE clause too complex for DNF decomposition " <>
+         "(#{length(internal_dnf)} disjuncts exceeds limit of #{@max_disjuncts})"}
+    else
+      {expanded, ref_subexpressions} = expand(internal_dnf)
+      {:ok, to_decomposition(expanded, ref_subexpressions)}
+    end
+  end
+
+  # Convert AST to internal DNF representation
+  # negated? tracks whether we're inside a NOT context (for De Morgan transformations)
+  defp to_dnf(%Func{name: name, args: args}, negated) when name in ~w(and or not) do
+    case {name, negated} do
+      {"or", false} ->
+        # OR: concatenate disjuncts from all branches
+        Enum.flat_map(args, &to_dnf(&1, false))
+
+      {"or", true} ->
+        # NOT OR => AND (De Morgan's law)
+        # NOT (a OR b) = NOT a AND NOT b
+        args_dnfs = Enum.map(args, &to_dnf(&1, true))
+        cross_product(args_dnfs)
+
+      {"and", false} ->
+        # AND: cross-product of disjuncts from all branches
+        args_dnfs = Enum.map(args, &to_dnf(&1, false))
+        cross_product(args_dnfs)
+
+      {"and", true} ->
+        # NOT AND => OR (De Morgan's law)
+        # NOT (a AND b) = NOT a OR NOT b
+        Enum.flat_map(args, &to_dnf(&1, true))
+
+      {"not", _} ->
+        # NOT: flip the negation state (handles double negation automatically)
+        [arg] = args
+        to_dnf(arg, not negated)
+    end
+  end
+
+  defp to_dnf(leaf, negated) do
+    # Leaf expression: single disjunct with single term
+    [[{leaf, negated}]]
+  end
+
+  # Cross-product of multiple DNF forms
+  # Used for AND distribution: (A1 OR A2) AND (B1 OR B2) => (A1 AND B1) OR (A1 AND B2) OR (A2 AND B1) OR (A2 AND B2)
+  defp cross_product([]), do: [[]]
+
+  defp cross_product([dnf | rest]) do
+    rest_product = cross_product(rest)
+
+    for disjunct <- dnf, rest_disjunct <- rest_product do
+      disjunct ++ rest_disjunct
+    end
+  end
+
+  # Expand internal DNF to fixed-width format with references
+  defp expand(internal_dnf) do
+    # Calculate width of each disjunct and total width
+    widths = Enum.map(internal_dnf, &length/1)
+    total_width = Enum.sum(widths)
+
+    # Calculate start positions for each disjunct: [0, w1, w1+w2, ...]
+    start_positions = calc_start_positions(widths)
+
+    # Build subexpressions map with deduplication based on SQL string
+    {ast_to_ref, subexpressions} = build_subexpressions(internal_dnf)
+
+    # Expand each disjunct to full width
+    disjuncts =
+      internal_dnf
+      |> Enum.zip(start_positions)
+      |> Enum.map(fn {disjunct, start_pos} ->
+        # Create a list of nils of the total width
+        row = List.duplicate(nil, total_width)
+
+        # Fill in the terms at the appropriate positions
+        disjunct
+        |> Enum.with_index()
+        |> Enum.reduce(row, fn {{ast, negated}, term_idx}, row ->
+          pos = start_pos + term_idx
+          ref = Map.fetch!(ast_to_ref, deparse(ast))
+          term = if negated, do: {:not, ref}, else: ref
+          List.replace_at(row, pos, term)
+        end)
+      end)
+
+    {disjuncts, subexpressions}
+  end
+
+  defp calc_start_positions(widths) do
+    widths
+    |> Enum.reduce({[], 0}, fn width, {positions, acc} ->
+      {positions ++ [acc], acc + width}
+    end)
+    |> elem(0)
+  end
+
+  defp build_subexpressions(internal_dnf) do
+    internal_dnf
+    |> List.flatten()
+    |> Enum.map(fn {ast, _negated} -> ast end)
+    |> Enum.reduce({%{}, %{}}, fn ast, {ast_to_ref, subexpressions} ->
+      key = deparse(ast)
+
+      case Map.fetch(ast_to_ref, key) do
+        {:ok, _ref} ->
+          {ast_to_ref, subexpressions}
+
+        :error ->
+          ref = make_ref()
+          {Map.put(ast_to_ref, key, ref), Map.put(subexpressions, ref, ast)}
+      end
+    end)
+  end
+
+  # Convert AST node back to SQL string for deduplication
+  defp deparse(ast) do
+    SqlGenerator.to_sql(ast)
+  end
+
+  # Convert ref-based expanded format to position-indexed decomposition
+  defp to_decomposition(expanded, ref_subexpressions) do
+    position_count = if expanded == [[]], do: 0, else: length(hd(expanded))
+    subexpressions = build_position_subexpressions(expanded, ref_subexpressions, position_count)
+
+    disjuncts =
+      Enum.map(expanded, fn row ->
+        row
+        |> Enum.with_index()
+        |> Enum.flat_map(fn
+          {nil, _pos} -> []
+          {{:not, _ref}, pos} -> [{pos, :negated}]
+          {_ref, pos} -> [{pos, :positive}]
+        end)
+      end)
+
+    disjuncts_positions = Enum.map(disjuncts, fn conj -> Enum.map(conj, &elem(&1, 0)) end)
+
+    %{
+      disjuncts: disjuncts,
+      disjuncts_positions: disjuncts_positions,
+      subexpressions: subexpressions,
+      position_count: position_count
+    }
+  end
+
+  defp build_position_subexpressions(_expanded, _ref_subexpressions, 0), do: %{}
+
+  defp build_position_subexpressions(expanded, ref_subexpressions, position_count) do
+    Map.new(0..(position_count - 1), fn pos ->
+      term = Enum.find_value(expanded, fn row -> Enum.at(row, pos) end)
+      {ref, negated} = ref_and_polarity(term)
+      ast = Map.fetch!(ref_subexpressions, ref)
+      {pos, %{ast: ast, is_subquery: is_subquery?(ast), negated: negated}}
+    end)
+  end
+
+  defp ref_and_polarity({:not, ref}), do: {ref, true}
+  defp ref_and_polarity(ref) when is_reference(ref), do: {ref, false}
+
+  defp is_subquery?(%Func{name: "sublink_membership_check"}), do: true
+  defp is_subquery?(_), do: false
+end
diff --git a/packages/sync-service/lib/electric/replication/eval/parser.ex b/packages/sync-service/lib/electric/replication/eval/parser.ex
index 2be4015fe2..022e59dddf 100644
--- a/packages/sync-service/lib/electric/replication/eval/parser.ex
+++ b/packages/sync-service/lib/electric/replication/eval/parser.ex
@@ -1113,7 +1113,7 @@ defmodule Electric.Replication.Eval.Parser do
            comparisons = [left_comparison, right_comparison],
            {:ok, reduced} <-
              build_bool_chain(
-               %{name: "or", impl: &pg_and/2, strict?: false},
+               %{name: "and", impl: &pg_and/2, strict?: false},
                comparisons,
                expr.location
              ) do
diff --git a/packages/sync-service/lib/electric/replication/eval/runner.ex b/packages/sync-service/lib/electric/replication/eval/runner.ex
index 21e1a48cfc..9b19b49592 100644
--- a/packages/sync-service/lib/electric/replication/eval/runner.ex
+++ b/packages/sync-service/lib/electric/replication/eval/runner.ex
@@ -49,44 +49,70 @@ defmodule Electric.Replication.Eval.Runner do
   @doc """
   Run a PG function parsed by `Electric.Replication.Eval.Parser` based on the inputs
""" - @spec execute(Expr.t(), map()) :: {:ok, term()} | {:error, {%Func{}, [term()]}} - def execute(%Expr{} = tree, ref_values) do - execute_node(tree.eval, ref_values) + @spec execute(Expr.t(), map(), keyword()) :: {:ok, term()} | {:error, {%Func{}, [term()]}} + def execute(%Expr{} = tree, ref_values, opts \\ []) do + ctx = %{refs: ref_values, subquery_member?: Keyword.get(opts, :subquery_member?)} + execute_node(tree.eval, ctx) catch {:could_not_compute, func} -> {:error, func} end - defp execute_node(%Const{value: value}, _), do: {:ok, value} - defp execute_node(%Ref{path: path}, refs), do: {:ok, Map.fetch!(refs, path)} + defp execute_node(%Const{value: value}, _ctx), do: {:ok, value} - defp execute_node(%Array{elements: elements}, refs) do - Utils.map_while_ok(elements, &execute_node(&1, refs)) + defp execute_node( + %Ref{path: ["$sublink", _] = path}, + %{refs: refs, subquery_member?: subquery_member?} + ) + when is_function(subquery_member?, 2) do + {:ok, Map.get(refs, path, {:subquery_ref, path})} + end + + defp execute_node(%Ref{path: path}, %{refs: refs}), do: {:ok, Map.fetch!(refs, path)} + + defp execute_node(%Array{elements: elements}, ctx) do + Utils.map_while_ok(elements, &execute_node(&1, ctx)) end - defp execute_node(%RowExpr{elements: elements}, refs) do - with {:ok, elements} <- Utils.map_while_ok(elements, &execute_node(&1, refs)) do + defp execute_node(%RowExpr{elements: elements}, ctx) do + with {:ok, elements} <- Utils.map_while_ok(elements, &execute_node(&1, ctx)) do {:ok, List.to_tuple(elements)} end end defp execute_node( %Func{name: "coalesce", variadic_arg: 0, args: [%Array{elements: elements}]}, - refs + ctx ) do - execute_coalesce(elements, refs) + execute_coalesce(elements, ctx) + end + + defp execute_node( + %Func{name: "sublink_membership_check"} = func, + %{subquery_member?: subquery_member?} = ctx + ) + when is_function(subquery_member?, 2) do + with {:ok, [value, {:subquery_ref, path}]} <- + Utils.map_while_ok(func.args, 
+             &execute_node(&1, ctx)) do
+      {:ok,
+       try do
+         subquery_member?.(path, value)
+       rescue
+         _ -> throw({:could_not_compute, %{func | args: [value, {:subquery_ref, path}]}})
+       end}
+    end
   end
 
-  defp execute_node(%Func{args: args} = func, refs) do
-    with {:ok, args} <- Utils.map_while_ok(args, &execute_node(&1, refs)) do
+  defp execute_node(%Func{args: args} = func, ctx) do
+    with {:ok, args} <- Utils.map_while_ok(args, &execute_node(&1, ctx)) do
       apply_func(func, args)
     end
   end
 
-  defp execute_coalesce([], _refs), do: {:ok, nil}
+  defp execute_coalesce([], _ctx), do: {:ok, nil}
 
-  defp execute_coalesce([arg | rest], refs) do
-    case execute_node(arg, refs) do
-      {:ok, nil} -> execute_coalesce(rest, refs)
+  defp execute_coalesce([arg | rest], ctx) do
+    case execute_node(arg, ctx) do
+      {:ok, nil} -> execute_coalesce(rest, ctx)
       {:ok, value} -> {:ok, value}
     end
   end
diff --git a/packages/sync-service/lib/electric/replication/eval/sql_generator.ex b/packages/sync-service/lib/electric/replication/eval/sql_generator.ex
new file mode 100644
index 0000000000..ed7cd14ca6
--- /dev/null
+++ b/packages/sync-service/lib/electric/replication/eval/sql_generator.ex
@@ -0,0 +1,369 @@
+defmodule Electric.Replication.Eval.SqlGenerator do
+  @moduledoc """
+  Converts a parsed WHERE clause AST back into a SQL string.
+
+  This is the inverse of `Parser` — where `Parser` turns SQL text into an AST,
+  `SqlGenerator` turns that AST back into SQL text. Used whenever the server
+  needs to embed a condition in a generated query (snapshot active_conditions,
+  move-in exclusion clauses, etc.).
+
+  Uses precedence-aware parenthesization to produce minimal, readable SQL.
+  Parentheses are only added when needed to preserve the AST's evaluation order.
+
+  Must handle every AST node type that `Parser` can produce. Raises
+  `ArgumentError` for unrecognised nodes so gaps are caught at shape
+  creation time, but the property-based round-trip test (see Tests below)
+  enforces that no parseable expression triggers this error.
+  """
+
+  alias Electric.Replication.Eval.Parser.{Const, Ref, Func, Array, RowExpr}
+
+  # PostgreSQL operator precedence (higher number = tighter binding)
+  # See: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-PRECEDENCE
+  @prec_or 10
+  @prec_and 20
+  @prec_not 30
+  @prec_is 40
+  @prec_comparison 50
+  @prec_like_in 60
+  @prec_other_op 70
+  @prec_addition 80
+  @prec_multiplication 90
+  @prec_exponent 100
+  @prec_unary 110
+  @prec_cast 130
+  @prec_atom 1000
+
+  @doc """
+  Convert an AST node to a SQL string.
+
+  Handles: comparison operators (=, <>, <, >, <=, >=), pattern matching
+  (LIKE, ILIKE, NOT LIKE, NOT ILIKE), nullability (IS NULL, IS NOT NULL),
+  membership (IN), logical operators (AND, OR, NOT), boolean tests
+  (IS TRUE, IS FALSE, IS UNKNOWN, etc.), column references, constants
+  (strings, integers, floats, booleans, NULL), type casts, arithmetic
+  operators (+, -, *, /, ^, |/, @, &, |, #, ~), string concatenation (||),
+  array operators (@>, <@, &&), array/slice access, DISTINCT/NOT DISTINCT,
+  ANY/ALL, and sublink membership checks.
+
+  Raises `ArgumentError` for unrecognised AST nodes.
+  """
+  @spec to_sql(term()) :: String.t()
+  def to_sql(ast) do
+    {sql, _prec} = to_sql_prec(ast)
+    sql
+  end
+
+  # --- Private: precedence-aware SQL generation ---
+  # Each clause returns {sql_string, precedence_level}
+
+  # Comparison operators
+  defp to_sql_prec(%Func{name: "\"=\"", args: [left, right]}),
+    do: binary_op(left, "=", right, @prec_comparison)
+
+  defp to_sql_prec(%Func{name: "\"<>\"", args: [left, right]}),
+    do: binary_op(left, "<>", right, @prec_comparison)
+
+  defp to_sql_prec(%Func{name: "\"<\"", args: [left, right]}),
+    do: binary_op(left, "<", right, @prec_comparison)
+
+  defp to_sql_prec(%Func{name: "\">\"", args: [left, right]}),
+    do: binary_op(left, ">", right, @prec_comparison)
+
+  defp to_sql_prec(%Func{name: "\"<=\"", args: [left, right]}),
+    do: binary_op(left, "<=", right, @prec_comparison)
+
+  defp to_sql_prec(%Func{name: "\">=\"", args: [left, right]}),
+    do: binary_op(left, ">=", right, @prec_comparison)
+
+  # Pattern matching
+  defp to_sql_prec(%Func{name: "\"~~\"", args: [left, right]}),
+    do: binary_op(left, "LIKE", right, @prec_like_in)
+
+  defp to_sql_prec(%Func{name: "\"~~*\"", args: [left, right]}),
+    do: binary_op(left, "ILIKE", right, @prec_like_in)
+
+  defp to_sql_prec(%Func{name: "\"!~~\"", args: [left, right]}),
+    do: binary_op(left, "NOT LIKE", right, @prec_like_in)
+
+  defp to_sql_prec(%Func{name: "\"!~~*\"", args: [left, right]}),
+    do: binary_op(left, "NOT ILIKE", right, @prec_like_in)
+
+  # Nullability — parser produces "is null"/"is not null" from constant folding
+  # and "IS_NULL"/"IS_NOT_NULL" from NullTest on column refs
+  defp to_sql_prec(%Func{name: name, args: [arg]}) when name in ["is null", "IS_NULL"],
+    do: postfix_op(arg, "IS NULL", @prec_is)
+
+  defp to_sql_prec(%Func{name: name, args: [arg]}) when name in ["is not null", "IS_NOT_NULL"],
+    do: postfix_op(arg, "IS NOT NULL", @prec_is)
+
+  # Boolean tests
+  defp to_sql_prec(%Func{name: "IS_TRUE", args: [arg]}),
+    do: postfix_op(arg, "IS TRUE", @prec_is)
+
+  defp to_sql_prec(%Func{name: "IS_NOT_TRUE", args: [arg]}),
+    do: postfix_op(arg, "IS NOT TRUE", @prec_is)
+
+  defp to_sql_prec(%Func{name: "IS_FALSE", args: [arg]}),
+    do: postfix_op(arg, "IS FALSE", @prec_is)
+
+  defp to_sql_prec(%Func{name: "IS_NOT_FALSE", args: [arg]}),
+    do: postfix_op(arg, "IS NOT FALSE", @prec_is)
+
+  defp to_sql_prec(%Func{name: "IS_UNKNOWN", args: [arg]}),
+    do: postfix_op(arg, "IS UNKNOWN", @prec_is)
+
+  defp to_sql_prec(%Func{name: "IS_NOT_UNKNOWN", args: [arg]}),
+    do: postfix_op(arg, "IS NOT UNKNOWN", @prec_is)
+
+  # Membership (IN with literal array)
+  defp to_sql_prec(%Func{name: "in", args: [left, %Array{elements: elements}]}) do
+    values = Enum.map_join(elements, ", ", &to_sql/1)
+    {"#{wrap(left, @prec_like_in)} IN (#{values})", @prec_like_in}
+  end
+
+  # Sublink membership check (IN with subquery) — rendered as a placeholder
+  # since the actual subquery SQL is not in the AST
+  defp to_sql_prec(%Func{name: "sublink_membership_check", args: [left, %Ref{path: path}]}) do
+    sublink_ref = Enum.join(path, ".")
+    {"#{wrap(left, @prec_like_in)} IN (SELECT #{sublink_ref})", @prec_like_in}
+  end
+
+  # Logical operators
+  defp to_sql_prec(%Func{name: "not", args: [inner]}),
+    do: prefix_op("NOT", inner, @prec_not)
+
+  defp to_sql_prec(%Func{name: "and", args: args}) do
+    conditions = Enum.map_join(args, " AND ", &wrap(&1, @prec_and))
+    {conditions, @prec_and}
+  end
+
+  defp to_sql_prec(%Func{name: "or", args: args}) do
+    conditions = Enum.map_join(args, " OR ", &wrap(&1, @prec_or))
+    {conditions, @prec_or}
+  end
+
+  # DISTINCT / NOT DISTINCT — args are [left, right, comparison_func]
+  defp to_sql_prec(%Func{name: "values_distinct?", args: [left, right | _]}),
+    do: binary_op(left, "IS DISTINCT FROM", right, @prec_is)
+
+  defp to_sql_prec(%Func{name: "values_not_distinct?", args: [left, right | _]}),
+    do: binary_op(left, "IS NOT DISTINCT FROM", right, @prec_is)
+
+  # ANY / ALL — arg is a single Func with map_over_array_in_pos
+  defp to_sql_prec(%Func{name: "any", args: [%Func{} = inner]}) do
+    {op_sql, left, right} = extract_mapped_operator(inner)
+    {"#{wrap(left, @prec_comparison)} #{op_sql} ANY(#{to_sql(right)})", @prec_comparison}
+  end
+
+  defp to_sql_prec(%Func{name: "all", args: [%Func{} = inner]}) do
+    {op_sql, left, right} = extract_mapped_operator(inner)
+    {"#{wrap(left, @prec_comparison)} #{op_sql} ALL(#{to_sql(right)})", @prec_comparison}
+  end
+
+  # Arithmetic binary operators
+  defp to_sql_prec(%Func{name: "\"+\"", args: [left, right]}),
+    do: binary_op(left, "+", right, @prec_addition)
+
+  defp to_sql_prec(%Func{name: "\"-\"", args: [left, right]}),
+    do: binary_op(left, "-", right, @prec_addition)
+
+  defp to_sql_prec(%Func{name: "\"*\"", args: [left, right]}),
+    do: binary_op(left, "*", right, @prec_multiplication)
+
+  defp to_sql_prec(%Func{name: "\"/\"", args: [left, right]}),
+    do: binary_op(left, "/", right, @prec_multiplication)
+
+  defp to_sql_prec(%Func{name: "\"^\"", args: [left, right]}),
+    do: binary_op_right(left, "^", right, @prec_exponent)
+
+  # Bitwise binary operators
+  defp to_sql_prec(%Func{name: "\"&\"", args: [left, right]}),
+    do: binary_op(left, "&", right, @prec_other_op)
+
+  defp to_sql_prec(%Func{name: "\"|\"", args: [left, right]}),
+    do: binary_op(left, "|", right, @prec_other_op)
+
+  defp to_sql_prec(%Func{name: "\"#\"", args: [left, right]}),
+    do: binary_op(left, "#", right, @prec_other_op)
+
+  # Unary operators
+  defp to_sql_prec(%Func{name: "\"+\"", args: [arg]}),
+    do: prefix_op("+", arg, @prec_unary)
+
+  defp to_sql_prec(%Func{name: "\"-\"", args: [arg]}),
+    do: prefix_op("-", arg, @prec_unary)
+
+  defp to_sql_prec(%Func{name: "\"~\"", args: [arg]}),
+    do: prefix_op("~", arg, @prec_unary)
+
+  defp to_sql_prec(%Func{name: "\"|/\"", args: [arg]}),
+    do: prefix_op("|/", arg, @prec_unary)
+
+  defp to_sql_prec(%Func{name: "\"@\"", args: [arg]}),
+    do: prefix_op("@", arg, @prec_unary)
+
+  # String concatenation
+  defp to_sql_prec(%Func{name: "\"||\"", args: [left, right]}),
+    do: binary_op(left, "||", right, @prec_other_op)
+
+  # Array operators
+  defp to_sql_prec(%Func{name: "\"@>\"", args: [left, right]}),
+    do: binary_op(left, "@>", right, @prec_other_op)
+
+  defp to_sql_prec(%Func{name: "\"<@\"", args: [left, right]}),
+    do: binary_op(left, "<@", right, @prec_other_op)
+
+  defp to_sql_prec(%Func{name: "\"&&\"", args: [left, right]}),
+    do: binary_op(left, "&&", right, @prec_other_op)
+
+  # Named functions (lower, upper, like, ilike, array_*, justify_*, timezone, casts, etc.)
+  # These are Func nodes where the name is a plain identifier (no quotes around operators)
+  defp to_sql_prec(%Func{name: name, args: args})
+       when name in ~w(lower upper like ilike array_cat array_prepend array_append array_ndims
+                       justify_days justify_hours justify_interval timezone
+                       index_access slice_access) do
+    arg_list = Enum.map_join(args, ", ", &to_sql/1)
+    {"#{name}(#{arg_list})", @prec_atom}
+  end
+
+  # Type cast functions (e.g., "int4_to_bool", "text_to_int4")
+  defp to_sql_prec(%Func{name: name, args: [arg]}) when is_binary(name) do
+    if String.contains?(name, "_to_") do
+      target_type = name |> String.split("_to_") |> List.last()
+      {"#{wrap(arg, @prec_cast)}::#{target_type}", @prec_cast}
+    else
+      raise ArgumentError,
+            "SqlGenerator.to_sql/1: unsupported AST node: %Func{name: #{inspect(name)}}. " <>
+              "This WHERE clause contains an operator or expression type that " <>
+              "cannot be converted back to SQL for active_conditions generation."
+    end
+  end
+
+  # Column references
+  defp to_sql_prec(%Ref{path: path}) do
+    {Enum.map_join(path, ".", &~s|"#{&1}"|), @prec_atom}
+  end
+
+  # Constants
+  defp to_sql_prec(%Const{value: nil}), do: {"NULL", @prec_atom}
+  defp to_sql_prec(%Const{value: true}), do: {"true", @prec_atom}
+  defp to_sql_prec(%Const{value: false}), do: {"false", @prec_atom}
+
+  defp to_sql_prec(%Const{value: value}) when is_binary(value) do
+    escaped = String.replace(value, "'", "''")
+    {"'#{escaped}'", @prec_atom}
+  end
+
+  defp to_sql_prec(%Const{value: value}) when is_integer(value) or is_float(value),
+    do: {"#{value}", @prec_atom}
+
+  # Constant-folded arrays (parser evaluates e.g. ARRAY[1, 2] to %Const{value: [1, 2]})
+  defp to_sql_prec(%Const{value: value}) when is_list(value) do
+    elements = Enum.map_join(value, ", ", &const_list_element_to_sql/1)
+    {"ARRAY[#{elements}]", @prec_atom}
+  end
+
+  # Date/time/interval constants — the parser constant-folds typed literals
+  # (e.g. '2024-01-01'::date) into Const nodes with Elixir struct values.
+  defp to_sql_prec(%Const{value: %Date{} = d}), do: {"'#{Date.to_iso8601(d)}'::date", @prec_atom}
+  defp to_sql_prec(%Const{value: %Time{} = t}), do: {"'#{Time.to_iso8601(t)}'::time", @prec_atom}
+
+  defp to_sql_prec(%Const{value: %NaiveDateTime{} = ndt}),
+    do: {"'#{NaiveDateTime.to_iso8601(ndt)}'::timestamp", @prec_atom}
+
+  defp to_sql_prec(%Const{value: %DateTime{} = dt}),
+    do: {"'#{DateTime.to_iso8601(dt)}'::timestamptz", @prec_atom}
+
+  defp to_sql_prec(%Const{value: %PgInterop.Interval{} = i}),
+    do: {"'#{PgInterop.Interval.format(i)}'::interval", @prec_atom}
+
+  # Row expressions — e.g. ROW(a, b) or (a, b) in row comparisons
+  defp to_sql_prec(%RowExpr{elements: elements}) do
+    values = Enum.map_join(elements, ", ", &to_sql/1)
+    {"ROW(#{values})", @prec_atom}
+  end
+
+  # Array literals
+  defp to_sql_prec(%Array{elements: elements}) do
+    values = Enum.map_join(elements, ", ", &to_sql/1)
+    {"ARRAY[#{values}]", @prec_atom}
+  end
+
+  # Catch-all — fail loudly so unsupported operators are caught at shape
+  # creation time, not at query time.
+  defp to_sql_prec(other) do
+    raise ArgumentError,
+          "SqlGenerator.to_sql/1: unsupported AST node: #{inspect(other)}. " <>
+            "This WHERE clause contains an operator or expression type that " <>
+            "cannot be converted back to SQL for active_conditions generation."
+  end
+
+  # --- Precedence helpers ---
+
+  # Binary operator, left-associative: left child at prec, right child at prec+1
+  defp binary_op(left, op, right, prec) do
+    {"#{wrap(left, prec)} #{op} #{wrap(right, prec + 1)}", prec}
+  end
+
+  # Binary operator, right-associative: left child at prec+1, right child at prec
+  defp binary_op_right(left, op, right, prec) do
+    {"#{wrap(left, prec + 1)} #{op} #{wrap(right, prec)}", prec}
+  end
+
+  # Prefix unary operator: operand at same prec (same-level nesting is fine)
+  defp prefix_op(op, operand, prec) do
+    {"#{op} #{wrap(operand, prec)}", prec}
+  end
+
+  # Postfix unary operator: operand must be strictly higher precedence to avoid
+  # ambiguity (e.g. `x IS DISTINCT FROM y IS NULL` is ambiguous)
+  defp postfix_op(operand, op, prec) do
+    {"#{wrap(operand, prec + 1)} #{op}", prec}
+  end
+
+  # Wrap an AST node in parens if its precedence is lower than the context
+  defp wrap(ast, context_prec) do
+    {sql, prec} = to_sql_prec(ast)
+    if prec < context_prec, do: "(#{sql})", else: sql
+  end
+
+  # --- Unchanged helpers ---
+
+  # Helper for rendering constant-folded array elements (plain Elixir values, not AST nodes)
+  defp const_list_element_to_sql(nil), do: "NULL"
+  defp const_list_element_to_sql(true), do: "true"
+  defp const_list_element_to_sql(false), do: "false"
+
+  defp const_list_element_to_sql(value) when is_binary(value) do
+    escaped = String.replace(value, "'", "''")
+    "'#{escaped}'"
+  end
+
+  defp const_list_element_to_sql(value) when is_integer(value) or is_float(value),
+    do: "#{value}"
+
+  defp const_list_element_to_sql(value) when is_list(value) do
+    elements = Enum.map_join(value, ", ", &const_list_element_to_sql/1)
+    "ARRAY[#{elements}]"
+  end
+
+  # Helper for ANY/ALL: extract the operator, left operand, and array right operand
+  # from a Func with map_over_array_in_pos set
+  defp extract_mapped_operator(%Func{name: name, args: [left, right]}) do
+    op_sql =
+      case name do
+        ~s|"="| -> "="
+        ~s|"<>"| -> "<>"
+        ~s|"<"| -> "<"
+        ~s|">"| -> ">"
+        ~s|"<="| -> "<="
+        ~s|">="| -> ">="
+        ~s|"~~"| -> "LIKE"
+        ~s|"~~*"| -> "ILIKE"
+        other -> String.trim(other, "\"")
+      end
+
+    {op_sql, left, right}
+  end
+end
diff --git a/packages/sync-service/lib/electric/replication/shape_log_collector.ex b/packages/sync-service/lib/electric/replication/shape_log_collector.ex
index 39758f69c3..754f3e077f 100644
--- a/packages/sync-service/lib/electric/replication/shape_log_collector.ex
+++ b/packages/sync-service/lib/electric/replication/shape_log_collector.ex
@@ -26,7 +26,7 @@ defmodule Electric.Replication.ShapeLogCollector do
   alias Electric.Replication.Changes.Relation
   alias Electric.Replication.Changes.TransactionFragment
alias Electric.Replication.LogOffset - alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.DependencyLayers alias Electric.Shapes.EventRouter alias Electric.Shapes.Partitions @@ -244,7 +244,6 @@ defmodule Electric.Replication.ShapeLogCollector do pids_by_shape_handle: %{}, event_router: opts - |> Map.put(:refs_fun, &Materializer.get_all_as_refs(&1, stack_id)) |> Keyword.new() |> EventRouter.new(), flush_tracker: diff --git a/packages/sync-service/lib/electric/shape_cache/storage.ex b/packages/sync-service/lib/electric/shape_cache/storage.ex index b9553972a4..065a9e62c6 100644 --- a/packages/sync-service/lib/electric/shape_cache/storage.ex +++ b/packages/sync-service/lib/electric/shape_cache/storage.ex @@ -88,9 +88,12 @@ defmodule Electric.ShapeCache.Storage do @doc """ Write a move in snapshot to the storage. Should write it alongside the main log, with stiching being done via a separate call `append_move_in_snapshot_to_log!`. + + The stream items are `[key, tags, json]`, where `tags` is the row tag list + and `json` is the encoded log item body. 
""" @callback write_move_in_snapshot!( - Enumerable.t({key :: String.t(), value :: Querying.json_iodata()}), + Enumerable.t(row()), name :: String.t(), shape_opts() ) :: :ok diff --git a/packages/sync-service/lib/electric/shapes/api/params.ex b/packages/sync-service/lib/electric/shapes/api/params.ex index e4a94f74bf..c1eefbefdd 100644 --- a/packages/sync-service/lib/electric/shapes/api/params.ex +++ b/packages/sync-service/lib/electric/shapes/api/params.ex @@ -3,6 +3,7 @@ defmodule Electric.Shapes.Api.Params do alias Electric.Replication.LogOffset alias Electric.Shapes.Api + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Shape import Ecto.Changeset @@ -330,7 +331,13 @@ defmodule Electric.Shapes.Api.Params do log_mode: fetch_field!(changeset, :log) ) do {:ok, shape} -> - put_change(changeset, :shape_definition, shape) + case DnfPlan.compile(shape) do + {:error, reason} -> + add_error(changeset, :where, reason) + + _ok -> + put_change(changeset, :shape_definition, shape) + end {:error, :connection_not_available} -> add_error( diff --git a/packages/sync-service/lib/electric/shapes/consumer.ex b/packages/sync-service/lib/electric/shapes/consumer.ex index 2917bd57c0..39f583f1d2 100644 --- a/packages/sync-service/lib/electric/shapes/consumer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer.ex @@ -1,11 +1,12 @@ defmodule Electric.Shapes.Consumer do use GenServer, restart: :temporary - alias Electric.Shapes.Consumer.ChangeHandling + alias Electric.Shapes.Consumer.EventHandler + alias Electric.Shapes.Consumer.EventHandlerBuilder + alias Electric.Shapes.Consumer.Effects alias Electric.Shapes.Consumer.InitialSnapshot - alias Electric.Shapes.Consumer.MoveHandling - alias Electric.Shapes.Consumer.MoveIns alias Electric.Shapes.Consumer.PendingTxn + alias Electric.Shapes.Consumer.SetupEffects alias Electric.Shapes.Consumer.State import Electric.Shapes.Consumer.State, only: :macros @@ -16,6 +17,7 @@ defmodule Electric.Shapes.Consumer do alias 
Electric.Shapes.Consumer.Materializer alias Electric.Shapes.ConsumerRegistry alias Electric.LogItems + alias Electric.Postgres.Inspector alias Electric.Replication.Changes alias Electric.Replication.Changes.Transaction @@ -132,6 +134,32 @@ defmodule Electric.Shapes.Consumer do end @impl GenServer + def handle_continue({:init_consumer, config}, state) do + %{ + stack_id: stack_id, + shape_handle: shape_handle + } = state + + {:ok, shape} = ShapeCache.ShapeStatus.fetch_shape_by_handle(stack_id, shape_handle) + + state = State.initialize_shape(state, shape, config) + + stack_storage = ShapeCache.Storage.for_stack(stack_id) + storage = ShapeCache.Storage.for_shape(shape_handle, stack_storage) + + # TODO: Remove. Only needed for InMemoryStorage + case ShapeCache.Storage.start_link(storage) do + {:ok, _pid} -> :ok + :ignore -> :ok + end + + writer = ShapeCache.Storage.init_writer!(storage, shape) + + state = State.initialize(state, storage, writer) + + finish_initialization(state, config.action, config.otel_ctx) + end + def handle_continue(:stop_and_clean, state) do stop_and_clean(state) end @@ -243,31 +271,7 @@ defmodule Electric.Shapes.Consumer do state = State.initialize(state, storage, writer) - if all_materializers_alive?(state) && subscribe(state, opts.action) do - Logger.debug("Writer for #{shape_handle} initialized") - - # We start the snapshotter even if there's a snapshot because it also performs the call - # to PublicationManager.add_shape/3. We *could* do that call here and avoid spawning a - # process if the shape already has a snapshot but the current semantics rely on being able - # to wait for the snapshot asynchronously and if we called publication manager here it would - # block and prevent await_snapshot_start calls from adding snapshot subscribers. 
- - {:ok, _pid} = - Shapes.DynamicConsumerSupervisor.start_snapshotter( - stack_id, - %{ - stack_id: stack_id, - shape: shape, - shape_handle: shape_handle, - storage: storage, - otel_ctx: Map.get(opts, :otel_ctx, nil) - } - ) - - {:noreply, state} - else - stop_and_clean(state) - end + finish_initialization(state, opts.action, Map.get(opts, :otel_ctx, nil)) end def handle_info({ShapeCache.Storage, :flushed, flushed_offset}, state) do @@ -287,6 +291,16 @@ defmodule Electric.Shapes.Consumer do {:noreply, state, state.hibernate_after} end + def handle_info({:global_last_seen_lsn, _lsn} = event, state) do + case handle_event(event, state) do + %{terminating?: true} = state -> + {:noreply, state, {:continue, :stop_and_clean}} + + state -> + {:noreply, state, state.hibernate_after} + end + end + # This is part of the storage module contract - messages tagged storage should be applied to the writer state. def handle_info({ShapeCache.Storage, message}, state) do writer = ShapeCache.Storage.apply_message(state.writer, message) @@ -301,63 +315,38 @@ defmodule Electric.Shapes.Consumer do "Consumer reacting to #{length(move_in)} move ins and #{length(move_out)} move outs from its #{dep_handle} dependency" end) - feature_flags = Electric.StackConfig.lookup(state.stack_id, :feature_flags, []) - tagged_subqueries_enabled? = "tagged_subqueries" in feature_flags - - # We need to invalidate the consumer in the following cases: - # - tagged subqueries are disabled since we cannot support causally correct event processing of 3+ level dependency trees - # so we just invalidating this middle shape instead - # - the where clause has an OR combined with the subquery so we can't tell if the move ins/outs actually affect the shape or not - # - the where clause has a NOT combined with the subquery (e.g. 
NOT IN) since move-in to the subquery - # should cause move-out from the outer shape, which isn't implemented - # - the shape has multiple subqueries at the same level since we can't correctly determine - # which dependency caused the move-in/out - should_invalidate? = - not tagged_subqueries_enabled? or state.or_with_subquery? or state.not_with_subquery? or - length(state.shape.shape_dependencies) > 1 - - if should_invalidate? do - stop_and_clean(state) - else - {state, notification} = - state - |> MoveHandling.process_move_ins(dep_handle, move_in) - |> MoveHandling.process_move_outs(dep_handle, move_out) - - :ok = notify_new_changes(state, notification) - - {:noreply, state} - end + handle_apply_event_result( + state, + apply_event( + state, + {:materializer_changes, dep_handle, %{move_in: move_in, move_out: move_out}} + ) + ) end - def handle_info({:pg_snapshot_known, name, snapshot}, state) do - Logger.debug(fn -> "Snapshot known for move-in #{name}" end) - - # Update the snapshot in waiting_move_ins - move_handling_state = MoveIns.set_snapshot(state.move_handling_state, name, snapshot) - - # Garbage collect touches visible in all known snapshots - state = %{state | move_handling_state: move_handling_state} - state = State.gc_touch_tracker(state) - - {:noreply, state, state.hibernate_after} + def handle_info({:pg_snapshot_known, snapshot}, state) do + Logger.debug(fn -> "Snapshot known for active move-in" end) + handle_apply_event_result(state, apply_event(state, {:pg_snapshot_known, snapshot})) end - def handle_info({:query_move_in_complete, name, key_set, snapshot}, state) do + def handle_info( + {:query_move_in_complete, snapshot_name, row_count, row_bytes, move_in_lsn}, + state + ) do Logger.debug(fn -> - "Consumer query move in complete for #{name} with #{length(key_set)} keys" + "Consumer query move in complete for #{state.shape_handle} with #{row_count} rows from #{snapshot_name} (#{row_bytes} bytes)" end) - {state, notification} = 
MoveHandling.query_complete(state, name, key_set, snapshot) - :ok = notify_new_changes(state, notification) - - # Garbage collect touches after query completes (no buffer consumption needed) - state = State.gc_touch_tracker(state) - - {:noreply, state, state.hibernate_after} + handle_apply_event_result( + state, + apply_event( + state, + {:query_move_in_complete, snapshot_name, row_count, row_bytes, move_in_lsn} + ) + ) end - def handle_info({:query_move_in_error, _, error, stacktrace}, state) do + def handle_info({:query_move_in_error, error, stacktrace}, state) do Logger.error( "Error querying move in for #{state.shape_handle}: #{Exception.format(:error, error, stacktrace)}" ) @@ -386,8 +375,6 @@ defmodule Electric.Shapes.Consumer do "Materializer down for a dependency: #{handle} (#{inspect(pid)}) (#{inspect(reason)})" ) - Materializer.delete_link_values(state.stack_id, handle) - handle_materializer_down(reason, state) end @@ -447,11 +434,6 @@ defmodule Electric.Shapes.Consumer do end end) - # Clean up this shape's link-values ETS entry. This consumer may itself be - # a dep shape; removing the entry prevents stale cached values from persisting - # after shutdown. - Materializer.delete_link_values(state.stack_id, state.shape_handle) - # always need to terminate writer to remove the writer ets (which belongs # to this process). leads to unecessary writes in the case of a deleted # shape but the alternative is leaking ets tables. 
@@ -482,6 +464,20 @@ defmodule Electric.Shapes.Consumer do |> mark_for_removal() end + defp handle_event({:global_last_seen_lsn, _lsn} = event, state) do + case apply_event(state, event) do + {:error, reason} -> + handle_event_error(state, reason) + + {state, notification, _num_changes, _total_size} -> + if notification do + :ok = notify_new_changes(state, notification) + end + + state + end + end + defp handle_event(%TransactionFragment{} = txn_fragment, state) do Logger.debug(fn -> "Txn fragment received in Shapes.Consumer: #{inspect(txn_fragment)}" end) handle_txn_fragment(txn_fragment, state) @@ -667,7 +663,7 @@ defmodule Electric.Shapes.Consumer do end end - defp convert_fragment_changes(changes, stack_id, shape_handle, shape) do + defp convert_fragment_changes(changes, stack_id, shape_handle, shape, extra_refs \\ nil) do Enum.reduce_while(changes, {[], 0}, fn %Changes.TruncatedRelation{}, _acc -> {:halt, :includes_truncate} @@ -676,7 +672,11 @@ defmodule Electric.Shapes.Consumer do # Apply Shape.convert_change to each change to: # 1. Filter out changes not matching the shape's table # 2. 
Apply WHERE clause filtering - case Shape.convert_change(shape, change, stack_id: stack_id, shape_handle: shape_handle) do + case Shape.convert_change(shape, change, + stack_id: stack_id, + shape_handle: shape_handle, + extra_refs: extra_refs + ) do [] -> {:cont, acc} @@ -796,77 +796,96 @@ defmodule Electric.Shapes.Consumer do ) end - defp do_handle_txn(%Transaction{xid: xid, changes: changes} = txn, state) do - %{shape: shape, writer: writer} = state + defp do_handle_txn(%Transaction{} = txn, state) do + timestamp = System.monotonic_time() - state = State.remove_completed_move_ins(state, txn) + case apply_event(state, txn) do + {:error, reason} -> + handle_event_error(state, reason) - extra_refs_full = - Materializer.get_all_as_refs(shape, state.stack_id) + {state, notification, num_changes, total_size} -> + if notification do + :ok = notify_new_changes(state, notification) - extra_refs_before_move_ins = - Enum.reduce(state.move_handling_state.in_flight_values, extra_refs_full, fn {key, value}, - acc -> - if is_map_key(acc, key), - do: Map.update!(acc, key, &MapSet.difference(&1, value)), - else: acc - end) + OpenTelemetry.add_span_attributes(%{ + num_bytes: total_size, + actual_num_changes: num_changes + }) - Logger.debug(fn -> "Extra refs: #{inspect(extra_refs_before_move_ins)}" end) + lag = calculate_replication_lag(txn.commit_timestamp) + OpenTelemetry.add_span_attributes(replication_lag: lag) - case ChangeHandling.process_changes( - changes, - state, - %{xid: xid, extra_refs: {extra_refs_before_move_ins, extra_refs_full}} - ) do - :includes_truncate -> - handle_txn_with_truncate(txn.xid, state) + Electric.Telemetry.OpenTelemetry.execute( + [:electric, :storage, :transaction_stored], + %{ + duration: System.monotonic_time() - timestamp, + bytes: total_size, + count: 1, + operations: num_changes, + replication_lag: lag + }, + Map.new(State.telemetry_attrs(state)) + ) - {_, state, 0, _} -> - Logger.debug(fn -> - "No relevant changes found for #{inspect(shape)} 
in txn #{txn.xid}" - end) + state + else + state + end + end + end - consider_flushed(state, txn.last_log_offset) + defp handle_apply_event_result(state, {:error, reason}) do + state = handle_event_error(state, reason) - {changes, state, num_changes, last_log_offset} -> - timestamp = System.monotonic_time() + if state.terminating? do + {:noreply, state, {:continue, :stop_and_clean}} + else + stop_and_clean(state) + end + end - {lines, total_size} = prepare_log_entries(changes, xid, shape) - writer = ShapeCache.Storage.append_to_log!(lines, writer) + defp handle_apply_event_result(_old_state, {state, notification, _num_changes, _total_size}) do + if notification do + :ok = notify_new_changes(state, notification) + end - OpenTelemetry.add_span_attributes(%{ - num_bytes: total_size, - actual_num_changes: num_changes - }) + {:noreply, state, state.hibernate_after} + end - :ok = notify_new_changes(state, changes, last_log_offset) + defp apply_event(state, event) do + case EventHandler.handle_event(state.event_handler, event) do + {:error, reason} -> + {:error, reason} - lag = calculate_replication_lag(txn.commit_timestamp) - OpenTelemetry.add_span_attributes(replication_lag: lag) + {:ok, new_handler, effects} -> + state = %{state | event_handler: new_handler} + previous_offset = state.latest_offset - Electric.Telemetry.OpenTelemetry.execute( - [:electric, :storage, :transaction_stored], - %{ - duration: System.monotonic_time() - timestamp, - bytes: total_size, - count: 1, - operations: num_changes, - replication_lag: lag - }, - Map.new(State.telemetry_attrs(state)) - ) + result = Effects.execute(effects, state) - %{ - state - | writer: writer, - latest_offset: last_log_offset, - txn_offset_mapping: - state.txn_offset_mapping ++ [{last_log_offset, txn.last_log_offset}] - } + notification = + if result.state.latest_offset != previous_offset do + {{previous_offset, result.state.latest_offset}, result.state.latest_offset} + end + + {result.state, notification, 
result.num_changes, result.total_size} end end + defp handle_event_error(state, {:truncate, xid}) do + handle_txn_with_truncate(xid, state) + end + + defp handle_event_error(state, :unsupported_subquery) do + mark_for_removal(state) + end + + defp handle_event_error(state, :buffer_overflow) do + Logger.warning("Subquery buffer overflow for #{state.shape_handle} - terminating shape") + + mark_for_removal(state) + end + defp handle_txn_with_truncate(xid, state) do # TODO: This is a very naive way to handle truncations: if ANY relevant truncates are # present in the transaction, we're considering the whole transaction empty, and @@ -1043,17 +1062,47 @@ defmodule Electric.Shapes.Consumer do defp more_recent_offset(offset, nil), do: offset defp more_recent_offset(offset1, offset2), do: LogOffset.max(offset1, offset2) - defp subscribe(state, action) do - case ShapeLogCollector.add_shape(state.stack_id, state.shape_handle, state.shape, action) do - :ok -> - true - - {:error, error} -> - Logger.warning( - "Shape #{state.shape_handle} cannot subscribe due to #{inspect(error)} - invalidating shape" - ) + defp initialize_event_handler(%State{} = state, action) do + with {:ok, handler, setup_effects} <- EventHandlerBuilder.build(state, action), + {:ok, state} <- SetupEffects.execute(setup_effects, %{state | event_handler: handler}) do + {:ok, state} + else + {:error, %State{} = state} -> + {:error, state} + end + end - false + defp finish_initialization(%State{} = state, action, otel_ctx) do + if all_materializers_alive?(state) do + case initialize_event_handler(state, action) do + {:ok, state} -> + Logger.debug("Writer for #{state.shape_handle} initialized") + + # We start the snapshotter even if there's a snapshot because it also performs the call + # to PublicationManager.add_shape/3. 
We *could* do that call here and avoid spawning a + # process if the shape already has a snapshot but the current semantics rely on being able + # to wait for the snapshot asynchronously and if we called publication manager here it would + # block and prevent await_snapshot_start calls from adding snapshot subscribers. + + {:ok, _pid} = + Shapes.DynamicConsumerSupervisor.start_snapshotter( + state.stack_id, + %{ + stack_id: state.stack_id, + shape: state.shape, + shape_handle: state.shape_handle, + storage: state.storage, + otel_ctx: otel_ctx + } + ) + + {:noreply, state} + + {:error, state} -> + stop_and_clean(state) + end + else + stop_and_clean(state) end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex b/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex deleted file mode 100644 index c7c81ed8d5..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/change_handling.ex +++ /dev/null @@ -1,144 +0,0 @@ -defmodule Electric.Shapes.Consumer.ChangeHandling do - alias Electric.Shapes.Consumer.MoveIns - alias Electric.Replication.Eval.Runner - alias Electric.Shapes.Shape - alias Electric.Shapes.WhereClause - alias Electric.Replication.LogOffset - alias Electric.LogItems - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.Consumer - alias Electric.Replication.Changes - - require Electric.Shapes.Shape - - @spec process_changes(list(Changes.change()), State.t(), context) :: - {filtered_changes :: list(Changes.change()), state :: State.t(), - count :: non_neg_integer(), last_log_offset :: LogOffset.t() | nil} - | :includes_truncate - when context: map() - def process_changes(changes, state, ctx) - when is_map_key(ctx, :xid) do - do_process_changes(changes, state, ctx, [], 0) - end - - def do_process_changes(changes, state, ctx, acc, count) - - def do_process_changes([], state, _, _, 0), do: {[], state, 0, nil} - - def do_process_changes([], state, _, [head | tail], total_ops), - do: - 
{Enum.reverse([%{head | last?: true} | tail]), state, total_ops, - LogItems.expected_offset_after_split(head)} - - def do_process_changes([%Changes.TruncatedRelation{} | _], _, _, _, _), do: :includes_truncate - - # We're special casing processing without dependencies, as it's very common so we can optimize it. - def do_process_changes([change | rest], %State{shape: shape} = state, ctx, acc, count) - when not Shape.has_dependencies(shape) do - case Shape.convert_change(shape, change, - stack_id: state.stack_id, - shape_handle: state.shape_handle, - extra_refs: ctx.extra_refs - ) do - [] -> - do_process_changes(rest, state, ctx, acc, count) - - [change] -> - state = State.track_change(state, ctx.xid, change) - do_process_changes(rest, state, ctx, [change | acc], count + 1) - end - end - - def do_process_changes( - [change | rest], - %State{shape: shape, stack_id: stack_id, shape_handle: shape_handle} = state, - ctx, - acc, - count - ) do - if not change_visible_in_resolved_move_ins?(change, state, ctx) and - not change_will_be_covered_by_move_in?(change, state, ctx) do - case Shape.convert_change(shape, change, - stack_id: stack_id, - shape_handle: shape_handle, - extra_refs: ctx.extra_refs - ) do - [] -> - do_process_changes(rest, state, ctx, acc, count) - - [change] -> - state = State.track_change(state, ctx.xid, change) - do_process_changes(rest, state, ctx, [change | acc], count + 1) - end - else - do_process_changes(rest, state, ctx, acc, count) - end - end - - defp change_visible_in_resolved_move_ins?(change, state, ctx) do - Consumer.MoveIns.change_already_visible?(state.move_handling_state, ctx.xid, change) - end - - defp change_will_be_covered_by_move_in?(%Changes.DeletedRecord{}, _, _), do: false - - defp change_will_be_covered_by_move_in?(change, state, ctx) do - # First check if the new record's sublink values are in pending move-ins - referenced_values = get_referenced_values(change, state) - - if 
change_visible_in_unresolved_move_ins_for_values?(referenced_values, state, ctx) do - # For UpdatedRecords where the sublink value changed, we must NOT skip the change. - # The move-in query will return this row as an INSERT, which doesn't carry - # removed_move_tags. Without the tag transition from the WAL change, the client - # will retain the old tag, causing the row to not be properly cleaned up on - # subsequent move-outs. - if is_struct(change, Changes.UpdatedRecord) and - sublink_value_changed?(change, state) do - false - else - # Even if the sublink value is in a pending move-in, we should only skip - # this change if the new record actually matches the full WHERE clause. - # The move-in query uses the full WHERE clause, so if the record doesn't - # match other non-subquery conditions in the WHERE clause, the move-in - # won't return this row and we need to process this change normally. - case ctx.extra_refs do - {_extra_refs_old, extra_refs_new} -> - WhereClause.includes_record?(state.shape.where, change.record, extra_refs_new) - - _ -> - # If extra_refs is not a tuple (e.g., empty map in tests), fall back to - # the old behavior of skipping the change - true - end - end - else - false - end - end - - defp sublink_value_changed?( - %Changes.UpdatedRecord{record: new_record, old_record: old_record}, - state - ) do - Enum.any?(state.shape.subquery_comparison_expressions, fn {_path, expr} -> - {:ok, new_value} = Runner.execute_for_record(expr, new_record) - {:ok, old_value} = Runner.execute_for_record(expr, old_record) - new_value != old_value - end) - end - - defp get_referenced_values(change, state) do - state.shape.subquery_comparison_expressions - |> Map.new(fn {path, expr} -> - {:ok, value} = Runner.execute_for_record(expr, change.record) - {path, value} - end) - end - - defp change_visible_in_unresolved_move_ins_for_values?(referenced_values, state, ctx) do - MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state.move_handling_state, - 
referenced_values, - ctx.xid - ) - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/effect_list.ex b/packages/sync-service/lib/electric/shapes/consumer/effect_list.ex new file mode 100644 index 0000000000..d1cd126ca7 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/effect_list.ex @@ -0,0 +1,19 @@ +defmodule Electric.Shapes.Consumer.EffectList do + @moduledoc false + + alias Electric.Shapes.Consumer.Effects + + @opaque t() :: [Effects.t()] + + @spec new([Effects.t()]) :: t() + def new(effects \\ []) when is_list(effects), do: Enum.reverse(effects) + + @spec append(t(), Effects.t()) :: t() + def append(acc, %_{} = effect), do: [effect | acc] + + @spec append_all(t(), [Effects.t()]) :: t() + def append_all(acc, effects) when is_list(effects), do: Enum.reverse(effects, acc) + + @spec to_list(t()) :: [Effects.t()] + def to_list(acc), do: Enum.reverse(acc) +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/effects.ex b/packages/sync-service/lib/electric/shapes/consumer/effects.ex new file mode 100644 index 0000000000..ba362420c6 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/effects.ex @@ -0,0 +1,365 @@ +defmodule Electric.Shapes.Consumer.Effects do + @moduledoc false + # These are the runtime effects emitted by event handlers after functional + # event processing. Consumer bootstrapping and other imperative setup steps + # live in SetupEffects rather than being mixed into this runtime effect layer. 
+ + alias Electric.Connection.Manager + alias Electric.Postgres.SnapshotQuery + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + alias Electric.ShapeCache.Storage + alias Electric.LogItems + alias Electric.Replication.LogOffset + alias Electric.ShapeCache + alias Electric.Shapes.Querying + alias Electric.Shapes.Shape + + require Logger + + defmodule AppendChanges do + @moduledoc false + defstruct changes: [], xid: nil + end + + defmodule AppendControl do + @moduledoc false + defstruct message: nil + end + + defmodule AppendMoveInSnapshot do + @moduledoc false + defstruct [:snapshot_name, :snapshot, row_count: 0, row_bytes: 0] + end + + defmodule NotifyFlushed do + @moduledoc false + defstruct [:log_offset] + end + + defmodule StartMoveInQuery do + @moduledoc false + defstruct [:dnf_plan, :trigger_dep_index, :values, :views_before_move, :views_after_move] + + @type t() :: %__MODULE__{ + dnf_plan: Electric.Shapes.DnfPlan.t(), + trigger_dep_index: non_neg_integer(), + values: list(), + views_before_move: Electric.Shapes.Consumer.Subqueries.Views.t(), + views_after_move: Electric.Shapes.Consumer.Subqueries.Views.t() + } + end + + defmodule SubscribeGlobalLsn do + @moduledoc false + defstruct [] + end + + defmodule UnsubscribeGlobalLsn do + @moduledoc false + defstruct [] + end + + defmodule AddToSubqueryIndex do + @moduledoc false + defstruct [:dep_index, :subquery_ref, :values] + end + + defmodule RemoveFromSubqueryIndex do + @moduledoc false + defstruct [:dep_index, :subquery_ref, :values] + end + + @type t() :: + %AppendChanges{} + | %AppendControl{} + | %AppendMoveInSnapshot{} + | %NotifyFlushed{} + | %StartMoveInQuery{} + | %SubscribeGlobalLsn{} + | %UnsubscribeGlobalLsn{} + | %AddToSubqueryIndex{} + | %RemoveFromSubqueryIndex{} + + @type execution_result() :: %{ + state: term(), + num_changes: non_neg_integer(), + total_size: non_neg_integer(), + pending_written_offset: LogOffset.t() | nil + } + + @spec execute([t()], term(), keyword()) :: execution_result() 
+ def execute(effects, state, _opts \\ []) when is_list(effects) do + Enum.reduce( + effects, + %{state: state, num_changes: 0, total_size: 0, pending_written_offset: nil}, + fn effect, acc -> + execute_effect(effect, acc) + end + ) + end + + defp execute_effect(%AppendChanges{changes: [], xid: _}, acc), do: acc + + defp execute_effect(%AppendChanges{changes: changes, xid: xid}, acc) do + state = acc.state + + {lines, total_size, state} = + Enum.reduce(changes, {[], 0, state}, fn change, {lines, size, state} -> + {new_lines, line_size} = change_to_log_lines(change, xid, state.shape) + last_offset = new_lines |> List.last() |> elem(0) + {lines ++ new_lines, size + line_size, %{state | latest_offset: last_offset}} + end) + + writer = ShapeCache.Storage.append_to_log!(lines, state.writer) + state = %{state | writer: writer} + + %{ + acc + | state: state, + num_changes: acc.num_changes + length(lines), + total_size: acc.total_size + total_size, + pending_written_offset: state.latest_offset + } + end + + defp execute_effect(%AppendControl{message: message}, acc) do + state = acc.state + encoded = Jason.encode!(message) + + {{_, offset}, writer} = + ShapeCache.Storage.append_control_message!(encoded, state.writer) + + state = %{state | writer: writer, latest_offset: offset} + + %{ + acc + | state: state, + num_changes: acc.num_changes + 1, + total_size: acc.total_size + byte_size(encoded), + pending_written_offset: state.latest_offset + } + end + + defp execute_effect( + %AppendMoveInSnapshot{ + snapshot_name: snapshot_name, + row_count: row_count, + row_bytes: row_bytes, + snapshot: snapshot + }, + acc + ) do + state = acc.state + + {{_, inserted_offset}, writer} = + ShapeCache.Storage.append_move_in_snapshot_to_log!( + snapshot_name, + state.writer, + fn _, _ -> false end + ) + + state = %{state | writer: writer, latest_offset: inserted_offset} + + if row_count == 0 do + %{acc | state: state} + else + snapshot_end = + snapshot + |> snapshot_end_message() + |> 
Jason.encode!() + + {{_, offset}, writer} = + ShapeCache.Storage.append_control_message!(snapshot_end, state.writer) + + state = %{state | writer: writer, latest_offset: offset} + + %{ + acc + | state: state, + num_changes: acc.num_changes + row_count + 1, + total_size: acc.total_size + row_bytes + byte_size(snapshot_end), + pending_written_offset: state.latest_offset + } + end + end + + defp execute_effect(%NotifyFlushed{log_offset: log_offset}, acc) do + state = acc.state + + state = + if acc.pending_written_offset do + %{ + state + | txn_offset_mapping: + state.txn_offset_mapping ++ [{acc.pending_written_offset, log_offset}] + } + else + consider_flushed(state, log_offset) + end + + %{acc | state: state, pending_written_offset: nil} + end + + defp execute_effect(%StartMoveInQuery{} = effect, acc) do + state = acc.state + supervisor = Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) + query_move_in_async(supervisor, state, effect, self()) + acc + end + + defp execute_effect(%SubscribeGlobalLsn{}, acc) do + {:ok, _} = Electric.LsnTracker.subscribe_to_global_lsn_updates(acc.state.stack_id) + + acc + end + + defp execute_effect(%UnsubscribeGlobalLsn{}, acc) do + :ok = Electric.LsnTracker.unsubscribe_from_global_lsn_updates(acc.state.stack_id) + acc + end + + defp execute_effect(%AddToSubqueryIndex{} = effect, acc) do + update_subquery_index(acc, effect.dep_index, effect.subquery_ref, effect.values, :add) + end + + defp execute_effect(%RemoveFromSubqueryIndex{} = effect, acc) do + update_subquery_index(acc, effect.dep_index, effect.subquery_ref, effect.values, :remove) + end + + defp update_subquery_index(acc, dep_index, subquery_ref, values, op) do + state = acc.state + index = SubqueryIndex.for_stack(state.stack_id) + fun = if op == :add, do: &SubqueryIndex.add_value/5, else: &SubqueryIndex.remove_value/5 + + for {value, _original} <- values do + fun.(index, state.shape_handle, subquery_ref, dep_index, value) + end + + acc + end + + @spec 
query_move_in_async(pid() | atom(), map(), StartMoveInQuery.t(), pid()) :: :ok + def query_move_in_async( + supervisor, + consumer_state, + %StartMoveInQuery{} = request, + consumer_pid + ) do + {where, params} = + Querying.move_in_where_clause( + request.dnf_plan, + request.trigger_dep_index, + request.views_before_move, + request.views_after_move, + consumer_state.shape.where.used_refs + ) + + pool = Manager.pool_name(consumer_state.stack_id, :snapshot) + stack_id = consumer_state.stack_id + shape = consumer_state.shape + shape_handle = consumer_state.shape_handle + + :telemetry.execute([:electric, :subqueries, :move_in_triggered], %{count: 1}, %{ + stack_id: stack_id + }) + + Task.Supervisor.start_child(supervisor, fn -> + snapshot_name = Electric.Utils.uuid4() + + try do + SnapshotQuery.execute_for_shape(pool, shape_handle, shape, + stack_id: stack_id, + query_reason: "move_in_query", + snapshot_info_fn: fn _, pg_snapshot, _lsn -> + send(consumer_pid, {:pg_snapshot_known, pg_snapshot}) + end, + query_fn: fn conn, _pg_snapshot, lsn -> + task_pid = self() + + Querying.query_move_in(conn, stack_id, shape_handle, shape, {where, params}, + dnf_plan: request.dnf_plan, + views: request.views_after_move + ) + |> Stream.transform( + fn -> {0, 0} end, + fn [_, _, json] = row, {row_count, row_bytes} -> + {[row], {row_count + 1, row_bytes + IO.iodata_length(json)}} + end, + fn {row_count, row_bytes} -> + send(task_pid, {:move_in_snapshot_stats, row_count, row_bytes}) + end + ) + |> Storage.write_move_in_snapshot!(snapshot_name, consumer_state.storage) + + {row_count, row_bytes} = + receive do + {:move_in_snapshot_stats, row_count, row_bytes} -> {row_count, row_bytes} + end + + send( + consumer_pid, + {:query_move_in_complete, snapshot_name, row_count, row_bytes, lsn} + ) + end + ) + rescue + error -> + send(consumer_pid, {:query_move_in_error, error, __STACKTRACE__}) + end + end) + + :ok + end + + defp consider_flushed(state, log_offset) do + alias 
Electric.Replication.ShapeLogCollector + + if state.txn_offset_mapping == [] do + ShapeLogCollector.notify_flushed(state.stack_id, state.shape_handle, log_offset) + state + else + new_boundary = log_offset + + {head, tail} = + Enum.split_while( + state.txn_offset_mapping, + &(LogOffset.compare(elem(&1, 1), new_boundary) == :lt) + ) + + case Enum.reverse(head) do + [] -> + state + + [{offset, _} | rest] -> + %{state | txn_offset_mapping: Enum.reverse([{offset, new_boundary} | rest], tail)} + end + end + end + + defp change_to_log_lines(change, xid, shape) do + lines = + change + |> LogItems.from_change( + xid, + Shape.pk(shape, change.relation), + shape.replica + ) + |> Enum.map(fn {offset, %{key: key} = log_item} -> + {offset, key, log_item.headers.operation, Jason.encode!(log_item)} + end) + + size = Enum.reduce(lines, 0, fn {_, _, _, json}, acc -> acc + byte_size(json) end) + {lines, size} + end + + defp snapshot_end_message({xmin, xmax, xip_list}) do + %{ + headers: %{ + control: "snapshot-end", + xmin: to_string(xmin), + xmax: to_string(xmax), + xip_list: Enum.map(xip_list, &to_string/1) + } + } + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex new file mode 100644 index 0000000000..cb7f68af0c --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler.ex @@ -0,0 +1,18 @@ +defmodule Electric.Shapes.Consumer.EventHandler do + @moduledoc false + # Event handlers are the functional part of the consumer: they take handler + # state plus an event and return new handler state plus declarative runtime + # effects. Imperative startup/setup work is kept separate in SetupEffects. 
+ + alias Electric.Shapes.Consumer.Effects + + @type t() :: term() + + @callback handle_event(t(), term()) :: + {:ok, t(), [Effects.t()]} | {:error, term()} + + @spec handle_event(t(), term()) :: {:ok, t(), [Effects.t()]} | {:error, term()} + def handle_event(handler, event) do + handler.__struct__.handle_event(handler, event) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex new file mode 100644 index 0000000000..57ada249eb --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler/default.ex @@ -0,0 +1,43 @@ +defmodule Electric.Shapes.Consumer.EventHandler.Default do + @moduledoc false + + @behaviour Electric.Shapes.Consumer.EventHandler + + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.EffectList + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.TransactionConverter + alias Electric.Shapes.Shape + + @enforce_keys [:shape, :stack_id, :shape_handle] + defstruct [:shape, :stack_id, :shape_handle] + + @type t() :: %__MODULE__{ + shape: Shape.t(), + stack_id: String.t(), + shape_handle: String.t() + } + + @impl true + def handle_event(state, %Transaction{} = txn) do + with {:ok, effects} <- + TransactionConverter.transaction_to_effects( + txn, + state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle + ) do + effects = + effects + |> EffectList.new() + |> EffectList.append(%Effects.NotifyFlushed{log_offset: txn.last_log_offset}) + |> EffectList.to_list() + + {:ok, state, effects} + end + end + + def handle_event(state, {:global_last_seen_lsn, _lsn}) do + {:ok, state, []} + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex new file mode 100644 index 0000000000..c3d7bf299a --- /dev/null +++ 
b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/buffering.ex @@ -0,0 +1,192 @@ +defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering do + # Handles events while a move-in is buffered and waiting to be spliced. + + @behaviour Electric.Shapes.Consumer.EventHandler + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.EffectList + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Steady + alias Electric.Shapes.Consumer.Subqueries.ActiveMove + alias Electric.Shapes.Consumer.Subqueries.IndexChanges + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.RefResolver + alias Electric.Shapes.Consumer.Subqueries.ShapeInfo + alias Electric.Shapes.Consumer.Subqueries.SplicePlan + alias Electric.Shapes.Consumer.Subqueries.Views + + @enforce_keys [:shape_info, :queue, :active_move] + defstruct [:shape_info, :queue, :active_move] + + @type t() :: %__MODULE__{ + shape_info: ShapeInfo.t(), + queue: MoveQueue.t(), + active_move: ActiveMove.t() + } + + @spec start( + ShapeInfo.t(), + Views.t(), + MoveQueue.t(), + IndexChanges.move(), + [String.t()], + keyword() + ) :: {:ok, t(), [Effects.t()]} + def start( + %ShapeInfo{} = shape_info, + views, + %MoveQueue{} = queue, + {dep_move_kind, dep_index, values} = move, + subquery_ref, + opts \\ [] + ) + when is_map(views) do + state = %__MODULE__{ + shape_info: shape_info, + queue: queue, + active_move: + views + |> ActiveMove.start(dep_index, dep_move_kind, subquery_ref, values) + |> ActiveMove.carry_latest_seen_lsn(Keyword.get(opts, :latest_seen_lsn)) + } + + effects = + EffectList.new() + |> maybe_subscribe_global_lsn(Keyword.get(opts, :subscribe_global_lsn?, true)) + |> EffectList.append_all( + IndexChanges.effects_for_buffering(state.shape_info.dnf_plan, move, subquery_ref) + ) + |> EffectList.append(start_move_in_query_effect(state)) + |> 
EffectList.to_list() + + {:ok, state, effects} + end + + @impl true + def handle_event(%__MODULE__{} = state, %Transaction{} = txn) do + next_active_move = ActiveMove.buffer_txn(state.active_move, txn) + + if ActiveMove.buffered_txn_count(next_active_move) > state.shape_info.buffer_max_transactions do + {:error, :buffer_overflow} + else + state + |> Map.put(:active_move, next_active_move) + |> maybe_splice() + end + end + + def handle_event(%__MODULE__{} = state, {:global_last_seen_lsn, lsn}) do + next_active_move = ActiveMove.record_seen_lsn(state.active_move, Lsn.from_integer(lsn)) + + state + |> Map.put(:active_move, next_active_move) + |> maybe_splice() + end + + def handle_event(%__MODULE__{} = state, {:materializer_changes, dep_handle, payload}) do + subquery_ref = RefResolver.ref_from_dep_handle!(state.shape_info.ref_resolver, dep_handle) + dep_index = subquery_ref |> List.last() |> String.to_integer() + dep_view = Views.current(state.active_move.views_after_move, subquery_ref) + + {:ok, %{state | queue: MoveQueue.enqueue(state.queue, dep_index, payload, dep_view)}, []} + end + + def handle_event(%__MODULE__{} = state, {:pg_snapshot_known, snapshot}) do + state + |> Map.put(:active_move, ActiveMove.record_snapshot!(state.active_move, snapshot)) + |> maybe_splice() + end + + def handle_event( + %__MODULE__{} = state, + {:query_move_in_complete, snapshot_name, row_count, row_bytes, move_in_lsn} + ) do + state + |> Map.put( + :active_move, + ActiveMove.record_query_complete!( + state.active_move, + snapshot_name, + row_count, + row_bytes, + move_in_lsn + ) + ) + |> maybe_splice() + end + + defp maybe_splice(%__MODULE__{active_move: active_move} = state) do + if ActiveMove.ready_to_splice?(active_move) do + splice(state) + else + {:ok, state, []} + end + end + + defp splice(%{active_move: active_move} = state) do + with {:ok, splice_plan} <- SplicePlan.build(active_move, state.shape_info) do + index_effects = + IndexChanges.effects_for_complete( + 
state.shape_info.dnf_plan, + {active_move.dep_move_kind, active_move.dep_index, active_move.values}, + active_move.subquery_ref + ) + + steady_state = %Steady{ + shape_info: state.shape_info, + views: active_move.views_after_move, + queue: state.queue + } + + effects = + splice_plan.effects + |> EffectList.new() + |> EffectList.append_all(index_effects) + + case Steady.drain_queue( + steady_state, + effects, + subscription_active?: true, + latest_seen_lsn: active_move.latest_seen_lsn + ) do + {:ok, %Steady{} = next_state, effects} -> + effects = + effects + |> maybe_notify_flushed(splice_plan.flushed_log_offset) + |> EffectList.append(%Effects.UnsubscribeGlobalLsn{}) + + {:ok, next_state, EffectList.to_list(effects)} + + {:ok, %__MODULE__{} = next_state, effects} -> + effects = + effects + |> maybe_notify_flushed(splice_plan.flushed_log_offset) + + {:ok, next_state, EffectList.to_list(effects)} + end + end + end + + defp start_move_in_query_effect(%__MODULE__{shape_info: shape_info, active_move: active_move}) do + %Effects.StartMoveInQuery{ + dnf_plan: shape_info.dnf_plan, + trigger_dep_index: active_move.dep_index, + values: active_move.values, + views_before_move: active_move.views_before_move, + views_after_move: active_move.views_after_move + } + end + + defp maybe_subscribe_global_lsn(effects, true) do + EffectList.append(effects, %Effects.SubscribeGlobalLsn{}) + end + + defp maybe_subscribe_global_lsn(effects, false), do: effects + + defp maybe_notify_flushed(effects, nil), do: effects + + defp maybe_notify_flushed(effects, log_offset) do + EffectList.append(effects, %Effects.NotifyFlushed{log_offset: log_offset}) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex new file mode 100644 index 0000000000..fa36c04641 --- /dev/null +++ 
b/packages/sync-service/lib/electric/shapes/consumer/event_handler/subqueries/steady.ex @@ -0,0 +1,156 @@ +defmodule Electric.Shapes.Consumer.EventHandler.Subqueries.Steady do + # Handles events while the handler is in its steady, non-buffering state. + + @behaviour Electric.Shapes.Consumer.EventHandler + + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.EffectList + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering + alias Electric.Shapes.Consumer.TransactionConverter + alias Electric.Shapes.Consumer.Subqueries.IndexChanges + alias Electric.Shapes.Consumer.Subqueries.MoveBroadcast + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + alias Electric.Shapes.Consumer.Subqueries.RefResolver + alias Electric.Shapes.Consumer.Subqueries.ShapeInfo + alias Electric.Shapes.Consumer.Subqueries.Views + + @enforce_keys [:shape_info, :views] + defstruct [:shape_info, :views, queue: MoveQueue.new()] + + @type t() :: %__MODULE__{ + shape_info: ShapeInfo.t(), + views: Views.t(), + queue: MoveQueue.t() + } + + @impl true + def handle_event(%__MODULE__{} = state, %Transaction{} = txn) do + with {:ok, effects} <- append_txn_effects(txn, state.shape_info, state.views) do + {:ok, state, effects} + end + end + + def handle_event(%__MODULE__{} = state, {:global_last_seen_lsn, _lsn}) do + # Straggler message after unsubscribe; ignore. 
+ {:ok, state, []} + end + + def handle_event( + %__MODULE__{ + shape_info: %ShapeInfo{dependency_move_policy: :invalidate_on_dependency_move} + }, + {:materializer_changes, _dep_handle, _payload} + ) do + {:error, :unsupported_subquery} + end + + def handle_event(%__MODULE__{} = state, {:materializer_changes, dep_handle, payload}) do + subquery_ref = RefResolver.ref_from_dep_handle!(state.shape_info.ref_resolver, dep_handle) + dep_index = subquery_ref |> List.last() |> String.to_integer() + dep_view = Views.current(state.views, subquery_ref) + next_state = %{state | queue: MoveQueue.enqueue(state.queue, dep_index, payload, dep_view)} + + with {:ok, next_state, effects} <- drain_queue(next_state, EffectList.new()) do + {:ok, next_state, EffectList.to_list(effects)} + end + end + + def handle_event(%__MODULE__{}, {:pg_snapshot_known, _snapshot}) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} while no move-in is buffering" + end + + def handle_event( + %__MODULE__{}, + {:query_move_in_complete, _snapshot_name, _row_count, _row_bytes, _move_in_lsn} + ) do + raise ArgumentError, + "received {:query_move_in_complete, snapshot_name, row_count, row_bytes, move_in_lsn} while no move-in is buffering" + end + + @spec drain_queue(t(), EffectList.t(), keyword()) :: + {:ok, t() | Buffering.t(), EffectList.t()} | {:error, term()} + def drain_queue(%__MODULE__{} = state, effects, opts \\ []) do + case MoveQueue.pop_next(state.queue) do + nil -> + {:ok, state, effects} + + {{dep_move_kind, dep_index, values} = move, queue} -> + subquery_ref = RefResolver.ref_from_dep_index!(state.shape_info.ref_resolver, dep_index) + subscription_active? 
= Keyword.get(opts, :subscription_active?, false) + latest_seen_lsn = Keyword.get(opts, :latest_seen_lsn) + + case outer_move_kind(state.shape_info, dep_index, dep_move_kind) do + :move_in -> + with {:ok, next_state, start_effects} <- + Buffering.start( + state.shape_info, + state.views, + queue, + move, + subquery_ref, + subscribe_global_lsn?: not subscription_active?, + latest_seen_lsn: latest_seen_lsn + ) do + {:ok, next_state, EffectList.append_all(effects, start_effects)} + end + + :move_out -> + next_state = %{ + state + | queue: queue, + views: Views.apply_move(state.views, subquery_ref, values, dep_move_kind) + } + + index_effects = + IndexChanges.effects_for_complete(state.shape_info.dnf_plan, move, subquery_ref) + + effects = + effects + |> EffectList.append( + MoveBroadcast.effect_for_move_out(dep_index, values, state.shape_info) + ) + |> EffectList.append_all(index_effects) + + drain_queue( + next_state, + effects, + opts + ) + end + end + end + + defp outer_move_kind( + %ShapeInfo{dnf_plan: %{dependency_polarities: polarities}}, + dep_index, + move_kind + ) do + case {Map.fetch!(polarities, dep_index), move_kind} do + {:positive, effect} -> effect + {:negated, :move_in} -> :move_out + {:negated, :move_out} -> :move_in + end + end + + defp append_txn_effects(%Transaction{} = txn, %ShapeInfo{} = shape_info, views) + when is_map(views) do + with {:ok, effects} <- + TransactionConverter.transaction_to_effects( + txn, + shape_info.shape, + stack_id: shape_info.stack_id, + shape_handle: shape_info.shape_handle, + extra_refs: {views, views}, + dnf_plan: shape_info.dnf_plan + ) do + effects = + effects + |> EffectList.new() + |> EffectList.append(%Effects.NotifyFlushed{log_offset: txn.last_log_offset}) + |> EffectList.to_list() + + {:ok, effects} + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/event_handler_builder.ex b/packages/sync-service/lib/electric/shapes/consumer/event_handler_builder.ex new file mode 100644 index 
0000000000..feb251e914 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/event_handler_builder.ex @@ -0,0 +1,76 @@ +defmodule Electric.Shapes.Consumer.EventHandlerBuilder do + # Builds the initial event handler and ordered setup effects for a consumer shape. + + alias Electric.Shapes.Consumer.EventHandler + alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.Consumer.SetupEffects + alias Electric.Shapes.Consumer.State + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @spec build(State.t(), :create | :restore) :: + {:ok, EventHandler.t(), [SetupEffects.t()]} + def build(%State{shape: %Shape{shape_dependencies_handles: dep_handles}} = state, action) + when dep_handles != [] do + {:ok, dnf_plan} = DnfPlan.compile(state.shape) + dependency_move_policy = dependency_move_policy(state.stack_id, state.shape) + + {views, dep_handle_to_ref, dep_index_to_ref} = + dep_handles + |> Enum.with_index() + |> Enum.reduce({%{}, %{}, %{}}, fn {handle, index}, + {views, handle_mapping, index_mapping} -> + materializer_opts = %{stack_id: state.stack_id, shape_handle: handle} + :ok = Materializer.wait_until_ready(materializer_opts) + view = Materializer.get_link_values(materializer_opts) + ref = ["$sublink", Integer.to_string(index)] + + {Map.put(views, ref, view), Map.put(handle_mapping, handle, {index, ref}), + Map.put(index_mapping, index, ref)} + end) + + buffer_max_transactions = + Electric.StackConfig.lookup( + state.stack_id, + :subquery_buffer_max_transactions, + Electric.Config.default(:subquery_buffer_max_transactions) + ) + + handler = %EventHandler.Subqueries.Steady{ + shape_info: %Electric.Shapes.Consumer.Subqueries.ShapeInfo{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle, + dnf_plan: dnf_plan, + ref_resolver: + Electric.Shapes.Consumer.Subqueries.RefResolver.new(dep_handle_to_ref, dep_index_to_ref), + buffer_max_transactions: buffer_max_transactions, + dependency_move_policy: 
dependency_move_policy + }, + views: views + } + + {:ok, handler, + [%SetupEffects.SubscribeShape{action: action}, %SetupEffects.SeedSubqueryIndex{}]} + end + + def build(%State{} = state, action) do + handler = %EventHandler.Default{ + shape: state.shape, + stack_id: state.stack_id, + shape_handle: state.shape_handle + } + + {:ok, handler, [%SetupEffects.SubscribeShape{action: action}]} + end + + defp dependency_move_policy(stack_id, _shape) do + feature_flags = Electric.StackConfig.lookup(stack_id, :feature_flags, []) + + if "tagged_subqueries" not in feature_flags do + :invalidate_on_dependency_move + else + :stream_dependency_moves + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex index 9a613107b9..d71dffc8e5 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/materializer.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/materializer.ex @@ -318,21 +318,28 @@ defmodule Electric.Shapes.Consumer.Materializer do } -> case operation do "insert" -> - %Changes.NewRecord{key: key, record: value, move_tags: Map.get(headers, "tags", [])} + %Changes.NewRecord{ + key: key, + record: value, + move_tags: Map.get(headers, "tags", []), + active_conditions: Map.get(headers, "active_conditions", []) + } "update" -> %Changes.UpdatedRecord{ key: key, record: value, move_tags: Map.get(headers, "tags", []), - removed_move_tags: Map.get(headers, "removed_tags", []) + removed_move_tags: Map.get(headers, "removed_tags", []), + active_conditions: Map.get(headers, "active_conditions", []) } "delete" -> %Changes.DeletedRecord{ key: key, old_record: value, - move_tags: Map.get(headers, "tags", []) + move_tags: Map.get(headers, "tags", []), + active_conditions: Map.get(headers, "active_conditions", []) } end @@ -343,6 +350,14 @@ defmodule Electric.Shapes.Consumer.Materializer do end) %{headers: %{event: "move-out", patterns: patterns}} + + %{"headers" => 
%{"event" => "move-in", "patterns" => patterns}} -> + patterns = + Enum.map(patterns, fn %{"pos" => pos, "value" => value} -> + %{pos: pos, value: value} + end) + + %{headers: %{event: "move-in", patterns: patterns}} end) end @@ -441,20 +456,41 @@ defmodule Electric.Shapes.Consumer.Materializer do changes, {{state.index, state.tag_indices}, {state.value_counts, []}}, fn - %Changes.NewRecord{key: key, record: record, move_tags: move_tags}, + %Changes.NewRecord{ + key: key, + record: record, + move_tags: move_tags, + active_conditions: ac + }, {{index, tag_indices}, counts_and_events} -> {value, original_string} = cast!(record, state) if is_map_key(index, key), do: raise("Key #{key} already exists") - index = Map.put(index, key, value) + included? = evaluate_inclusion(move_tags, ac) + + index = + Map.put(index, key, %{ + value: value, + tags: move_tags, + active_conditions: ac, + included?: included? + }) + tag_indices = add_row_to_tag_indices(tag_indices, key, move_tags) - {{index, tag_indices}, increment_value(counts_and_events, value, original_string)} + + counts_and_events = + if included?, + do: increment_value(counts_and_events, value, original_string), + else: counts_and_events + + {{index, tag_indices}, counts_and_events} %Changes.UpdatedRecord{ key: key, old_key: old_key, record: record, move_tags: move_tags, - removed_move_tags: removed_move_tags + removed_move_tags: removed_move_tags, + active_conditions: ac }, {{index, tag_indices}, counts_and_events} -> # When the primary key doesn't change, old_key may be nil; default to key @@ -464,46 +500,97 @@ defmodule Electric.Shapes.Consumer.Materializer do columns_present = Enum.any?(state.columns, &is_map_key(record, &1)) has_tag_updates = removed_move_tags != [] pk_changed = old_key != key + has_ac_update = ac != [] and is_map_key(index, old_key) + + if columns_present or has_tag_updates or has_ac_update or pk_changed do + old_entry = Map.fetch!(index, old_key) - if columns_present or has_tag_updates or 
pk_changed do - # When PK changes, old_key must be removed from all tag indices it - # belongs to (both removed and retained tags), not just removed_move_tags + # When the primary key changes, re-index every existing tag for the new key. tags_to_remove = if pk_changed, - do: removed_move_tags ++ move_tags, + do: old_entry.tags, else: removed_move_tags + new_tags = + if has_tag_updates or move_tags != [], do: move_tags, else: old_entry.tags + + new_ac = if ac != [], do: ac, else: old_entry.active_conditions + new_included? = evaluate_inclusion(new_tags, new_ac) + tag_indices = tag_indices |> remove_row_from_tag_indices(old_key, tags_to_remove) - |> add_row_to_tag_indices(key, move_tags) + |> add_row_to_tag_indices(key, new_tags) if columns_present do {value, original_string} = cast!(record, state) - {old_value, index} = Map.pop!(index, old_key) - index = Map.put(index, key, value) - - # Skip decrement/increment dance if value hasn't changed to avoid - # spurious move_out/move_in events when only the tag changed - if old_value == value do - {{index, tag_indices}, counts_and_events} - else - {{index, tag_indices}, - counts_and_events - |> decrement_value(old_value, value_to_string(old_value, state)) - |> increment_value(value, original_string)} + old_value = old_entry.value + + index = + index + |> Map.delete(old_key) + |> Map.put(key, %{ + value: value, + tags: new_tags, + active_conditions: new_ac, + included?: new_included? + }) + + cond do + old_entry.included? and new_included? and old_value != value -> + {{index, tag_indices}, + counts_and_events + |> decrement_value(old_value, value_to_string(old_value, state)) + |> increment_value(value, original_string)} + + old_entry.included? and not new_included? -> + {{index, tag_indices}, + decrement_value( + counts_and_events, + old_value, + value_to_string(old_value, state) + )} + + not old_entry.included? and new_included? 
-> + {{index, tag_indices}, + increment_value(counts_and_events, value, original_string)} + + true -> + # Skip decrement/increment dance if value hasn't changed to avoid + # spurious move_out/move_in events when only the tag changed + {{index, tag_indices}, counts_and_events} end else - # PK changed but tracked column not in record — re-key the index entry index = - if pk_changed do - {value, index} = Map.pop!(index, old_key) - Map.put(index, key, value) - else - index - end - - {{index, tag_indices}, counts_and_events} + index + |> Map.delete(old_key) + |> Map.put(key, %{ + old_entry + | tags: new_tags, + active_conditions: new_ac, + included?: new_included? + }) + + cond do + old_entry.included? and not new_included? -> + {{index, tag_indices}, + decrement_value( + counts_and_events, + old_entry.value, + value_to_string(old_entry.value, state) + )} + + not old_entry.included? and new_included? -> + {{index, tag_indices}, + increment_value( + counts_and_events, + old_entry.value, + value_to_string(old_entry.value, state) + )} + + true -> + {{index, tag_indices}, counts_and_events} + end end else # Nothing relevant to this materializer has been updated @@ -512,22 +599,43 @@ defmodule Electric.Shapes.Consumer.Materializer do %Changes.DeletedRecord{key: key, move_tags: move_tags}, {{index, tag_indices}, counts_and_events} -> - {value, index} = Map.pop!(index, key) - + {entry, index} = Map.pop!(index, key) tag_indices = remove_row_from_tag_indices(tag_indices, key, move_tags) - {{index, tag_indices}, - decrement_value(counts_and_events, value, value_to_string(value, state))} - - %{headers: %{event: "move-out", patterns: patterns}}, - {{index, tag_indices}, counts_and_events} -> - {keys, tag_indices} = pop_keys_from_tag_indices(tag_indices, patterns) + if entry.included? 
do + {{index, tag_indices}, + decrement_value( + counts_and_events, + entry.value, + value_to_string(entry.value, state) + )} + else + {{index, tag_indices}, counts_and_events} + end - {index, counts_and_events} = - Enum.reduce(keys, {index, counts_and_events}, fn key, {index, counts_and_events} -> - {value, index} = Map.pop!(index, key) - {index, decrement_value(counts_and_events, value, value_to_string(value, state))} - end) + %{headers: %{event: event, patterns: patterns}}, + {{index, tag_indices}, counts_and_events} + when event in ["move-out", "move-in"] -> + new_condition = event == "move-in" + affected = collect_affected_keys(tag_indices, patterns) + + {{index, tag_indices}, counts_and_events} = + Enum.reduce( + affected, + {{index, tag_indices}, counts_and_events}, + fn {key, matched_positions}, acc -> + entry = Map.fetch!(index, key) + + process_move_event( + entry, + key, + matched_positions, + new_condition, + acc, + state + ) + end + ) {{index, tag_indices}, counts_and_events} end @@ -559,39 +667,138 @@ defmodule Electric.Shapes.Consumer.Materializer do end end + # Position-aware tag indexing: tags are "/" separated strings where each slot + # corresponds to a DNF position. Non-empty slots are indexed as {pos, hash}. + # For backward compat, flat tags (no "/") are treated as position 0. defp add_row_to_tag_indices(tag_indices, key, move_tags) do - # For now we only support one move tag per row (i.e. 
no `OR`s in the where clause if there's a subquery) Enum.reduce(move_tags, tag_indices, fn tag, acc when is_binary(tag) -> - Map.update(acc, tag, MapSet.new([key]), &MapSet.put(&1, key)) + tag + |> parse_tag_slots() + |> Enum.reduce(acc, fn + {"", _pos}, acc -> + acc + + {hash, pos}, acc -> + Map.update(acc, {pos, hash}, MapSet.new([key]), &MapSet.put(&1, key)) + end) end) end defp remove_row_from_tag_indices(tag_indices, key, move_tags) do Enum.reduce(move_tags, tag_indices, fn tag, acc when is_binary(tag) -> - case Map.fetch(acc, tag) do - {:ok, v} -> - new_mapset = MapSet.delete(v, key) - - if MapSet.size(new_mapset) == 0 do - Map.delete(acc, tag) - else - Map.put(acc, tag, new_mapset) + tag + |> parse_tag_slots() + |> Enum.reduce(acc, fn + {"", _pos}, acc -> + acc + + {hash, pos}, acc -> + case Map.fetch(acc, {pos, hash}) do + {:ok, v} -> + new_mapset = MapSet.delete(v, key) + + if MapSet.size(new_mapset) == 0 do + Map.delete(acc, {pos, hash}) + else + Map.put(acc, {pos, hash}, new_mapset) + end + + :error -> + acc end + end) + end) + end - :error -> + defp parse_tag_slots(tag) do + tag |> String.split("/") |> Enum.with_index() + end + + # Collect keys affected by move patterns, returning %{key => MapSet} + defp collect_affected_keys(tag_indices, patterns) do + Enum.reduce(patterns, %{}, fn %{pos: pos, value: value}, acc -> + case Map.get(tag_indices, {pos, value}) do + nil -> acc + + keys -> + Enum.reduce(keys, acc, fn key, acc -> + Map.update(acc, key, MapSet.new([pos]), &MapSet.put(&1, pos)) + end) end end) end - defp pop_keys_from_tag_indices(tag_indices, patterns) do - # This implementation is naive while we support only one tag per row and no composite tags. 
- Enum.reduce(patterns, {MapSet.new(), tag_indices}, fn %{pos: _pos, value: value}, - {keys, acc} -> - case Map.pop(acc, value) do - {nil, acc} -> {keys, acc} - {v, acc} -> {MapSet.union(keys, v), acc} - end + defp process_move_event(entry, key, matched_positions, new_condition, {{idx, ti}, ce}, state) do + case entry.active_conditions do + [] when new_condition == false -> + # No DNF, move-out: remove row entirely (backward compat) + ti = remove_row_from_tag_indices(ti, key, entry.tags) + idx = Map.delete(idx, key) + {{idx, ti}, decrement_value(ce, entry.value, value_to_string(entry.value, state))} + + [] -> + # No DNF, move-in: no-op + {{idx, ti}, ce} + + ac -> + # DNF: flip matched positions, re-evaluate inclusion + new_ac = flip_active_conditions(ac, matched_positions, new_condition) + new_included? = evaluate_inclusion(entry.tags, new_ac) + + cond do + entry.included? and not new_included? -> + # Remove row entirely to avoid stale tag_indices. If the row + # should become included again later, it will re-enter via a + # move-in query or NewRecord with fresh tags and ac. + ti = remove_row_from_tag_indices(ti, key, entry.tags) + idx = Map.delete(idx, key) + {{idx, ti}, decrement_value(ce, entry.value, value_to_string(entry.value, state))} + + not entry.included? and new_included? -> + idx = + Map.put(idx, key, %{ + entry + | active_conditions: new_ac, + included?: new_included? + }) + + {{idx, ti}, increment_value(ce, entry.value, value_to_string(entry.value, state))} + + true -> + idx = + Map.put(idx, key, %{ + entry + | active_conditions: new_ac, + included?: new_included? + }) + + {{idx, ti}, ce} + end + end + end + + defp flip_active_conditions(ac, positions, new_value) do + ac + |> Enum.with_index() + |> Enum.map(fn {val, idx} -> + if MapSet.member?(positions, idx), do: new_value, else: val + end) + end + + # Evaluate whether a row is included based on its tags and active_conditions. 
+ # A row is included if any disjunct (tag) has all participating positions active. + defp evaluate_inclusion([], _ac), do: true + defp evaluate_inclusion(_tags, []), do: true + + defp evaluate_inclusion(tags, ac) do + Enum.any?(tags, fn tag -> + tag + |> parse_tag_slots() + |> Enum.all?(fn + {"", _pos} -> true + {_hash, pos} -> Enum.at(ac, pos, true) + end) end) end end diff --git a/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex b/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex deleted file mode 100644 index da44e60e36..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/move_handling.ex +++ /dev/null @@ -1,143 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveHandling do - @moduledoc false - alias Electric.Replication.LogOffset - alias Electric.ShapeCache.Storage - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.PartialModes - alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves - alias Electric.Shapes.Consumer.MoveIns - - require Logger - - @spec process_move_ins(State.t(), Shape.handle(), list(term())) :: State.t() - def process_move_ins(state, _, []), do: state - - def process_move_ins(%State{} = state, dep_handle, new_values) do - # Something moved in in a dependency shape. We need to query the DB for relevant values. 
- formed_where_clause = - Shape.SubqueryMoves.move_in_where_clause( - state.shape, - dep_handle, - Enum.map(new_values, &elem(&1, 1)) - ) - - storage = state.storage - name = Electric.Utils.uuid4() - consumer_pid = self() - - # Start async query - don't block on snapshot - Electric.ProcessRegistry.name(state.stack_id, Electric.StackTaskSupervisor) - |> PartialModes.query_move_in_async( - state.shape_handle, - state.shape, - formed_where_clause, - stack_id: state.stack_id, - consumer_pid: consumer_pid, - results_fn: fn stream, pg_snapshot -> - task_pid = self() - - # Process query results - stream - |> Stream.transform( - fn -> [] end, - fn [key, _, _] = item, acc -> {[item], [key | acc]} end, - fn acc -> send(task_pid, {:acc, acc, pg_snapshot}) end - ) - |> Storage.write_move_in_snapshot!(name, storage) - - # Return accumulated keys and snapshot - receive(do: ({:acc, acc, snapshot} -> {acc, snapshot})) - end, - move_in_name: name - ) - - index = Enum.find_index(state.shape.shape_dependencies_handles, &(&1 == dep_handle)) - - # Add to waiting WITHOUT blocking (snapshot will be set later via message) - move_handling_state = - MoveIns.add_waiting( - state.move_handling_state, - name, - {["$sublink", Integer.to_string(index)], MapSet.new(Enum.map(new_values, &elem(&1, 0)))} - ) - - Logger.debug("Move-in #{name} has been triggered from #{dep_handle}") - - %{state | move_handling_state: move_handling_state} - end - - @spec process_move_outs(State.t(), Shape.handle(), list(term())) :: - {State.t(), changes :: term()} - def process_move_outs(state, _, []), do: {state, nil} - - def process_move_outs(state, dep_handle, removed_values) do - message = - SubqueryMoves.make_move_out_control_message( - state.shape, - state.stack_id, - state.shape_handle, - [ - {dep_handle, removed_values} - ] - ) - - # TODO: This leaks the message abstraction, and I'm OK with it for now because I'll be refactoring this code path for the multi-subqueries shortly - move_handling_state = - 
MoveIns.move_out_happened( - state.move_handling_state, - MapSet.new(message.headers.patterns |> Enum.map(& &1[:value])) - ) - - {{_, upper_bound}, writer} = Storage.append_control_message!(message, state.writer) - - {%{state | move_handling_state: move_handling_state, writer: writer}, - {[message], upper_bound}} - end - - def query_complete(%State{} = state, name, key_set, snapshot) do - touch_tracker = state.move_handling_state.touch_tracker - tags_to_skip = state.move_handling_state.moved_out_tags[name] || MapSet.new() - - # 1. Splice stored snapshot into main log with filtering - {{lower_bound, upper_bound}, writer} = - Storage.append_move_in_snapshot_to_log!( - name, - state.writer, - fn key, tags -> - (tags != [] and Enum.all?(tags, &MapSet.member?(tags_to_skip, &1))) or - MoveIns.should_skip_query_row?(touch_tracker, snapshot, key) - end - ) - - # 2. Move from "waiting" to "filtering" - {visibility_snapshot, move_handling_state} = - MoveIns.change_to_filtering(state.move_handling_state, name, MapSet.new(key_set)) - - {{_, upper_bound}, writer} = - if is_nil(visibility_snapshot) do - {{nil, upper_bound}, writer} - else - append_snapshot_end_control(snapshot, writer) - end - - state = %{state | move_handling_state: move_handling_state, writer: writer} - - {state, {{lower_bound, upper_bound}, upper_bound}} - end - - @spec append_snapshot_end_control(MoveIns.pg_snapshot(), Storage.writer_state()) :: - {{LogOffset.t(), LogOffset.t()}, Storage.writer_state()} - defp append_snapshot_end_control({xmin, xmax, xip_list}, writer) do - control_message = %{ - headers: %{ - control: "snapshot-end", - xmin: Integer.to_string(xmin), - xmax: Integer.to_string(xmax), - xip_list: Enum.map(xip_list, &Integer.to_string/1) - } - } - - Storage.append_control_message!(control_message, writer) - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex b/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex deleted file mode 100644 index 
c0c554cb70..0000000000 --- a/packages/sync-service/lib/electric/shapes/consumer/move_ins.ex +++ /dev/null @@ -1,358 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveIns do - alias Electric.Replication.Changes - alias Electric.Replication.Changes.Transaction - alias Electric.Postgres.Xid - alias Electric.Postgres.SnapshotQuery - - require Xid - - defstruct waiting_move_ins: %{}, - filtering_move_ins: [], - touch_tracker: %{}, - move_in_buffering_snapshot: nil, - in_flight_values: %{}, - moved_out_tags: %{}, - maximum_resolved_snapshot: nil, - minimum_unresolved_snapshot: nil - - @type pg_snapshot() :: SnapshotQuery.pg_snapshot() - @type move_in_name() :: String.t() - @type in_flight_values() :: %{term() => MapSet.t()} - - @typedoc """ - Information needed to reason about move-in handling and correct stream processing. - - - `waiting_move_ins`: Information about move-ins we're waiting for. That means a move-in was triggered, but - query results are not yet available. The map value has pg snapshot and actual values that were - moved in and thus should be skipped in where clause evaluation until the results are appended to the log - - `filtering_move_ins`: Information about move-ins we're filtering. That means a move-in has resolved and was - added to the shape log, and we need to skip changes that are already visible there. - - `touch_tracker`: A map of keys to xids of transactions that have touched them. This is used to skip changes - inside move-in query results that are already visible in the shape log. - - `move_in_buffering_snapshot`: A snapshot that is a union of all the "waiting" move-in snapshots. This is used to - reduce a check whether something is visible in any of the "waiting" move-in snapshots - down to a single check instead of checking each snapshot individually. - - `in_flight_values`: A precalculated map of all moved-in values that caused a move-in and thus should be skipped in - where clause evaluation until the results are appended to the log. 
- - `moved_out_tags`: A map of move-in names to sets of tags that were moved out while the move-in was happening and thus - should be skipped when appending move-in results to the log. - - `maximum_resolved_snapshot`: Stores the maximum snapshot of resolved move-ins that weren't immediately appended as - snapshot-end control messages, to be appended when the last concurrent move-in resolves. - - `minimum_unresolved_snapshot`: Stores the minimum snapshot of unresolved move-ins. - """ - @type t() :: %__MODULE__{ - waiting_move_ins: %{move_in_name() => {pg_snapshot() | nil, {term(), MapSet.t()}}}, - filtering_move_ins: list({pg_snapshot(), keys :: list(String.t())}), - touch_tracker: %{String.t() => pos_integer()}, - move_in_buffering_snapshot: nil | pg_snapshot(), - in_flight_values: in_flight_values(), - moved_out_tags: %{move_in_name() => MapSet.t(String.t())}, - maximum_resolved_snapshot: nil | pg_snapshot(), - minimum_unresolved_snapshot: nil | pg_snapshot() - } - def new() do - %__MODULE__{} - end - - @doc """ - Add information about a new move-in to the state for which we're waiting. - Snapshot is initially nil and will be set later when the query begins. 
- """ - @spec add_waiting(t(), move_in_name(), {term(), MapSet.t()}) :: t() - def add_waiting( - %__MODULE__{waiting_move_ins: waiting_move_ins} = state, - name, - moved_values - ) do - new_waiting_move_ins = Map.put(waiting_move_ins, name, {nil, moved_values}) - new_buffering_snapshot = make_move_in_buffering_snapshot(new_waiting_move_ins) - - %{ - state - | waiting_move_ins: new_waiting_move_ins, - move_in_buffering_snapshot: new_buffering_snapshot, - in_flight_values: make_in_flight_values(new_waiting_move_ins), - moved_out_tags: Map.put(state.moved_out_tags, name, MapSet.new()) - } - end - - # TODO: this assumes a single subquery for now - def move_out_happened(state, new_tags) do - moved_out_tags = - Map.new(state.moved_out_tags, fn {name, tags} -> {name, MapSet.union(tags, new_tags)} end) - - %{state | moved_out_tags: moved_out_tags} - end - - @doc """ - Set the snapshot for a waiting move-in when it becomes known. - """ - @spec set_snapshot(t(), move_in_name(), pg_snapshot()) :: t() - def set_snapshot(%__MODULE__{waiting_move_ins: waiting_move_ins} = state, name, snapshot) do - new_move_ins = - Map.update!(waiting_move_ins, name, fn {_, moved_values} -> {snapshot, moved_values} end) - - new_buffering_snapshot = make_move_in_buffering_snapshot(new_move_ins) - - %{ - state - | waiting_move_ins: new_move_ins, - move_in_buffering_snapshot: new_buffering_snapshot, - minimum_unresolved_snapshot: min_snapshot(state.minimum_unresolved_snapshot, snapshot) - } - end - - @spec make_move_in_buffering_snapshot(%{move_in_name() => pg_snapshot()}) :: nil | pg_snapshot() - # The fake global snapshot allows us to check if a transaction is not visible in any of the pending snapshots - # instead of checking each snapshot individually. 
- defp make_move_in_buffering_snapshot(waiting_move_ins) when waiting_move_ins == %{}, do: nil - - defp make_move_in_buffering_snapshot(waiting_move_ins) do - snapshots = - waiting_move_ins - |> Map.values() - |> Enum.map(fn {snapshot, _} -> snapshot end) - |> Enum.reject(&is_nil/1) - - case snapshots do - [] -> - nil - - _ -> - Enum.reduce(snapshots, {:infinity, -1, []}, fn {xmin, xmax, xip_list}, - {global_xmin, global_xmax, global_xip_list} -> - {Kernel.min(global_xmin, xmin), Kernel.max(global_xmax, xmax), - global_xip_list ++ xip_list} - end) - end - end - - defp make_in_flight_values(waiting_move_ins) do - waiting_move_ins - |> Map.values() - |> Enum.map(fn {_, moved_values} -> moved_values end) - |> Enum.reduce(%{}, fn {key, value}, acc -> - Map.update(acc, key, value, &MapSet.union(&1, value)) - end) - end - - @doc """ - Change a move-in from "waiting" to "filtering", marking it as complete and return best-effort visibility boundary. - """ - @spec change_to_filtering(t(), move_in_name(), MapSet.t(String.t())) :: - {visibility_boundary :: nil | pg_snapshot(), t()} - def change_to_filtering(%__MODULE__{} = state, name, key_set) do - {{snapshot, _}, waiting_move_ins} = Map.pop!(state.waiting_move_ins, name) - filtering_move_ins = [{snapshot, key_set} | state.filtering_move_ins] - buffering_snapshot = make_move_in_buffering_snapshot(waiting_move_ins) - - {boundary, maximum_resolved_snapshot} = - cond do - waiting_move_ins == %{} -> {max_snapshot(state.maximum_resolved_snapshot, snapshot), nil} - is_minimum_snapshot?(state, snapshot) -> {snapshot, state.maximum_resolved_snapshot} - true -> {nil, max_snapshot(state.maximum_resolved_snapshot, snapshot)} - end - - new_state = %{ - state - | waiting_move_ins: waiting_move_ins, - filtering_move_ins: filtering_move_ins, - move_in_buffering_snapshot: buffering_snapshot, - in_flight_values: make_in_flight_values(waiting_move_ins), - moved_out_tags: Map.delete(state.moved_out_tags, name), - minimum_unresolved_snapshot: 
find_minimum_unresolved_snapshot(waiting_move_ins), - maximum_resolved_snapshot: maximum_resolved_snapshot - } - - {boundary, new_state} - end - - defp find_minimum_unresolved_snapshot(waiting_move_ins) do - snapshots = - waiting_move_ins - |> Map.values() - |> Enum.map(fn {snapshot, _} -> snapshot end) - |> Enum.reject(&is_nil/1) - - case snapshots do - [] -> nil - list -> Enum.min(list, &(Xid.compare_snapshots(&1, &2) != :gt)) - end - end - - @doc """ - Remove completed move-ins from the state. - - Move-in is considered "completed" (i.e. not included in the filtering logic) - once we see any transaction that is after the end of the move-in snapshot. - - Filtering generally is applied only to transactions that are already visible - in the snapshot, and those can only be with `xid < xmax`. - """ - @spec remove_completed(t(), Transaction.t()) :: t() - def remove_completed(%__MODULE__{} = state, %Transaction{xid: xid}) do - state.filtering_move_ins - |> Enum.reject(fn {snapshot, _} -> Xid.after_snapshot?(xid, snapshot) end) - |> then(&%{state | filtering_move_ins: &1}) - end - - @doc """ - Check if a change is already visible in one of the completed move-ins. - - A visible change means it needs to be skipped to avoid duplicates. 
- """ - @spec change_already_visible?(t(), Xid.anyxid(), Changes.change()) :: boolean() - def change_already_visible?(_state, _xid, %Changes.DeletedRecord{}), do: false - def change_already_visible?(%__MODULE__{filtering_move_ins: []}, _, _), do: false - - def change_already_visible?(%__MODULE__{filtering_move_ins: filters}, xid, %{key: key}) do - Enum.any?(filters, fn {snapshot, key_set} -> - Transaction.visible_in_snapshot?(xid, snapshot) and MapSet.member?(key_set, key) - end) - end - - def change_visible_in_unresolved_move_ins_for_values?( - %__MODULE__{waiting_move_ins: waiting_move_ins}, - referenced_values, - xid - ) do - Enum.any?(Map.values(waiting_move_ins), fn {snapshot, {path, moved_values}} -> - case Map.fetch(referenced_values, path) do - {:ok, value} -> - (is_nil(snapshot) or Transaction.visible_in_snapshot?(xid, snapshot)) and - MapSet.member?(moved_values, value) - - :error -> - false - end - end) - end - - @doc """ - Track a touch for a non-delete change. - Returns updated touch_tracker. - """ - @spec track_touch(t(), pos_integer(), Changes.change()) :: t() - - def track_touch(%__MODULE__{} = state, _xid, %Changes.DeletedRecord{}), - do: state - - def track_touch(%__MODULE__{touch_tracker: touch_tracker} = state, xid, %{key: key}) do - %{state | touch_tracker: Map.put(touch_tracker, key, xid)} - end - - @doc """ - Garbage collect touches that are visible in all pending snapshots. - A touch is visible if its xid is before the minimum xmin of all waiting snapshots. - """ - @spec gc_touch_tracker(t()) :: t() - def gc_touch_tracker( - %__MODULE__{ - move_in_buffering_snapshot: nil, - waiting_move_ins: waiting_move_ins - } = state - ) do - # If there are waiting move-ins but buffering_snapshot is nil (all snapshots unknown), - # keep all touches. Otherwise (no waiting move-ins), clear all touches. 
- case waiting_move_ins do - empty when empty == %{} -> %{state | touch_tracker: %{}} - _ -> state - end - end - - def gc_touch_tracker( - %__MODULE__{ - touch_tracker: touch_tracker, - move_in_buffering_snapshot: {xmin, _xmax, _xip_list} - } = - state - ) do - # Remove touches that are before the minimum xmin (visible in all snapshots) - %{ - state - | touch_tracker: - Map.reject(touch_tracker, fn {_key, touch_xid} -> - touch_xid < xmin - end) - } - end - - @doc """ - Check if a query result row should be skipped because a fresher version exists in the stream. - Skip if: touch exists AND touch xid is NOT visible in query snapshot. - """ - @spec should_skip_query_row?(%{String.t() => pos_integer()}, pg_snapshot(), String.t()) :: - boolean() - def should_skip_query_row?(touch_tracker, _snapshot, key) - when not is_map_key(touch_tracker, key) do - false - end - - def should_skip_query_row?(touch_tracker, snapshot, key) do - touch_xid = Map.fetch!(touch_tracker, key) - # Skip if touch is NOT visible in snapshot (means we have fresher data in stream) - not Transaction.visible_in_snapshot?(touch_xid, snapshot) - end - - @spec max_snapshot(pg_snapshot() | nil, pg_snapshot() | nil) :: pg_snapshot() - defp max_snapshot(nil, value), do: value - defp max_snapshot(value, nil), do: value - - defp max_snapshot(snapshot1, snapshot2) do - case Xid.compare_snapshots(snapshot1, snapshot2) do - :lt -> snapshot2 - _ -> snapshot1 - end - end - - @spec min_snapshot(pg_snapshot(), pg_snapshot()) :: pg_snapshot() - defp min_snapshot(nil, value), do: value - defp min_snapshot(value, nil), do: value - - defp min_snapshot(snapshot1, snapshot2) do - case Xid.compare_snapshots(snapshot1, snapshot2) do - :lt -> snapshot1 - _ -> snapshot2 - end - end - - @doc """ - Check if the given snapshot is the minimum among all concurrent waiting move-ins - (excluding the current one being resolved, and only considering those with known snapshots). 
- """ - @spec is_minimum_snapshot?(t(), pg_snapshot()) :: boolean() - def is_minimum_snapshot?( - %__MODULE__{minimum_unresolved_snapshot: minimum_unresolved_snapshot}, - snapshot - ) do - Xid.compare_snapshots(snapshot, minimum_unresolved_snapshot) == :eq - end - - @doc """ - Store or update the maximum resolved snapshot. - If there's already a stored snapshot, keep the maximum of the two. - """ - @spec store_maximum_resolved_snapshot(t(), pg_snapshot()) :: t() - def store_maximum_resolved_snapshot( - %__MODULE__{maximum_resolved_snapshot: nil} = state, - snapshot - ) do - %{state | maximum_resolved_snapshot: snapshot} - end - - def store_maximum_resolved_snapshot( - %__MODULE__{maximum_resolved_snapshot: stored} = state, - snapshot - ) do - %{state | maximum_resolved_snapshot: max_snapshot(stored, snapshot)} - end - - @doc """ - Get the stored maximum resolved snapshot and clear it, or return nil if none is stored. - Returns {snapshot | nil, updated_state}. - """ - @spec get_and_clear_maximum_resolved_snapshot(t()) :: {pg_snapshot() | nil, t()} - def get_and_clear_maximum_resolved_snapshot(%__MODULE__{} = state) do - {state.maximum_resolved_snapshot, %{state | maximum_resolved_snapshot: nil}} - end -end diff --git a/packages/sync-service/lib/electric/shapes/consumer/setup_effects.ex b/packages/sync-service/lib/electric/shapes/consumer/setup_effects.ex new file mode 100644 index 0000000000..61177f707f --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/setup_effects.ex @@ -0,0 +1,70 @@ +defmodule Electric.Shapes.Consumer.SetupEffects do + # Executes ordered boot-time setup effects for consumer handler initialization. 
+ + alias Electric.Replication.ShapeLogCollector + alias Electric.Shapes.Consumer.State + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + require Logger + + defmodule SubscribeShape do + @moduledoc false + defstruct [:action] + end + + defmodule SeedSubqueryIndex do + @moduledoc false + defstruct [] + end + + @type t() :: %SubscribeShape{} | %SeedSubqueryIndex{} + + @spec execute([t()], State.t()) :: {:ok, State.t()} | {:error, State.t()} + def execute(effects, %State{} = state) when is_list(effects) do + Enum.reduce_while(effects, {:ok, state}, fn effect, {:ok, state} -> + case execute_effect(effect, state) do + {:ok, %State{} = state} -> {:cont, {:ok, state}} + {:error, %State{} = state} -> {:halt, {:error, state}} + end + end) + end + + defp execute_effect(%SubscribeShape{action: action}, %State{} = state) do + case ShapeLogCollector.add_shape(state.stack_id, state.shape_handle, state.shape, action) do + :ok -> + {:ok, state} + + {:error, error} -> + Logger.warning( + "Shape #{state.shape_handle} cannot subscribe due to #{inspect(error)} - invalidating shape" + ) + + {:error, state} + end + end + + defp execute_effect(%SeedSubqueryIndex{}, %State{event_handler: %{views: views}} = state) do + case SubqueryIndex.for_stack(state.stack_id) do + nil -> + {:ok, state} + + index -> + for {ref, view} <- views do + dep_index = ref |> List.last() |> String.to_integer() + + SubqueryIndex.seed_membership( + index, + state.shape_handle, + ref, + dep_index, + view + ) + end + + SubqueryIndex.mark_ready(index, state.shape_handle) + {:ok, state} + end + end + + defp execute_effect(%SeedSubqueryIndex{}, %State{} = state), do: {:ok, state} +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/state.ex b/packages/sync-service/lib/electric/shapes/consumer/state.ex index 52c70eaff9..9414f07571 100644 --- a/packages/sync-service/lib/electric/shapes/consumer/state.ex +++ b/packages/sync-service/lib/electric/shapes/consumer/state.ex @@ -1,10 +1,7 @@ defmodule 
Electric.Shapes.Consumer.State do @moduledoc false - alias Electric.Shapes.Consumer.MoveIns alias Electric.Shapes.Consumer.InitialSnapshot alias Electric.Shapes.Shape - alias Electric.Replication.Eval.Parser - alias Electric.Replication.Eval.Walker alias Electric.Replication.TransactionBuilder alias Electric.Postgres.SnapshotQuery alias Electric.Replication.LogOffset @@ -25,15 +22,13 @@ defmodule Electric.Shapes.Consumer.State do :storage, :writer, initial_snapshot_state: InitialSnapshot.new(nil), - move_handling_state: MoveIns.new(), + event_handler: nil, transaction_builder: TransactionBuilder.new(), buffer: [], txn_offset_mapping: [], materializer_subscribed?: false, terminating?: false, buffering?: false, - or_with_subquery?: false, - not_with_subquery?: false, # Based on the write unit value, consumer will either buffer txn fragments in memory until # it sees a commit (write_unit=txn) or it will write each received txn fragment to storage # immediately (write_unit=txn_fragment). @@ -50,31 +45,7 @@ defmodule Electric.Shapes.Consumer.State do ] @type pg_snapshot() :: SnapshotQuery.pg_snapshot() - @type move_in_name() :: String.t() - @type uninitialized_t() :: term() - # @type uninitialized_t() :: %__MODULE__{ - # stack_id: Electric.stack_id(), - # shape_handle: Shape.handle(), - # shape: Shape.t(), - # awaiting_snapshot_start: list(GenServer.from()), - # buffer: list(Transaction.t()), - # monitors: list({pid(), reference()}), - # txn_offset_mapping: list({LogOffset.t(), LogOffset.t()}), - # snapshot_started?: boolean(), - # materializer_subscribed?: boolean(), - # terminating?: boolean(), - # buffering?: boolean(), - # initial_snapshot_filtering?: boolean(), - # waiting_move_ins: %{move_in_name() => pg_snapshot()}, - # filtering_move_ins: list(Shape.handle()), - # move_in_buffering_snapshot: nil | pg_snapshot(), - # hibernate_after: non_neg_integer(), - # latest_offset: nil, - # initial_pg_snapshot: nil, - # storage: nil, - # writer: nil - # } @typedoc """ State 
of the consumer process. @@ -89,53 +60,14 @@ defmodule Electric.Shapes.Consumer.State do last relevant one to last one generally in the transaction and use that to map back the flushed offset to the transaction boundary. - ## Move-in handling - - There are 3 fields in the state relating to the move-in handling: - `waiting_move_ins`, `filtering_move_ins`, and `move_in_buffering_snapshot`. - - Once a move-in is necessary, we immeidately query the DB for the snapshot, - and store it in `waiting_move_ins` until we know the affected key set for this - move-in (possible only when entire query resolves). If a transaction is not a - part of any of these "waiting" move-in snapshots, we cannot apply it yet - and so we start buffering. In order to avoid walking the `waiting_move_ins` - map every time, we instead construct a "buffering snapshot" which is a union - of all the "waiting" move-in snapshots. This is stored in `move_in_buffering_snapshot` - and is updated when anything is added to or removed from `waiting_move_ins`. - - Once we have the affected key set, we can move the move-in to `filtering_move_ins`. - Filtering logic is described elsewhere. - ## Buffering Consumer will be buffering transactions in 2 cases: when we're waiting for initial - snapshot information, or when we can't reason about the change in context of a move-in. + snapshot information, or when an active subquery move-in is being spliced into the log. Buffer is stored in reverse order. 
""" @type t() :: term() - # @type t() :: %__MODULE__{ - # stack_id: Electric.stack_id(), - # shape_handle: Shape.handle(), - # shape: Shape.t(), - # awaiting_snapshot_start: list(GenServer.from()), - # buffer: list(Transaction.t()), - # monitors: list({pid(), reference()}), - # txn_offset_mapping: list({LogOffset.t(), LogOffset.t()}), - # snapshot_started?: boolean(), - # materializer_subscribed?: boolean(), - # terminating?: boolean(), - # buffering?: boolean(), - # initial_snapshot_filtering?: boolean(), - # waiting_move_ins: %{move_in_name() => pg_snapshot()}, - # filtering_move_ins: list(Shape.handle()), - # move_in_buffering_snapshot: nil | pg_snapshot(), - # hibernate_after: non_neg_integer(), - # latest_offset: LogOffset.t(), - # initial_pg_snapshot: nil | pg_snapshot(), - # storage: Storage.shape_storage(), - # writer: Storage.writer_state() - # } defguard is_snapshot_started(state) when is_struct(state.initial_snapshot_state, InitialSnapshot) and @@ -175,8 +107,6 @@ defmodule Electric.Shapes.Consumer.State do %{ state | shape: shape, - or_with_subquery?: has_or_with_subquery?(shape), - not_with_subquery?: has_not_with_subquery?(shape), # Enable direct fragment-to-storage streaming for shapes without subquery dependencies # and if the current shape itself isn't an inner shape of a shape with subqueries. 
write_unit: @@ -189,62 +119,6 @@ defmodule Electric.Shapes.Consumer.State do } end - defp has_or_with_subquery?(%Shape{shape_dependencies: []}), do: false - defp has_or_with_subquery?(%Shape{where: nil}), do: false - - defp has_or_with_subquery?(%Shape{where: where}) do - Walker.reduce!( - where.eval, - fn - %Parser.Func{name: "or"} = or_node, acc, _ctx -> - if subtree_has_sublink?(or_node) do - {:ok, true} - else - {:ok, acc} - end - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - - defp subtree_has_sublink?(tree) do - Walker.reduce!( - tree, - fn - %Parser.Ref{path: ["$sublink", _]}, _acc, _ctx -> - {:ok, true} - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - - defp has_not_with_subquery?(%Shape{shape_dependencies: []}), do: false - defp has_not_with_subquery?(%Shape{where: nil}), do: false - - defp has_not_with_subquery?(%Shape{where: where}) do - Walker.reduce!( - where.eval, - fn - %Parser.Func{name: "not"} = not_node, acc, _ctx -> - if subtree_has_sublink?(not_node) do - {:ok, true} - else - {:ok, acc} - end - - _node, acc, _ctx -> - {:ok, acc} - end, - false - ) - end - @doc """ After the storage is ready, initialize the state with info from storage and writer state. """ @@ -362,32 +236,6 @@ defmodule Electric.Shapes.Consumer.State do def initial_snapshot_xmin(%__MODULE__{}), do: nil - @doc """ - Track a change in the touch tracker. - """ - @spec track_change(t(), pos_integer(), Electric.Replication.Changes.change()) :: t() - def track_change(%__MODULE__{move_handling_state: move_handling_state} = state, xid, change) do - %{state | move_handling_state: MoveIns.track_touch(move_handling_state, xid, change)} - end - - @doc """ - Garbage collect touches that are visible in all pending snapshots. 
- """ - @spec gc_touch_tracker(t()) :: t() - def gc_touch_tracker(%__MODULE__{move_handling_state: move_handling_state} = state) do - %{ - state - | move_handling_state: MoveIns.gc_touch_tracker(move_handling_state) - } - end - - def remove_completed_move_ins( - %__MODULE__{move_handling_state: move_handling_state} = state, - xid - ) do - %{state | move_handling_state: MoveIns.remove_completed(move_handling_state, xid)} - end - def telemetry_attrs(%__MODULE__{stack_id: stack_id, shape_handle: shape_handle, shape: shape}) do [ "shape.handle": shape_handle, diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/active_move.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/active_move.ex new file mode 100644 index 0000000000..a6a6817590 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/active_move.ex @@ -0,0 +1,215 @@ +defmodule Electric.Shapes.Consumer.Subqueries.ActiveMove do + # Tracks a single buffered move-in while we wait to splice it into the log. 
+ + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Subqueries.Views + + @type move_value() :: {term(), term()} + + @enforce_keys [ + :dep_index, + :dep_move_kind, + :subquery_ref, + :values, + :views_before_move, + :views_after_move + ] + defstruct [ + :dep_index, + :dep_move_kind, + :subquery_ref, + :values, + :views_before_move, + :views_after_move, + snapshot: nil, + move_in_snapshot_name: nil, + move_in_row_count: nil, + move_in_row_bytes: nil, + move_in_lsn: nil, + latest_seen_lsn: nil, + boundary_txn_count: nil, + buffered_txn_count: 0, + buffered_txns: [] + ] + + @type t() :: %__MODULE__{ + dep_index: non_neg_integer(), + dep_move_kind: :move_in | :move_out, + subquery_ref: [String.t()], + values: [move_value()], + views_before_move: Views.t(), + views_after_move: Views.t(), + snapshot: {term(), term(), [term()]} | nil, + move_in_snapshot_name: String.t() | nil, + move_in_row_count: non_neg_integer() | nil, + move_in_row_bytes: non_neg_integer() | nil, + move_in_lsn: Lsn.t() | nil, + latest_seen_lsn: Lsn.t() | nil, + boundary_txn_count: non_neg_integer() | nil, + buffered_txn_count: non_neg_integer(), + buffered_txns: [Transaction.t()] + } + + @spec start(Views.t(), non_neg_integer(), :move_in | :move_out, [String.t()], [move_value()]) :: + t() + def start(views, dep_index, dep_move_kind, subquery_ref, values) when is_map(views) do + %__MODULE__{ + dep_index: dep_index, + dep_move_kind: dep_move_kind, + subquery_ref: subquery_ref, + values: values, + views_before_move: views, + views_after_move: Views.apply_move(views, subquery_ref, values, dep_move_kind) + } + end + + @spec buffer_txn(t(), Transaction.t()) :: t() + def buffer_txn(%__MODULE__{} = active_move, %Transaction{} = txn) do + active_move + |> maybe_set_boundary_from_txn(txn) + |> Map.update!(:buffered_txns, &[txn | &1]) + |> Map.update!(:buffered_txn_count, &(&1 + 1)) + end + + @spec buffered_txn_count(t()) :: non_neg_integer() + def 
buffered_txn_count(%__MODULE__{buffered_txn_count: buffered_txn_count}), + do: buffered_txn_count + + @spec record_seen_lsn(t(), Lsn.t()) :: t() + def record_seen_lsn(%__MODULE__{} = active_move, %Lsn{} = lsn) do + latest_seen_lsn = newer_lsn(active_move.latest_seen_lsn, lsn) + + active_move + |> Map.put(:latest_seen_lsn, latest_seen_lsn) + |> maybe_set_boundary_from_lsn(latest_seen_lsn) + end + + @spec carry_latest_seen_lsn(t(), Lsn.t() | nil) :: t() + def carry_latest_seen_lsn(%__MODULE__{} = active_move, %Lsn{} = latest_seen_lsn) do + %{active_move | latest_seen_lsn: latest_seen_lsn} + end + + def carry_latest_seen_lsn(%__MODULE__{} = active_move, _latest_seen_lsn), do: active_move + + @spec record_snapshot!(t(), {term(), term(), [term()]}) :: t() + def record_snapshot!(%__MODULE__{snapshot: nil} = active_move, snapshot) do + active_move + |> Map.put(:snapshot, snapshot) + |> maybe_set_boundary_from_snapshot() + end + + def record_snapshot!(%__MODULE__{}, _snapshot) do + raise ArgumentError, "received {:pg_snapshot_known, snapshot} more than once for one move-in" + end + + @spec record_query_complete!(t(), String.t(), non_neg_integer(), non_neg_integer(), Lsn.t()) :: + t() + def record_query_complete!( + %__MODULE__{move_in_snapshot_name: nil} = active_move, + snapshot_name, + row_count, + row_bytes, + move_in_lsn + ) do + active_move + |> Map.put(:move_in_snapshot_name, snapshot_name) + |> Map.put(:move_in_row_count, row_count) + |> Map.put(:move_in_row_bytes, row_bytes) + |> Map.put(:move_in_lsn, move_in_lsn) + |> maybe_set_boundary_from_seen_lsn() + end + + def record_query_complete!(%__MODULE__{}, _snapshot_name, _row_count, _row_bytes, _move_in_lsn) do + raise ArgumentError, + "received {:query_move_in_complete, snapshot_name, row_count, row_bytes, move_in_lsn} more than once for one move-in" + end + + @spec ready_to_splice?(t()) :: boolean() + def ready_to_splice?(%__MODULE__{} = active_move) do + not is_nil(active_move.snapshot) and not 
is_nil(active_move.move_in_snapshot_name) and + not is_nil(active_move.boundary_txn_count) + end + + @spec split_buffer(t()) :: {[Transaction.t()], [Transaction.t()]} + def split_buffer(%__MODULE__{} = active_move) do + active_move.buffered_txns + |> Enum.reverse() + |> Enum.split(active_move.boundary_txn_count) + end + + @spec last_buffered_log_offset(t()) :: Electric.Replication.LogOffset.t() | nil + def last_buffered_log_offset(%__MODULE__{buffered_txns: []}), do: nil + + def last_buffered_log_offset(%__MODULE__{ + buffered_txns: [%Transaction{last_log_offset: log_offset} | _] + }), + do: log_offset + + defp maybe_set_boundary_from_txn( + %__MODULE__{boundary_txn_count: boundary} = active_move, + _txn + ) + when not is_nil(boundary), + do: active_move + + defp maybe_set_boundary_from_txn(%__MODULE__{snapshot: nil} = active_move, _txn), + do: active_move + + defp maybe_set_boundary_from_txn(%__MODULE__{} = active_move, %Transaction{} = txn) do + if Transaction.visible_in_snapshot?(txn, active_move.snapshot) do + active_move + else + %{active_move | boundary_txn_count: active_move.buffered_txn_count} + end + end + + defp maybe_set_boundary_from_snapshot(%__MODULE__{boundary_txn_count: boundary} = active_move) + when not is_nil(boundary), + do: active_move + + defp maybe_set_boundary_from_snapshot(%__MODULE__{snapshot: nil} = active_move), + do: active_move + + defp maybe_set_boundary_from_snapshot(%__MODULE__{} = active_move) do + case active_move.buffered_txns + |> Enum.reverse() + |> Enum.find_index(&(not Transaction.visible_in_snapshot?(&1, active_move.snapshot))) do + nil -> active_move + index -> %{active_move | boundary_txn_count: index} + end + end + + defp maybe_set_boundary_from_lsn( + %__MODULE__{boundary_txn_count: boundary} = active_move, + _lsn + ) + when not is_nil(boundary), + do: active_move + + defp maybe_set_boundary_from_lsn(%__MODULE__{move_in_lsn: nil} = active_move, _lsn), + do: active_move + + defp maybe_set_boundary_from_lsn(%__MODULE__{} = 
active_move, %Lsn{} = lsn) do + case Lsn.compare(lsn, active_move.move_in_lsn) do + :lt -> active_move + _ -> %{active_move | boundary_txn_count: active_move.buffered_txn_count} + end + end + + defp maybe_set_boundary_from_seen_lsn(%__MODULE__{latest_seen_lsn: nil} = active_move), + do: active_move + + defp maybe_set_boundary_from_seen_lsn(%__MODULE__{} = active_move) do + maybe_set_boundary_from_lsn(active_move, active_move.latest_seen_lsn) + end + + defp newer_lsn(nil, %Lsn{} = lsn), do: lsn + + defp newer_lsn(%Lsn{} = current, %Lsn{} = candidate) do + case Lsn.compare(current, candidate) do + :lt -> candidate + _ -> current + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/index_changes.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/index_changes.ex new file mode 100644 index 0000000000..cc150c0fbc --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/index_changes.ex @@ -0,0 +1,136 @@ +defmodule Electric.Shapes.Consumer.Subqueries.IndexChanges do + @moduledoc """ + Determines subquery index effects for dependency move events. + + The subquery index tracks which values are present in the dependency view. + When a move event occurs, the index must be updated — but the *timing* of + that update depends on whether the move triggers buffering. + + ## Broadening and narrowing + + While a move-in is being buffered, transactions continue to arrive and must + be filtered. To avoid missing relevant rows, the index is **broadened** + (made more permissive) as soon as buffering starts. Once the move-in query + completes and the splice is done, the index is **narrowed** back to its + final state. + + For a positive (`IN`) subquery: + - Adding values to the index broadens the filter (more rows match). + - So a move-in adds to the index when **buffering starts**. + + For a negated (`NOT IN`) subquery: + - Adding values to the index *narrows* the filter (fewer rows match). 
+ - So a dependency move-in does **not** update the index when buffering starts + (keeping the filter broad); the add is deferred until **complete**. + - A dependency move-out broadens the filter by removing the value from the + index immediately, and that removal remains correct after the splice. + + ## Effect tables + + ### When buffering starts + + | Dep move | Polarity | Index effect | + |------------|----------|---------------------------| + | move_in | positive | AddToSubqueryIndex | + | move_in | negated | *(none)* | + | move_out | positive | *(none)* | + | move_out | negated | RemoveFromSubqueryIndex | + + ### When complete (splice finished, or immediate for non-buffering cases) + + | Dep move | Polarity | Index effect | + |------------|----------|---------------------------| + | move_in | positive | *(none)* | + | move_in | negated | AddToSubqueryIndex | + | move_out | positive | RemoveFromSubqueryIndex | + | move_out | negated | *(none)* | + + ## Caller conventions + + - **Non-buffering cases** (positive move-out, negated move-in): the move + completes atomically, so callers use `effects_for_complete/3`. + - **Buffering cases** (positive move-in, negated move-out): callers use + `effects_for_buffering/3` when entering buffering and + `effects_for_complete/3` at splice time. + """ + + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.DnfPlan + + @type move :: {:move_in | :move_out, non_neg_integer(), list()} + + @doc """ + Returns index effects to apply when a dependency move event starts buffering. + + Used only by buffering cases to broaden the filter before the move-in query + runs. Calling this for an immediate (non-buffering) move is a bug in the + caller. 
+ """ + @spec effects_for_buffering(DnfPlan.t(), move(), [String.t()]) :: + [Effects.AddToSubqueryIndex.t() | Effects.RemoveFromSubqueryIndex.t()] + def effects_for_buffering(dnf_plan, {dep_move_kind, dep_index, values}, subquery_ref) do + polarity = Map.get(dnf_plan.dependency_polarities, dep_index, :positive) + + case {polarity, dep_move_kind} do + {:positive, :move_in} -> + [ + %Effects.AddToSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: values + } + ] + + {:negated, :move_out} -> + [ + %Effects.RemoveFromSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: values + } + ] + + other -> + raise ArgumentError, + "effects_for_buffering/3 only supports buffering cases, got #{inspect(other)}" + end + end + + @doc """ + Returns index effects to apply when a move event completes. + + For buffering cases this is called at splice time. For non-buffering cases + (where the move completes atomically) this is the only function called. + """ + @spec effects_for_complete(DnfPlan.t(), move(), [String.t()]) :: + [Effects.AddToSubqueryIndex.t() | Effects.RemoveFromSubqueryIndex.t()] + def effects_for_complete(dnf_plan, {dep_move_kind, dep_index, values}, subquery_ref) do + polarity = Map.get(dnf_plan.dependency_polarities, dep_index, :positive) + + case {polarity, dep_move_kind} do + {:positive, :move_in} -> + [] + + {:negated, :move_in} -> + [ + %Effects.AddToSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: values + } + ] + + {:positive, :move_out} -> + [ + %Effects.RemoveFromSubqueryIndex{ + dep_index: dep_index, + subquery_ref: subquery_ref, + values: values + } + ] + + {:negated, :move_out} -> + [] + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_broadcast.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_broadcast.ex new file mode 100644 index 0000000000..f901256057 --- /dev/null +++ 
b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_broadcast.ex @@ -0,0 +1,86 @@ +defmodule Electric.Shapes.Consumer.Subqueries.MoveBroadcast do + # Builds the control messages that tell materializers which tag positions moved. + + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.Subqueries.ShapeInfo + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.SubqueryTags + + @type move_value() :: {term(), term()} + @type move() :: %{dep_index: non_neg_integer(), values: [move_value()]} + + @spec effect_for_move_in(move(), ShapeInfo.t()) :: %Effects.AppendControl{} + def effect_for_move_in(active_move, %ShapeInfo{} = shape_info) do + %Effects.AppendControl{ + message: + make( + shape_info.dnf_plan, + active_move.dep_index, + active_move.values, + "move-in", + shape_info.stack_id, + shape_info.shape_handle + ) + } + end + + @spec effect_for_move_out(non_neg_integer(), [move_value()], ShapeInfo.t()) :: + %Effects.AppendControl{} + def effect_for_move_out(dep_index, values, %ShapeInfo{} = shape_info) do + %Effects.AppendControl{ + message: + make( + shape_info.dnf_plan, + dep_index, + values, + "move-out", + shape_info.stack_id, + shape_info.shape_handle + ) + } + end + + @spec make( + DnfPlan.t(), + non_neg_integer(), + [move_value()], + String.t(), + String.t(), + String.t() + ) :: map() + defp make(plan, dep_index, values, event, stack_id, shape_handle) + when event in ["move-in", "move-out"] do + positions = Map.get(plan.dependency_positions, dep_index, []) + + patterns = + Enum.flat_map(positions, fn pos -> + info = plan.positions[pos] + + Enum.map(values, fn {_typed_value, original_value} -> + %{pos: pos, value: make_hash(info, stack_id, shape_handle, original_value)} + end) + end) + + %{headers: %{event: event, patterns: patterns}} + end + + defp make_hash(%{tag_columns: [_col]}, stack_id, shape_handle, value) do + SubqueryTags.make_value_hash(stack_id, shape_handle, value) + end + + defp make_hash( + %{tag_columns: 
{:hash_together, cols}}, + stack_id, + shape_handle, + original_value + ) do + parts = + original_value + |> Tuple.to_list() + |> Enum.zip_with(cols, fn value, column -> + column <> ":" <> SubqueryTags.namespace_value(value) + end) + + SubqueryTags.make_value_hash_raw(stack_id, shape_handle, Enum.join(parts)) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex new file mode 100644 index 0000000000..02722b86b8 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/move_queue.ex @@ -0,0 +1,109 @@ +defmodule Electric.Shapes.Consumer.Subqueries.MoveQueue do + @moduledoc """ + Multi-dependency move queue. Tracks move_in/move_out operations per dependency index, + with deduplication and redundancy elimination scoped per dependency. + + Move-outs from any dependency are drained before move-ins from any dependency. + """ + + @type move_value() :: {term(), term()} + + # move_out/move_in are maps from dep_index to [move_value] + defstruct move_out: %{}, move_in: %{} + + @type t() :: %__MODULE__{ + move_out: %{non_neg_integer() => [move_value()]}, + move_in: %{non_neg_integer() => [move_value()]} + } + + @type batch_kind() :: :move_out | :move_in + @type batch() :: {batch_kind(), non_neg_integer(), [move_value()]} + + @spec new() :: t() + def new, do: %__MODULE__{} + + @spec length(t()) :: non_neg_integer() + def length(%__MODULE__{move_out: move_out, move_in: move_in}) do + count_values(move_out) + count_values(move_in) + end + + defp count_values(map) do + Enum.reduce(map, 0, fn {_, vs}, acc -> acc + Kernel.length(vs) end) + end + + @doc """ + Enqueue a materializer payload for a specific dependency. + `dep_view` is the current view for this dependency, used for redundancy elimination. 
+  """
+  @spec enqueue(t(), non_neg_integer(), map() | keyword(), MapSet.t()) :: t()
+  def enqueue(%__MODULE__{} = queue, dep_index, payload, %MapSet{} = dep_view)
+      when is_map(payload) or is_list(payload) do
+    # Accept either a map or a keyword payload; normalise to a map up front.
+    payload = Map.new(payload)
+
+    existing_outs = Map.get(queue.move_out, dep_index, [])
+    existing_ins = Map.get(queue.move_in, dep_index, [])
+
+    # Replay the ops already queued for this dependency followed by the new
+    # payload ops, so reduce/2 can collapse the whole history into a single
+    # terminal op per value against the current dependency view.
+    ops =
+      Enum.map(existing_outs, &{:move_out, &1}) ++
+        Enum.map(existing_ins, &{:move_in, &1}) ++
+        payload_to_ops(payload)
+
+    {new_outs, new_ins} = reduce(ops, dep_view)
+
+    # Empty lists are removed from the maps entirely (put_or_delete/3), which
+    # keeps the map-emptiness guards in pop_next/1 accurate.
+    %__MODULE__{
+      move_out: put_or_delete(queue.move_out, dep_index, new_outs),
+      move_in: put_or_delete(queue.move_in, dep_index, new_ins)
+    }
+  end
+
+  @doc """
+  Pop the next batch of operations. Returns move-out batches (any dep) before move-in batches.
+  Returns `{batch, updated_queue}` or `nil` if the queue is empty.
+  """
+  @spec pop_next(t()) :: {batch(), t()} | nil
+  # Guard-safe structural comparison: a non-empty map never equals %{}.
+  def pop_next(%__MODULE__{move_out: move_out} = queue) when move_out != %{} do
+    # Drain the lowest dependency index first so batch order is deterministic.
+    {dep_index, values} = Enum.min_by(move_out, &elem(&1, 0))
+    {{:move_out, dep_index, values}, %{queue | move_out: Map.delete(move_out, dep_index)}}
+  end
+
+  # Move-ins are only popped once every pending move-out has been drained.
+  def pop_next(%__MODULE__{move_out: move_out, move_in: move_in} = queue)
+      when move_out == %{} and move_in != %{} do
+    {dep_index, values} = Enum.min_by(move_in, &elem(&1, 0))
+    {{:move_in, dep_index, values}, %{queue | move_in: Map.delete(move_in, dep_index)}}
+  end
+
+  def pop_next(%__MODULE__{}), do: nil
+
+  # Tag each payload value with its op kind, preserving move_out-then-move_in order.
+  defp payload_to_ops(payload) do
+    Enum.map(Map.get(payload, :move_out, []), &{:move_out, &1}) ++
+      Enum.map(Map.get(payload, :move_in, []), &{:move_in, &1})
+  end
+
+  # Collapse an ordered op list into terminal {move_outs, move_ins} lists.
+  # Dedup key is the typed value (first tuple element); Map.put/3 overwrites,
+  # so the LAST op seen for a value wins. Ops already reflected in `base_view`
+  # are dropped as redundant, then arrival order is restored via the index.
+  defp reduce(ops, base_view) do
+    terminal_ops =
+      ops
+      |> Enum.with_index()
+      |> Enum.reduce(%{}, fn {{kind, move_value}, index}, acc ->
+        Map.put(acc, elem(move_value, 0), %{kind: kind, move_value: move_value, index: index})
+      end)
+      |> Map.values()
+      |> Enum.reject(&redundant?(&1, base_view))
+      |> Enum.sort_by(& &1.index)
+
+    {
+      for(%{kind: :move_out, move_value:
move_value} <- terminal_ops, do: move_value), + for(%{kind: :move_in, move_value: move_value} <- terminal_ops, do: move_value) + } + end + + defp redundant?(%{kind: :move_in, move_value: {value, _}}, base_view) do + MapSet.member?(base_view, value) + end + + defp redundant?(%{kind: :move_out, move_value: {value, _}}, base_view) do + not MapSet.member?(base_view, value) + end + + defp put_or_delete(map, key, []), do: Map.delete(map, key) + defp put_or_delete(map, key, values), do: Map.put(map, key, values) +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/ref_resolver.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/ref_resolver.ex new file mode 100644 index 0000000000..14839a1047 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/ref_resolver.ex @@ -0,0 +1,35 @@ +defmodule Electric.Shapes.Consumer.Subqueries.RefResolver do + # Resolves canonical subquery refs from dependency handles and dependency indexes. + + @enforce_keys [:handle_to_ref, :index_to_ref] + defstruct [:handle_to_ref, :index_to_ref] + + @type ref() :: [String.t()] + @type t() :: %__MODULE__{ + handle_to_ref: %{String.t() => {non_neg_integer(), ref()}}, + index_to_ref: %{non_neg_integer() => ref()} + } + + @spec new(%{String.t() => {non_neg_integer(), ref()}}, %{non_neg_integer() => ref()}) :: t() + def new(handle_to_ref, index_to_ref) do + %__MODULE__{handle_to_ref: handle_to_ref, index_to_ref: index_to_ref} + end + + @spec ref_from_dep_handle!(t(), String.t()) :: ref() + def ref_from_dep_handle!(%__MODULE__{handle_to_ref: mapping}, dep_handle) do + case Map.fetch(mapping, dep_handle) do + {:ok, {_dep_index, ref}} -> + ref + + :error -> + raise ArgumentError, + "unexpected dependency handle #{inspect(dep_handle)}, " <> + "known: #{inspect(Map.keys(mapping))}" + end + end + + @spec ref_from_dep_index!(t(), non_neg_integer()) :: ref() + def ref_from_dep_index!(%__MODULE__{index_to_ref: mapping}, dep_index) do + 
Map.fetch!(mapping, dep_index) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/shape_info.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/shape_info.ex new file mode 100644 index 0000000000..5f414b09c0 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/shape_info.ex @@ -0,0 +1,38 @@ +defmodule Electric.Shapes.Consumer.Subqueries.ShapeInfo do + # Holds the immutable shape-level data shared by the steady and buffering states. + + alias Electric.Shapes.Consumer.Subqueries.RefResolver + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @type dependency_move_policy :: :stream_dependency_moves | :invalidate_on_dependency_move + + @enforce_keys [ + :shape, + :stack_id, + :shape_handle, + :dnf_plan, + :ref_resolver, + :buffer_max_transactions, + :dependency_move_policy + ] + defstruct [ + :shape, + :stack_id, + :shape_handle, + :dnf_plan, + :ref_resolver, + :buffer_max_transactions, + :dependency_move_policy + ] + + @type t() :: %__MODULE__{ + shape: Shape.t(), + stack_id: String.t(), + shape_handle: String.t(), + dnf_plan: DnfPlan.t(), + ref_resolver: RefResolver.t(), + buffer_max_transactions: pos_integer(), + dependency_move_policy: dependency_move_policy() + } +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/splice_plan.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/splice_plan.ex new file mode 100644 index 0000000000..76d2b99c8e --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/splice_plan.ex @@ -0,0 +1,61 @@ +defmodule Electric.Shapes.Consumer.Subqueries.SplicePlan do + @moduledoc false + + alias Electric.Replication.LogOffset + alias Electric.Shapes.Consumer.EffectList + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.Subqueries.ActiveMove + alias Electric.Shapes.Consumer.Subqueries.MoveBroadcast + alias Electric.Shapes.Consumer.Subqueries.ShapeInfo + alias 
Electric.Shapes.Consumer.TransactionConverter + + @enforce_keys [:effects] + defstruct [:effects, :flushed_log_offset] + + @type t() :: %__MODULE__{ + effects: [Effects.t()], + flushed_log_offset: LogOffset.t() | nil + } + + @spec build(ActiveMove.t(), ShapeInfo.t()) :: {:ok, t()} | {:error, term()} + def build(%ActiveMove{} = active_move, %ShapeInfo{} = shape_info) do + {pre_txns, post_txns} = ActiveMove.split_buffer(active_move) + + with {:ok, pre_ops} <- convert_txns(pre_txns, shape_info, active_move.views_before_move), + {:ok, post_ops} <- convert_txns(post_txns, shape_info, active_move.views_after_move) do + effects = + EffectList.new() + |> EffectList.append_all(pre_ops) + |> EffectList.append(MoveBroadcast.effect_for_move_in(active_move, shape_info)) + |> EffectList.append(move_in_snapshot_effect(active_move)) + |> EffectList.append_all(post_ops) + |> EffectList.to_list() + + {:ok, + %__MODULE__{ + effects: effects, + flushed_log_offset: ActiveMove.last_buffered_log_offset(active_move) + }} + end + end + + defp convert_txns(txns, %ShapeInfo{} = shape_info, views) when is_map(views) do + TransactionConverter.transactions_to_effects( + txns, + shape_info.shape, + stack_id: shape_info.stack_id, + shape_handle: shape_info.shape_handle, + extra_refs: {views, views}, + dnf_plan: shape_info.dnf_plan + ) + end + + defp move_in_snapshot_effect(%ActiveMove{} = active_move) do + %Effects.AppendMoveInSnapshot{ + snapshot_name: active_move.move_in_snapshot_name, + row_count: active_move.move_in_row_count, + row_bytes: active_move.move_in_row_bytes, + snapshot: active_move.snapshot + } + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/subqueries/views.ex b/packages/sync-service/lib/electric/shapes/consumer/subqueries/views.ex new file mode 100644 index 0000000000..5191d8ab7a --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/subqueries/views.ex @@ -0,0 +1,26 @@ +defmodule Electric.Shapes.Consumer.Subqueries.Views do + # Applies 
dependency move operations against the current subquery view map. + + @type ref() :: [String.t()] + @type t() :: %{ref() => MapSet.t()} + + @spec current(t(), ref()) :: MapSet.t() + def current(views, subquery_ref), do: Map.get(views, subquery_ref, MapSet.new()) + + @spec apply_move(t(), ref(), list(), :move_in | :move_out) :: t() + def apply_move(views, subquery_ref, values, :move_in) do + Map.update!(views, subquery_ref, fn view -> + Enum.reduce(values, view, fn {value, _original_value}, view -> + MapSet.put(view, value) + end) + end) + end + + def apply_move(views, subquery_ref, values, :move_out) do + Map.update!(views, subquery_ref, fn view -> + Enum.reduce(values, view, fn {value, _original_value}, view -> + MapSet.delete(view, value) + end) + end) + end +end diff --git a/packages/sync-service/lib/electric/shapes/consumer/transaction_converter.ex b/packages/sync-service/lib/electric/shapes/consumer/transaction_converter.ex new file mode 100644 index 0000000000..5ddb560b11 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/consumer/transaction_converter.ex @@ -0,0 +1,75 @@ +defmodule Electric.Shapes.Consumer.TransactionConverter do + # Converts transactions into append-change effects using Shape.convert_change/3. 
+
+  alias Electric.Replication.Changes
+  alias Electric.Replication.Changes.Transaction
+  alias Electric.Shapes.Consumer.Effects
+  alias Electric.Shapes.Shape
+
+  @type convert_opts() :: keyword()
+
+  @doc """
+  Convert a single transaction into append-change effects for `shape`.
+
+  Returns `{:ok, []}` when none of the transaction's changes are relevant to
+  the shape, `{:ok, [%Effects.AppendChanges{}]}` otherwise, and
+  `{:error, {:truncate, xid}}` if the transaction truncates a relation.
+  """
+  @spec transaction_to_effects(Transaction.t(), Shape.t(), convert_opts()) ::
+          {:ok, [Effects.t()]} | {:error, {:truncate, Changes.xid() | nil}}
+  def transaction_to_effects(%Transaction{} = txn, %Shape{} = shape, opts \\ [])
+      when is_list(opts) do
+    with {:ok, changes} <- convert_changes(txn, shape, opts) do
+      {:ok, append_effects(txn.xid, changes)}
+    end
+  end
+
+  @doc """
+  Convert a list of transactions, preserving transaction order.
+
+  Stops at the first truncating transaction and returns its error; transactions
+  that produce no effects are skipped.
+  """
+  @spec transactions_to_effects([Transaction.t()], Shape.t(), convert_opts()) ::
+          {:ok, [Effects.t()]} | {:error, {:truncate, Changes.xid() | nil}}
+  def transactions_to_effects(txns, %Shape{} = shape, opts \\ [])
+      when is_list(txns) and is_list(opts) do
+    Enum.reduce_while(txns, {:ok, []}, fn txn, {:ok, acc} ->
+      case transaction_to_effects(txn, shape, opts) do
+        {:ok, []} ->
+          {:cont, {:ok, acc}}
+
+        {:ok, effects} ->
+          # Prepend for O(1) accumulation; order is restored below.
+          {:cont, {:ok, [effects | acc]}}
+
+        {:error, {:truncate, _xid}} = error ->
+          {:halt, error}
+      end
+    end)
+    |> case do
+      {:ok, effects} -> {:ok, effects |> Enum.reverse() |> List.flatten()}
+      {:error, {:truncate, _xid}} = error -> error
+    end
+  end
+
+  # Run every change in the transaction through Shape.convert_change/3.
+  # A TruncatedRelation aborts the whole transaction with a truncate error.
+  defp convert_changes(%Transaction{} = txn, %Shape{} = shape, opts) when is_list(opts) do
+    txn.changes
+    |> Enum.reduce_while([], fn change, acc ->
+      case change do
+        %Changes.TruncatedRelation{} ->
+          {:halt, {:error, {:truncate, txn.xid}}}
+
+        _ ->
+          converted = Shape.convert_change(shape, change, opts)
+          {:cont, [converted | acc]}
+      end
+    end)
+    |> case do
+      {:error, {:truncate, _xid}} = error ->
+        error
+
+      converted ->
+        # Flatten because Shape.convert_change/3 results are accumulated as
+        # nested lists here; then flag the last change of the transaction.
+        {:ok, converted |> Enum.reverse() |> List.flatten() |> mark_last_change()}
+    end
+  end
+
+  # No relevant changes -> no effect at all, not an empty AppendChanges.
+  defp append_effects(_xid, []), do: []
+
+  defp append_effects(xid, changes) do
+    [%Effects.AppendChanges{changes: changes, xid: xid}]
+  end
+
+  defp mark_last_change([]), do: []
+
+  # Set `last?: true` on the final converted change of the transaction.
+  defp mark_last_change(changes) do
+    {last, rest}
= List.pop_at(changes, -1) + rest ++ [%{last | last?: true}] + end +end diff --git a/packages/sync-service/lib/electric/shapes/dnf_plan.ex b/packages/sync-service/lib/electric/shapes/dnf_plan.ex new file mode 100644 index 0000000000..51c3ee61e3 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/dnf_plan.ex @@ -0,0 +1,197 @@ +defmodule Electric.Shapes.DnfPlan do + @moduledoc """ + A DNF sidecar plan compiled from a shape's WHERE clause. + + Decomposes the WHERE clause into Disjunctive Normal Form and enriches each + position with dependency metadata needed by downstream modules. + + Not stored on the Shape struct itself and compiled at runtime when needed. + """ + + alias Electric.Replication.Eval.Decomposer + alias Electric.Replication.Eval.Parser.{Func, Ref, RowExpr} + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Utils + + defstruct [ + :disjuncts, + :disjuncts_positions, + :position_count, + :positions, + :dependency_positions, + :dependency_disjuncts, + :dependency_polarities + ] + + @type tag_columns :: [String.t()] | {:hash_together, [String.t()]} + + @type position_info :: %{ + ast: term(), + sql: String.t(), + is_subquery: boolean(), + negated: boolean(), + dependency_index: non_neg_integer() | nil, + subquery_ref: [String.t()] | nil, + tag_columns: tag_columns() | nil + } + + @type t :: %__MODULE__{ + disjuncts: Decomposer.dnf(), + disjuncts_positions: [[Decomposer.position()]], + position_count: non_neg_integer(), + positions: %{Decomposer.position() => position_info()}, + dependency_positions: %{non_neg_integer() => [Decomposer.position()]}, + dependency_disjuncts: %{non_neg_integer() => [non_neg_integer()]}, + dependency_polarities: %{non_neg_integer() => :positive | :negated} + } + + @doc """ + Compile a DNF plan from a shape. + + Returns `{:ok, plan}` for shapes with subquery dependencies, + `:no_subqueries` for shapes without, or `{:error, reason}` if + decomposition fails. 
+  """
+  @spec compile(Electric.Shapes.Shape.t()) :: {:ok, t()} | :no_subqueries | {:error, term()}
+  def compile(shape) do
+    # Shapes with no WHERE clause or no subquery dependencies need no plan.
+    if is_nil(shape.where) or shape.shape_dependencies == [] do
+      :no_subqueries
+    else
+      do_compile(shape)
+    end
+  end
+
+  # Decompose the WHERE clause into DNF and assemble the plan. Errors come
+  # either from Decomposer.decompose/1 or from mixed-polarity dependencies.
+  defp do_compile(shape) do
+    with {:ok, decomposition} <- Decomposer.decompose(shape.where.eval) do
+      positions = enrich_positions(decomposition.subexpressions, shape)
+
+      case build_dependency_polarities(positions) do
+        {:ok, dependency_polarities} ->
+          {:ok,
+           %__MODULE__{
+             disjuncts: decomposition.disjuncts,
+             disjuncts_positions: decomposition.disjuncts_positions,
+             position_count: decomposition.position_count,
+             positions: positions,
+             dependency_positions: build_dependency_positions(positions),
+             dependency_disjuncts: build_dependency_disjuncts(decomposition.disjuncts, positions),
+             dependency_polarities: dependency_polarities
+           }}
+
+        {:error, _} = error ->
+          error
+      end
+    end
+  end
+
+  # Build the position -> position_info() map, attaching subquery metadata
+  # (dependency index, ref, tag columns) where the subexpression is a subquery.
+  defp enrich_positions(subexpressions, shape) do
+    Map.new(subexpressions, fn {pos, subexpr} ->
+      {dep_index, subquery_ref, tag_columns} =
+        if subexpr.is_subquery do
+          extract_subquery_info(subexpr.ast)
+        else
+          {nil, nil, nil}
+        end
+
+      {pos,
+       %{
+         ast: subexpr.ast,
+         sql: position_sql(subexpr.ast, subexpr.is_subquery, shape),
+         is_subquery: subexpr.is_subquery,
+         negated: subexpr.negated,
+         dependency_index: dep_index,
+         subquery_ref: subquery_ref,
+         tag_columns: tag_columns
+       }}
+    end)
+  end
+
+  # The dependency index is encoded as the last segment of the sublink ref path
+  # (e.g. ["$sublink", "0"]). A single-column test expression yields a plain
+  # column list; a row expression (e.g. `(a, b) IN (...)`) yields columns that
+  # must be hashed together. Any other AST shape raises FunctionClauseError —
+  # assertive by design; the decomposer is expected to only emit these forms.
+  defp extract_subquery_info(%Func{
+         name: "sublink_membership_check",
+         args: [testexpr, %Ref{path: path}]
+       }) do
+    dep_index = path |> List.last() |> String.to_integer()
+
+    tag_columns =
+      case testexpr do
+        %Ref{path: [column_name]} ->
+          [column_name]
+
+        %RowExpr{elements: elements} ->
+          {:hash_together, Enum.map(elements, fn %Ref{path: [col]} -> col end)}
+      end
+
+    {dep_index, path, tag_columns}
+  end
+
+  # dependency_index -> sorted list of DNF positions referencing it.
+  defp build_dependency_positions(positions) do
+    positions
+    |> Enum.filter(fn {_pos, info} -> info.is_subquery end)
+    |> Enum.group_by(fn {_pos, info} -> info.dependency_index end, fn {pos, _} -> pos end)
+    |> Map.new(fn {idx, poses} -> {idx, Enum.sort(poses)} end)
+  end
+
+  # dependency_index -> sorted list of disjunct indexes whose conjunction
+  # contains at least one position referencing that dependency.
+  defp build_dependency_disjuncts(disjuncts, positions) do
+    disjuncts
+    |> Enum.with_index()
+    |> Enum.reduce(%{}, fn {conj, disjunct_idx}, acc ->
+      Enum.reduce(conj, acc, fn {pos, _polarity}, acc ->
+        case Map.get(positions, pos) do
+          %{is_subquery: true, dependency_index: idx} when not is_nil(idx) ->
+            Map.update(acc, idx, MapSet.new([disjunct_idx]), &MapSet.put(&1, disjunct_idx))
+
+          _ ->
+            acc
+        end
+      end)
+    end)
+    |> Map.new(fn {idx, set} -> {idx, set |> MapSet.to_list() |> Enum.sort()} end)
+  end
+
+  # Each dependency must appear with a single polarity across all positions;
+  # mixed positive/negated usage of the same dependency is rejected.
+  defp build_dependency_polarities(positions) do
+    positions
+    |> Enum.filter(fn {_pos, info} -> info.is_subquery end)
+    |> Enum.group_by(
+      fn {_pos, info} -> info.dependency_index end,
+      fn {_pos, info} -> info.negated end
+    )
+    |> Enum.reduce_while({:ok, %{}}, fn {dep_index, negated_flags}, {:ok, acc} ->
+      case Enum.uniq(negated_flags) do
+        [false] ->
+          {:cont, {:ok, Map.put(acc, dep_index, :positive)}}
+
+        [true] ->
+          {:cont, {:ok, Map.put(acc, dep_index, :negated)}}
+
+        _mixed ->
+          {:halt,
+           {:error,
+            "a subquery dependency cannot be used with both positive and negative polarity in the same filter"}}
+      end
+    end)
+  end
+
+  # Non-subquery positions render directly to SQL.
+  defp position_sql(ast, false, _shape), do: SqlGenerator.to_sql(ast)
+
+  # Subquery positions render as `<testexpr> IN (SELECT <cols> FROM <dep_table>
+  # [WHERE <dep_where>])`, inlining the dependency shape's own query.
+  defp position_sql(
+         %Func{name: "sublink_membership_check", args: [testexpr, %Ref{path: path}]},
+         true,
+         shape
+       ) do
+    dep_index = path |> List.last() |> String.to_integer()
+    dependency = Enum.fetch!(shape.shape_dependencies, dep_index)
+
+    selected_columns =
+      dependency.explicitly_selected_columns
+      |> Enum.map_join(", ", &Utils.quote_name/1)
+
+    dependency_sql =
+      "SELECT " <>
+        selected_columns <>
+        " FROM " <>
+        Utils.relation_to_sql(dependency.root_table) <>
+        if(dependency.where, do: " WHERE " <> dependency.where.query, else: "")
+
+    SqlGenerator.to_sql(testexpr) <> " IN (" <> dependency_sql <> ")"
+  end
+end diff --git a/packages/sync-service/lib/electric/shapes/filter.ex b/packages/sync-service/lib/electric/shapes/filter.ex index 648470d6d5..2117bef4ed 100644 --- a/packages/sync-service/lib/electric/shapes/filter.ex +++ b/packages/sync-service/lib/electric/shapes/filter.ex @@ -18,15 +18,11 @@ defmodule Electric.Shapes.Filter do alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes.TruncatedRelation alias Electric.Replication.Changes.UpdatedRecord - alias Electric.Replication.Eval - alias Electric.Replication.Eval.Parser.Func - alias Electric.Replication.Eval.Parser.Ref - alias Electric.Replication.Eval.Walker - alias Electric.Shapes.Consumer.Materializer + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.Indexes.SubqueryIndex alias Electric.Shapes.Filter.WhereCondition alias Electric.Shapes.Shape - alias Electric.Shapes.WhereClause alias Electric.Telemetry.OpenTelemetry require Logger @@ -37,15 +33,7 @@ defmodule Electric.Shapes.Filter do :where_cond_table, :eq_index_table, :incl_index_table, - :refs_fun, - :stack_id, - # {relation, field_name} -> [{dep_handle, field_type}] - :sublink_field_table, - # dep_handle -> MapSet(outer_shape_ids) - :sublink_dep_table, - # MapSet of shape_ids registered in the inverted index — enables O(1) membership - # check in the hot path without loading the shape or touching dep ETS tables. 
- :sublink_shapes_set + :subquery_index ] @type t :: %Filter{} @@ -59,11 +47,7 @@ defmodule Electric.Shapes.Filter do where_cond_table: :ets.new(:filter_where, [:set, :private]), eq_index_table: :ets.new(:filter_eq, [:set, :private]), incl_index_table: :ets.new(:filter_incl, [:set, :private]), - refs_fun: Keyword.get(opts, :refs_fun, fn _shape -> %{} end), - stack_id: Keyword.get(opts, :stack_id), - sublink_field_table: :ets.new(:filter_sublink_field, [:set, :private]), - sublink_dep_table: :ets.new(:filter_sublink_dep, [:set, :private]), - sublink_shapes_set: MapSet.new() + subquery_index: SubqueryIndex.new(Keyword.take(opts, [:stack_id])) } end @@ -80,15 +64,10 @@ defmodule Electric.Shapes.Filter do @doc """ Returns `true` when ShapeLogCollector can route the shape through any of its indexes instead of relying exclusively on `other_shapes` scans. - - This includes both the primary equality/inclusion indexes and the sublink - inverted index used for dependency-driven subquery routing. """ @spec indexed_shape?(Shape.t()) :: boolean() def indexed_shape?(%Shape{} = shape) do - WhereCondition.indexed_where?(shape.where) or - (Shape.dependency_handles_known?(shape) and - map_size(extract_sublink_fields(shape.where)) > 0) + WhereCondition.indexed_where?(shape.where) end @doc """ @@ -106,17 +85,22 @@ defmodule Electric.Shapes.Filter do where_cond_id = get_or_create_table_condition(filter, shape.root_table) WhereCondition.add_shape(filter, where_cond_id, shape_id, shape.where) + maybe_register_subquery_shape(filter, shape_id, shape) - # Only register in the inverted index when the WHERE is non-optimisable - # (landed in other_shapes). Indexed dep shapes use the equality/inclusion path. 
- if shape.shape_dependencies_handles != [] and - in_other_shapes?(filter, where_cond_id, shape_id) do - register_sublink_shape(filter, shape_id, shape) - else - filter - end + filter end + defp maybe_register_subquery_shape( + %Filter{subquery_index: index}, + shape_id, + %Shape{shape_dependencies: [_ | _]} = shape + ) do + {:ok, plan} = DnfPlan.compile(shape) + SubqueryIndex.register_shape(index, shape_id, plan) + end + + defp maybe_register_subquery_shape(_filter, _shape_id, _shape), do: :ok + defp get_or_create_table_condition(filter, table_name) do case :ets.lookup(filter.tables_table, table_name) do [] -> @@ -145,18 +129,22 @@ defmodule Electric.Shapes.Filter do :ok -> :ok end - filter = - if registered_in_inverted_index?(filter, shape_id) do - unregister_sublink_shape(filter, shape_id, shape) - else - filter - end - + maybe_unregister_subquery_shape(filter, shape_id, shape) :ets.delete(filter.shapes_table, shape_id) filter end + defp maybe_unregister_subquery_shape( + %Filter{subquery_index: index}, + shape_id, + %Shape{shape_dependencies: [_ | _]} + ) do + SubqueryIndex.unregister_shape(index, shape_id) + end + + defp maybe_unregister_subquery_shape(_filter, _shape_id, _shape), do: :ok + @doc """ Returns the shape IDs for all shapes that have been added to the filter that are affected by the given change. @@ -218,91 +206,16 @@ defmodule Electric.Shapes.Filter do end defp shapes_affected_by_record(filter, table_name, record) do - where_cond_results = + candidates_from_where_condition = case :ets.lookup(filter.tables_table, table_name) do - [] -> MapSet.new() - [{_, where_cond_id}] -> WhereCondition.affected_shapes(filter, where_cond_id, record) - end + [] -> + MapSet.new() - MapSet.union(where_cond_results, sublink_affected_shapes(filter, table_name, record)) - end - - # Inverted-index lookup for dep shapes that live in other_shapes. 
- # Returns affected outer shapes in O(fields × dep_handles_per_field) instead - # of the O(N×D) loop that WhereCondition.other_shapes_affected would do. - defp sublink_affected_shapes(%Filter{stack_id: nil}, _table_name, _record), do: MapSet.new() - - defp sublink_affected_shapes(filter, table_name, record) do - link_values_table = Materializer.link_values_table_name(filter.stack_id) - - candidates = - Enum.reduce(record, MapSet.new(), fn {field_name, string_value}, acc -> - case :ets.lookup(filter.sublink_field_table, {table_name, field_name}) do - [] -> - acc - - [{_, dep_infos}] -> - Enum.reduce(dep_infos, acc, fn {dep_handle, field_type}, inner_acc -> - if record_matches_dep?( - link_values_table, - dep_handle, - field_type, - string_value - ) do - union_shapes_for_dep(filter, dep_handle, inner_acc) - else - inner_acc - end - end) - end - end) - - OpenTelemetry.add_span_attributes("filter.sublink_candidates_count": MapSet.size(candidates)) - - # Re-evaluate full WHERE for candidates to handle any non-sublink conditions - OpenTelemetry.timed_fun("filter.sublink_reeval.duration_µs", fn -> - for shape_id <- candidates, - shape = get_shape(filter, shape_id), - not is_nil(shape), - WhereClause.includes_record?(shape.where, record, filter.refs_fun.(shape)), - into: MapSet.new() do - shape_id + [{_, where_cond_id}] -> + WhereCondition.affected_shapes(filter, where_cond_id, record) end - end) - rescue - # The named ETS table may not exist during a ConsumerRegistry restart window. - # Return empty rather than propagating to the broad "return all shapes" fallback. - ArgumentError -> MapSet.new() - end - # Returns true if the record's field value is present in the dep handle's - # cached link values, or if no cached values exist yet (optimistic inclusion). 
- defp record_matches_dep?(link_values_table, dep_handle, _field_type, nil = _string_value) do - # Null field values never match link values, but we still include - # candidates when no cache exists (materializer not started). - :ets.lookup(link_values_table, dep_handle) == [] - end - - defp record_matches_dep?(link_values_table, dep_handle, field_type, string_value) do - case :ets.lookup(link_values_table, dep_handle) do - [] -> - # No cached values yet (materializer not started) -- include as candidate - # so the re-eval via refs_fun handles it correctly. - true - - [{_, linked_values}] -> - case Eval.Env.parse_const(Eval.Env.new(), string_value, field_type) do - {:ok, parsed_value} -> MapSet.member?(linked_values, parsed_value) - _ -> false - end - end - end - - defp union_shapes_for_dep(filter, dep_handle, acc) do - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shape_ids}] -> MapSet.union(acc, shape_ids) - [] -> acc - end + candidates_from_where_condition end defp all_shape_ids(%Filter{} = filter) do @@ -316,10 +229,13 @@ defmodule Electric.Shapes.Filter do end defp shape_ids_for_table(%Filter{} = filter, table_name) do - case :ets.lookup(filter.tables_table, table_name) do - [] -> MapSet.new() - [{_, where_cond_id}] -> WhereCondition.all_shape_ids(filter, where_cond_id) - end + from_where_condition = + case :ets.lookup(filter.tables_table, table_name) do + [] -> MapSet.new() + [{_, where_cond_id}] -> WhereCondition.all_shape_ids(filter, where_cond_id) + end + + from_where_condition end @doc """ @@ -333,132 +249,7 @@ defmodule Electric.Shapes.Filter do end @doc """ - Returns true if a dep shape is registered in the sublink inverted index. - - Only dep shapes in top-level other_shapes (non-optimisable WHERE) are registered. - Dep shapes that go through an equality index end up in nested other_shapes and - must be evaluated normally by `other_shapes_affected`. + Get the subquery index. Used by consumers to seed/update membership. 
""" - @spec registered_in_inverted_index?(t(), shape_id()) :: boolean() - def registered_in_inverted_index?(%Filter{sublink_shapes_set: set}, shape_id), - do: MapSet.member?(set, shape_id) - - defp in_other_shapes?(filter, where_cond_id, shape_id) do - case :ets.lookup(filter.where_cond_table, where_cond_id) do - [{_, {_index_keys, other_shapes}}] -> Map.has_key?(other_shapes, shape_id) - [] -> false - end - end - - # Walks the WHERE expression tree and returns a map of - # %{sublink_index => {field_name, field_type}} for each - # sublink_membership_check node with a simple field reference on the left. - # Returns an empty map for nil or complex (RowExpr) left-hand sides. - defp extract_sublink_fields(nil), do: %{} - - defp extract_sublink_fields(%{eval: eval}) do - Walker.reduce!( - eval, - fn - %Func{ - name: "sublink_membership_check", - args: [ - %Ref{path: [field_name], type: field_type}, - %Ref{path: ["$sublink", n_str]} - ] - }, - acc, - _ -> - {:ok, Map.put(acc, String.to_integer(n_str), {field_name, field_type})} - - _, acc, _ -> - {:ok, acc} - end, - %{} - ) - end - - defp register_sublink_shape(filter, shape_id, shape) do - sublink_fields = extract_sublink_fields(shape.where) - - for {sublink_index, {field_name, field_type}} <- sublink_fields do - dep_handle = Enum.at(shape.shape_dependencies_handles, sublink_index) - - field_key = {shape.root_table, field_name} - - existing_entries = - case :ets.lookup(filter.sublink_field_table, field_key) do - [{_, entries}] -> entries - [] -> [] - end - - unless Enum.any?(existing_entries, fn {h, _} -> h == dep_handle end) do - :ets.insert( - filter.sublink_field_table, - {field_key, [{dep_handle, field_type} | existing_entries]} - ) - end - - existing_shapes = - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shapes}] -> shapes - [] -> MapSet.new() - end - - :ets.insert(filter.sublink_dep_table, {dep_handle, MapSet.put(existing_shapes, shape_id)}) - end - - # RowExpr subqueries (e.g. 
`(a, b) IN (SELECT ...)`) produce no indexable fields; - # those shapes stay in other_shapes and must not be marked as indexed. - if map_size(sublink_fields) > 0 do - %{filter | sublink_shapes_set: MapSet.put(filter.sublink_shapes_set, shape_id)} - else - filter - end - end - - defp unregister_sublink_shape(filter, shape_id, shape) do - sublink_fields = extract_sublink_fields(shape.where) - - for {sublink_index, {field_name, _field_type}} <- sublink_fields do - dep_handle = Enum.at(shape.shape_dependencies_handles, sublink_index) - - dep_now_empty? = - case :ets.lookup(filter.sublink_dep_table, dep_handle) do - [{_, shapes}] -> - new_shapes = MapSet.delete(shapes, shape_id) - - if MapSet.size(new_shapes) == 0 do - :ets.delete(filter.sublink_dep_table, dep_handle) - true - else - :ets.insert(filter.sublink_dep_table, {dep_handle, new_shapes}) - false - end - - [] -> - true - end - - if dep_now_empty? do - field_key = {shape.root_table, field_name} - - case :ets.lookup(filter.sublink_field_table, field_key) do - [{_, entries}] -> - new_entries = Enum.reject(entries, fn {h, _} -> h == dep_handle end) - - if new_entries == [] do - :ets.delete(filter.sublink_field_table, field_key) - else - :ets.insert(filter.sublink_field_table, {field_key, new_entries}) - end - - [] -> - :ok - end - end - end - - %{filter | sublink_shapes_set: MapSet.delete(filter.sublink_shapes_set, shape_id)} - end + def subquery_index(%Filter{subquery_index: index}), do: index end diff --git a/packages/sync-service/lib/electric/shapes/filter/index.ex b/packages/sync-service/lib/electric/shapes/filter/index.ex index 9ac79afcab..001230feba 100644 --- a/packages/sync-service/lib/electric/shapes/filter/index.ex +++ b/packages/sync-service/lib/electric/shapes/filter/index.ex @@ -10,9 +10,11 @@ defmodule Electric.Shapes.Filter.Index do alias Electric.Shapes.Filter alias Electric.Shapes.Filter.Indexes.EqualityIndex alias Electric.Shapes.Filter.Indexes.InclusionIndex + alias 
Electric.Shapes.Filter.Indexes.SubqueryIndex defp module_for("="), do: EqualityIndex defp module_for("@>"), do: InclusionIndex + defp module_for("subquery"), do: SubqueryIndex # "in" delegates to EqualityIndex, registering the shape under each value def add_shape(%Filter{} = filter, where_cond_id, shape_id, %{operation: "in"} = optimisation) do diff --git a/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex new file mode 100644 index 0000000000..a34d29d962 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/filter/indexes/subquery_index.ex @@ -0,0 +1,479 @@ +defmodule Electric.Shapes.Filter.Indexes.SubqueryIndex do + # Index for subquery routing and exact membership. + + # Each subquery predicate in the filter tree registers a node identified by + # `{condition_id, field_key}`. For each node, this table acts as a reverse + # index from the value seen on the root-table record to the shapes whose + # current subquery view makes that value relevant at that node. + + # Each shape consumer maintains its own entries in the index. On startup it + # seeds the node memberships for its current dependency views, then updates + # only those memberships as its subquery views change. This keeps the filter's + # materialized view of subquery membership aligned with the view that shape + # currently needs, without re-evaluating subqueries globally. + + # The same table also stores exact `shape_handle + subquery_ref + typed_value` + # membership rows used by `WhereClause.includes_record?/3` when the filter + # needs to verify subquery membership for a specific shape. + + # Shapes begin in a fallback set until their consumer has loaded and seeded + # that local state. 
Fallback routing is needed for restored or lazily started + # consumers: before their subquery view is available we still need to route + # root-table changes conservatively so the shape can be started and brought up + # to date. `mark_ready/2` removes the shape from fallback once its index + # entries reflect the consumer's current view. + @moduledoc false + + import Electric, only: [is_stack_id: 1] + + alias Electric.Replication.Eval.Expr + alias Electric.Replication.Eval.Runner + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.WhereCondition + + @type t :: :ets.tid() | atom() + @type node_id :: {reference(), term()} + + defp table_name(stack_id) when is_stack_id(stack_id), do: :"subquery_index:#{stack_id}" + + @doc """ + Create a new SubqueryIndex ETS table. + + The table is `:public` so consumer processes can seed and update membership + while the filter reads candidates during routing. + """ + @spec new(keyword()) :: t() + def new(opts \\ []) do + case Keyword.get(opts, :stack_id) do + nil -> + :ets.new(:subquery_index, [:bag, :public]) + + stack_id -> + :ets.new(table_name(stack_id), [:bag, :public, :named_table]) + end + end + + @doc """ + Look up the SubqueryIndex table for a stack. + """ + @spec for_stack(String.t()) :: t() | nil + def for_stack(stack_id) when is_stack_id(stack_id) do + case :ets.whereis(table_name(stack_id)) do + :undefined -> nil + _tid -> table_name(stack_id) + end + end + + @doc """ + Register per-shape exact membership metadata from a compiled DnfPlan. + + Node-local routing metadata is registered by `add_shape/4` when the filter + adds the shape to a concrete subquery node. 
+ """ + @spec register_shape(t(), term(), DnfPlan.t()) :: :ok + def register_shape(table, shape_handle, %DnfPlan{} = plan) do + polarities = + plan.positions + |> Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Map.new(fn {_pos, info} -> + {info.subquery_ref, if(info.negated, do: :negated, else: :positive)} + end) + + for {subquery_ref, polarity} <- polarities do + :ets.insert(table, {{:polarity, shape_handle, subquery_ref}, polarity}) + end + + :ets.insert(table, {{:fallback, shape_handle}, true}) + + :ok + end + + @doc """ + Remove all exact membership metadata for a shape. + """ + @spec unregister_shape(t(), term()) :: :ok + def unregister_shape(table, shape_handle) do + :ets.match_delete(table, {{:membership, shape_handle, :_, :_}, true}) + :ets.match_delete(table, {{:polarity, shape_handle, :_}, :_}) + :ets.match_delete(table, {{:shape_node, shape_handle}, :_}) + :ets.match_delete(table, {{:shape_dep_node, shape_handle, :_}, :_}) + :ets.delete(table, {:fallback, shape_handle}) + :ok + end + + @doc """ + Register a shape on a concrete subquery filter node. 
+ """ + @spec add_shape(Filter.t(), reference(), term(), map()) :: :ok + def add_shape(%Filter{subquery_index: table} = filter, condition_id, shape_id, optimisation) do + node_id = {condition_id, optimisation.field} + next_condition_id = make_ref() + + WhereCondition.init(filter, next_condition_id) + WhereCondition.add_shape(filter, next_condition_id, shape_id, optimisation.and_where) + + ensure_node_meta(table, node_id, optimisation.testexpr) + + :ets.insert( + table, + {{:node_shape, node_id}, + {shape_id, optimisation.dep_index, optimisation.polarity, next_condition_id}} + ) + + if optimisation.polarity == :negated do + :ets.insert(table, {{:node_negated_shape, node_id}, {shape_id, next_condition_id}}) + end + + :ets.insert( + table, + {{:shape_node, shape_id}, + {node_id, optimisation.dep_index, optimisation.polarity, next_condition_id}} + ) + + :ets.insert( + table, + {{:shape_dep_node, shape_id, optimisation.dep_index}, + {node_id, optimisation.polarity, next_condition_id}} + ) + + :ets.insert(table, {{:node_fallback, node_id}, {shape_id, next_condition_id}}) + :ok + end + + @doc """ + Remove a shape from a concrete subquery filter node. 
+ """ + @spec remove_shape(Filter.t(), reference(), term(), map()) :: :deleted | :ok + def remove_shape(%Filter{subquery_index: table} = filter, condition_id, shape_id, optimisation) do + node_id = {condition_id, optimisation.field} + + case node_shape_entry_for_shape(table, shape_id, node_id) do + nil -> + :deleted + + {dep_index, polarity, next_condition_id} -> + _ = + WhereCondition.remove_shape(filter, next_condition_id, shape_id, optimisation.and_where) + + :ets.match_delete( + table, + {{:node_shape, node_id}, {shape_id, dep_index, polarity, next_condition_id}} + ) + + if polarity == :negated do + :ets.match_delete( + table, + {{:node_negated_shape, node_id}, {shape_id, next_condition_id}} + ) + end + + :ets.match_delete( + table, + {{:shape_node, shape_id}, {node_id, dep_index, polarity, next_condition_id}} + ) + + :ets.match_delete( + table, + {{:shape_dep_node, shape_id, dep_index}, {node_id, polarity, next_condition_id}} + ) + + :ets.match_delete(table, {{:node_fallback, node_id}, {shape_id, next_condition_id}}) + delete_node_members(table, node_id, shape_id, polarity, next_condition_id) + + if node_empty?(table, node_id) do + :ets.delete(table, {:node_meta, node_id}) + :deleted + else + :ok + end + end + end + + @doc """ + Seed membership entries from a dependency view. + """ + @spec seed_membership(t(), term(), [String.t()], non_neg_integer(), MapSet.t()) :: :ok + def seed_membership(table, shape_handle, subquery_ref, dep_index, view) do + for value <- view do + add_value(table, shape_handle, subquery_ref, dep_index, value) + end + + :ok + end + + @doc """ + Mark a shape as ready for indexed routing. 
+ """ + @spec mark_ready(t(), term()) :: :ok + def mark_ready(table, shape_handle) do + :ets.delete(table, {:fallback, shape_handle}) + + for {node_id, _dep_index, _polarity, _next_condition_id} <- + nodes_for_shape(table, shape_handle) do + :ets.match_delete(table, {{:node_fallback, node_id}, {shape_handle, :_}}) + end + + :ok + end + + @doc """ + Add a value to both the node-local routing index and the exact membership set. + """ + @spec add_value(t(), term(), [String.t()], non_neg_integer(), term()) :: :ok + def add_value(table, shape_handle, subquery_ref, dep_index, value) do + for {node_id, polarity, next_condition_id} <- + nodes_for_shape_dependency(table, shape_handle, dep_index) do + case polarity do + :positive -> + :ets.insert( + table, + {{:node_positive_member, node_id, value}, {shape_handle, next_condition_id}} + ) + + :negated -> + :ets.insert( + table, + {{:node_negated_member, node_id, value}, {shape_handle, next_condition_id}} + ) + end + end + + :ets.insert(table, {{:membership, shape_handle, subquery_ref, value}, true}) + :ok + end + + @doc """ + Remove a value from both the node-local routing index and the exact membership set. + """ + @spec remove_value(t(), term(), [String.t()], non_neg_integer(), term()) :: :ok + def remove_value(table, shape_handle, subquery_ref, dep_index, value) do + for {node_id, polarity, next_condition_id} <- + nodes_for_shape_dependency(table, shape_handle, dep_index) do + case polarity do + :positive -> + :ets.match_delete( + table, + {{:node_positive_member, node_id, value}, {shape_handle, next_condition_id}} + ) + + :negated -> + :ets.match_delete( + table, + {{:node_negated_member, node_id, value}, {shape_handle, next_condition_id}} + ) + end + end + + :ets.delete(table, {:membership, shape_handle, subquery_ref, value}) + :ok + end + + @doc """ + Get affected shape handles for a specific subquery node. 
+ """ + @spec affected_shapes(Filter.t(), reference(), term(), map()) :: MapSet.t() + def affected_shapes(%Filter{subquery_index: table} = filter, condition_id, field_key, record) do + node_id = {condition_id, field_key} + + candidates = + case evaluate_node_lhs(table, node_id, record) do + {:ok, typed_value} -> + positive = + values_for_key(table, {:node_positive_member, node_id, typed_value}) |> MapSet.new() + + negated = + MapSet.difference( + values_for_key(table, {:node_negated_shape, node_id}) |> MapSet.new(), + values_for_key(table, {:node_negated_member, node_id, typed_value}) |> MapSet.new() + ) + + fallback = values_for_key(table, {:node_fallback, node_id}) |> MapSet.new() + + positive + |> MapSet.union(negated) + |> MapSet.union(fallback) + + :error -> + all_node_shapes(table, node_id) + end + + Enum.reduce(candidates, MapSet.new(), fn {_shape_id, next_condition_id}, acc -> + MapSet.union( + acc, + WhereCondition.affected_shapes(filter, next_condition_id, record) + ) + end) + end + + @doc """ + Get all shape ids registered on a specific subquery node. + """ + @spec all_shape_ids(Filter.t(), reference(), term()) :: MapSet.t() + def all_shape_ids(%Filter{subquery_index: table} = filter, condition_id, field_key) do + table + |> all_node_shapes({condition_id, field_key}) + |> Enum.reduce(MapSet.new(), fn {_shape_id, next_condition_id}, acc -> + MapSet.union(acc, WhereCondition.all_shape_ids(filter, next_condition_id)) + end) + end + + @doc """ + Check if a specific shape has a value in its current dependency view + for a canonical subquery ref. + """ + @spec member?(t(), term(), [String.t()], term()) :: boolean() + def member?(table, shape_handle, subquery_ref, typed_value) do + :ets.member(table, {:membership, shape_handle, subquery_ref, typed_value}) + end + + @doc """ + Check subquery membership for exact evaluation, falling back to the shape's + dependency polarity while the shape is still unseeded. 
+ """ + @spec membership_or_fallback?(t(), term(), [String.t()], term()) :: boolean() + def membership_or_fallback?(table, shape_handle, subquery_ref, typed_value) do + if shape_ready?(table, shape_handle) do + member?(table, shape_handle, subquery_ref, typed_value) + else + case polarity_for_shape_ref(table, shape_handle, subquery_ref) do + :positive -> true + :negated -> false + end + end + end + + @doc """ + Check if a shape is in the fallback set. + """ + @spec fallback?(t(), term()) :: boolean() + def fallback?(table, shape_handle) do + :ets.member(table, {:fallback, shape_handle}) + end + + @doc """ + Check if a shape has any registered subquery nodes. + """ + @spec has_positions?(t(), term()) :: boolean() + def has_positions?(table, shape_handle) do + nodes_for_shape(table, shape_handle) != [] + end + + @doc """ + Return the registered node ids for a shape. + """ + @spec positions_for_shape(t(), term()) :: [node_id()] + def positions_for_shape(table, shape_handle) do + table + |> nodes_for_shape(shape_handle) + |> Enum.map(fn {node_id, _dep_index, _polarity, _next_condition_id} -> node_id end) + end + + defp ensure_node_meta(table, node_id, testexpr) do + case :ets.lookup(table, {:node_meta, node_id}) do + [] -> + :ets.insert(table, {{:node_meta, node_id}, %{testexpr: testexpr}}) + + _ -> + :ok + end + end + + defp delete_node_members(table, node_id, shape_id, polarity, next_condition_id) do + case polarity do + :positive -> + :ets.match_delete( + table, + {{:node_positive_member, node_id, :_}, {shape_id, next_condition_id}} + ) + + :negated -> + :ets.match_delete( + table, + {{:node_negated_member, node_id, :_}, {shape_id, next_condition_id}} + ) + end + end + + defp nodes_for_shape(table, shape_handle) do + table + |> :ets.lookup({:shape_node, shape_handle}) + |> Enum.map(&elem(&1, 1)) + end + + defp nodes_for_shape_dependency(table, shape_handle, dep_index) do + table + |> :ets.lookup({:shape_dep_node, shape_handle, dep_index}) + |> Enum.map(&elem(&1, 1)) 
+ end + + defp node_shape_entry_for_shape(table, shape_id, node_id) do + table + |> nodes_for_shape(shape_id) + |> Enum.find_value(fn + {^node_id, dep_index, polarity, next_condition_id} -> + {dep_index, polarity, next_condition_id} + + _ -> + nil + end) + end + + defp node_empty?(table, node_id) do + :ets.lookup(table, {:node_shape, node_id}) == [] + end + + defp all_node_shapes(table, node_id) do + table + |> :ets.lookup({:node_shape, node_id}) + |> Enum.reduce(MapSet.new(), fn + {{:node_shape, ^node_id}, {shape_id, _dep_index, _polarity, next_condition_id}}, acc -> + MapSet.put(acc, {shape_id, next_condition_id}) + + _, acc -> + acc + end) + end + + defp evaluate_node_lhs(table, node_id, record) do + case :ets.lookup(table, {:node_meta, node_id}) do + [{_, %{testexpr: testexpr}}] -> + expr = Expr.wrap_parser_part(testexpr) + + case Runner.record_to_ref_values(expr.used_refs, record) do + {:ok, ref_values} -> + case Runner.execute(expr, ref_values) do + {:ok, value} -> {:ok, value} + _ -> :error + end + + _ -> + :error + end + + [] -> + :error + end + end + + defp values_for_key(table, key) do + table + |> :ets.lookup(key) + |> Enum.map(&elem(&1, 1)) + end + + defp shape_ready?(table, shape_handle) do + not fallback?(table, shape_handle) + end + + defp polarity_for_shape_ref(table, shape_handle, subquery_ref) do + case :ets.lookup(table, {:polarity, shape_handle, subquery_ref}) do + [{_, polarity}] -> + polarity + + [] -> + raise ArgumentError, + "missing polarity for shape #{inspect(shape_handle)} and ref #{inspect(subquery_ref)}" + end + end +end diff --git a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex index df8dc6179c..1238de8516 100644 --- a/packages/sync-service/lib/electric/shapes/filter/where_condition.ex +++ b/packages/sync-service/lib/electric/shapes/filter/where_condition.ex @@ -7,7 +7,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do being on the same 
branch. Each WhereCondition is identified by a unique reference and stores: - - `index_keys`: MapSet of {field, operation} tuples for indexed conditions + - `index_keys`: MapSet of {field_key, operation} tuples for indexed conditions - `other_shapes`: map of shape_id -> where_clause for non-optimized shapes The logic for specific indexes (equality, inclusion) is handled by dedicated modules that also use ETS. @@ -18,6 +18,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do alias Electric.Replication.Eval.Parser.Const alias Electric.Replication.Eval.Parser.Func alias Electric.Replication.Eval.Parser.Ref + alias Electric.Replication.Eval.Parser.RowExpr alias Electric.Shapes.Filter alias Electric.Shapes.Filter.Index alias Electric.Shapes.WhereClause @@ -110,6 +111,17 @@ defmodule Electric.Shapes.Filter.WhereCondition do %{operation: "@>", field: field, type: type, value: [value], and_where: nil} end + defp optimise_where(%Func{name: "sublink_membership_check"} = subquery) do + subquery_optimisation(subquery, :positive) + end + + defp optimise_where(%Func{ + name: "not", + args: [%Func{name: "sublink_membership_check"} = subquery] + }) do + subquery_optimisation(subquery, :negated) + end + # field IN (const1, const2, ...) 
→ reuse = index with multiple values defp optimise_where(%Func{name: "or"} = expr) do case flatten_or_equalities(expr) do @@ -123,10 +135,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do defp optimise_where(%Func{name: "and", args: [arg1, arg2]}) do case {optimise_where(arg1), optimise_where(arg2)} do - {%{operation: op, and_where: nil} = params, _} when op in ["=", "@>", "in"] -> + {%{operation: op, and_where: nil} = params, _} when op in ["=", "@>", "in", "subquery"] -> %{params | and_where: where_expr(arg2)} - {_, %{operation: op, and_where: nil} = params} when op in ["=", "@>", "in"] -> + {_, %{operation: op, and_where: nil} = params} when op in ["=", "@>", "in", "subquery"] -> %{params | and_where: where_expr(arg1)} _ -> @@ -144,6 +156,27 @@ defmodule Electric.Shapes.Filter.WhereCondition do %Expr{eval: eval, used_refs: Parser.find_refs(eval), returns: :bool} end + defp subquery_optimisation( + %Func{name: "sublink_membership_check", args: [testexpr, %Ref{path: subquery_ref}]} = + _subquery, + polarity + ) do + with {:ok, field_key} <- subquery_field_key(testexpr), + {:ok, dep_index} <- dep_index_from_ref(subquery_ref) do + %{ + operation: "subquery", + field: field_key, + testexpr: testexpr, + subquery_ref: subquery_ref, + dep_index: dep_index, + polarity: polarity, + and_where: nil + } + else + _ -> :not_optimised + end + end + # Flatten an OR chain of equalities on the same field into {field, type, [values]} defp flatten_or_equalities(expr) do case collect_or_equalities(expr, []) do @@ -239,10 +272,10 @@ defmodule Electric.Shapes.Filter.WhereCondition do :ok end - def affected_shapes(%Filter{where_cond_table: table} = filter, condition_id, record) do + def affected_shapes(%Filter{} = filter, condition_id, record) do MapSet.union( indexed_shapes_affected(filter, condition_id, record), - other_shapes_affected(filter, table, condition_id, record) + other_shapes_affected(filter, condition_id, record) ) rescue error -> @@ -272,8 +305,11 @@ defmodule 
Electric.Shapes.Filter.WhereCondition do ) end - defp other_shapes_affected(%Filter{refs_fun: refs_fun} = filter, table, condition_id, record) - when is_function(refs_fun, 1) do + defp other_shapes_affected( + %Filter{subquery_index: index, where_cond_table: table}, + condition_id, + record + ) do [{_, {_index_keys, other_shapes}}] = :ets.lookup(table, condition_id) OpenTelemetry.with_child_span( @@ -281,9 +317,7 @@ defmodule Electric.Shapes.Filter.WhereCondition do [shape_count: map_size(other_shapes)], fn -> for {shape_id, where} <- other_shapes, - shape = Filter.get_shape(filter, shape_id), - not is_nil(shape), - WhereClause.includes_record?(where, record, refs_fun.(shape)), + other_shape_matches?(index, shape_id, where, record), into: MapSet.new() do shape_id end @@ -291,6 +325,38 @@ defmodule Electric.Shapes.Filter.WhereCondition do ) end + defp other_shape_matches?(index, shape_id, where, record) do + case WhereClause.includes_record_result( + where, + record, + WhereClause.subquery_member_from_index(index, shape_id) + ) do + {:ok, included?} -> included? 
+ :error -> true + end + end + + defp subquery_field_key(%Ref{path: [field]}), do: {:ok, field} + + defp subquery_field_key(%RowExpr{elements: elements}) do + if Enum.all?(elements, &match?(%Ref{path: [_]}, &1)) do + {:ok, Enum.map(elements, fn %Ref{path: [field]} -> field end)} + else + :error + end + end + + defp subquery_field_key(_), do: :error + + defp dep_index_from_ref([_prefix, dep_index]) when is_binary(dep_index) do + case Integer.parse(dep_index) do + {idx, ""} -> {:ok, idx} + _ -> :error + end + end + + defp dep_index_from_ref(_), do: :error + def all_shape_ids(%Filter{where_cond_table: table} = filter, condition_id) do case :ets.lookup(table, condition_id) do [] -> diff --git a/packages/sync-service/lib/electric/shapes/querying.ex b/packages/sync-service/lib/electric/shapes/querying.ex index 32fda4a243..460c1cae26 100644 --- a/packages/sync-service/lib/electric/shapes/querying.ex +++ b/packages/sync-service/lib/electric/shapes/querying.ex @@ -1,21 +1,32 @@ defmodule Electric.Shapes.Querying do + alias Electric.Replication.Eval.Parser.Func + alias Electric.Replication.Eval.SqlGenerator alias Electric.ShapeCache.LogChunker - alias Electric.Utils + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves + alias Electric.Shapes.SubqueryTags alias Electric.Telemetry.OpenTelemetry + alias Electric.Utils - @value_prefix SubqueryMoves.value_prefix() - @null_sentinel SubqueryMoves.null_sentinel() + @value_prefix SubqueryTags.value_prefix() + @null_sentinel SubqueryTags.null_sentinel() - def query_move_in(conn, stack_id, shape_handle, shape, {where, params}) do + def query_move_in(conn, stack_id, shape_handle, shape, {where, params}, opts \\ []) do table = Utils.relation_to_sql(shape.root_table) - {json_like_select, _} = - json_like_select(shape, %{"is_move_in" => true}, stack_id, shape_handle) + metadata = + metadata_sql( + shape, + stack_id, + shape_handle, + opts |> Keyword.put(:start_param_idx, length(params) + 
1) + ) + + {json_like_select, metadata_params} = + json_like_select(shape, %{"is_move_in" => true}, stack_id, shape_handle, metadata) key_select = key_select(shape) - tag_select = make_tags(shape, stack_id, shape_handle) |> Enum.join(", ") + tag_select = Enum.join(metadata.tags_sqls, ", ") query = Postgrex.prepare!( @@ -24,7 +35,7 @@ defmodule Electric.Shapes.Querying do ~s|SELECT #{key_select}, ARRAY[#{tag_select}]::text[], #{json_like_select} FROM #{table} WHERE #{where}| ) - Postgrex.stream(conn, query, params) + Postgrex.stream(conn, query, params ++ metadata_params) |> Stream.flat_map(& &1.rows) end @@ -51,7 +62,10 @@ defmodule Electric.Shapes.Querying do limit = if limit = subset.limit, do: " LIMIT #{limit}", else: "" offset = if offset = subset.offset, do: " OFFSET #{offset}", else: "" - {json_like_select, params} = json_like_select(shape, headers, stack_id, shape_handle) + metadata = metadata_sql(shape, stack_id, shape_handle) + + {json_like_select, params} = + json_like_select(shape, headers, stack_id, shape_handle, metadata) query = Postgrex.prepare!( @@ -120,7 +134,8 @@ defmodule Electric.Shapes.Querying do where = if not is_nil(shape.where), do: " WHERE " <> shape.where.query, else: "" - {json_like_select, params} = json_like_select(shape, [], stack_id, shape_handle) + metadata = metadata_sql(shape, stack_id, shape_handle) + {json_like_select, params} = json_like_select(shape, [], stack_id, shape_handle, metadata) query = Postgrex.prepare!(conn, table, ~s|SELECT #{json_like_select} FROM #{table} #{where}|) @@ -177,13 +192,13 @@ defmodule Electric.Shapes.Querying do selected_columns: columns } = shape, additional_headers, - stack_id, - shape_handle + _stack_id, + _shape_handle, + metadata ) do - tags = make_tags(shape, stack_id, shape_handle) key_part = build_key_part(shape) value_part = build_value_part(columns) - headers_part = build_headers_part(root_table, additional_headers, tags) + headers_part = build_headers_part(root_table, additional_headers, 
metadata) # We're building a JSON string that looks like this: # @@ -200,13 +215,13 @@ defmodule Electric.Shapes.Querying do query = ~s['{' || #{key_part} || ',' || #{value_part} || ',' || #{headers_part} || '}'] - {query, []} + {query, metadata.params} end - defp build_headers_part(rel, headers, tags) when is_list(headers), - do: build_headers_part(rel, Map.new(headers), tags) + defp build_headers_part(rel, headers, metadata) when is_list(headers), + do: build_headers_part(rel, Map.new(headers), metadata) - defp build_headers_part({relation, table}, additional_headers, tags) do + defp build_headers_part({relation, table}, additional_headers, metadata) do headers = %{operation: "insert", relation: [relation, table]} headers = @@ -216,11 +231,11 @@ defmodule Electric.Shapes.Querying do |> Utils.escape_quotes(?') headers = - if tags != [] do + if metadata.tags_sqls != [] do "{" <> json = headers - tags = Enum.join(tags, ~s[ || '","' || ]) - ~s/{"tags":["' || #{tags} || '"],/ <> json + ~s/{"active_conditions":#{active_conditions_json_expr(metadata)},"tags":#{tags_json_expr(metadata.tags_sqls)},/ <> + json else headers end @@ -286,8 +301,298 @@ defmodule Electric.Shapes.Querying do defp pg_escape_string_for_json(str), do: ~s[to_json(#{str})::text] defp pg_coalesce_json_string(str), do: ~s[coalesce(#{str} , 'null')] + defp metadata_sql(shape, stack_id, shape_handle, opts \\ []) do + case dnf_plan_for_metadata(shape, opts) do + %DnfPlan{} = plan -> + tags_sqls = tags_sql(plan, stack_id, shape_handle) + + {active_conditions_sqls, params} = + case Keyword.get(opts, :views) do + nil -> + {active_conditions_sql(plan), []} + + views -> + {sqls, params, _next_idx} = + active_conditions_sql_for_views( + plan, + views, + shape.where.used_refs, + Keyword.get(opts, :start_param_idx, 1) + ) + + {sqls, params} + end + + %{tags_sqls: tags_sqls, active_conditions_sqls: active_conditions_sqls, params: params} + + nil -> + %{ + tags_sqls: make_tags(shape, stack_id, shape_handle), + 
active_conditions_sqls: nil, + params: [] + } + end + end + + defp dnf_plan_for_metadata(shape, opts) do + case Keyword.get(opts, :dnf_plan) do + %DnfPlan{} = plan -> + plan + + nil -> + if shape.shape_dependencies == [] do + nil + else + {:ok, %DnfPlan{} = plan} = DnfPlan.compile(shape) + plan + end + end + end + + def move_in_where_clause(plan, dep_index, views_before_move, views_after_move, used_refs) do + impacted = Map.get(plan.dependency_disjuncts, dep_index, []) + all_idxs = Enum.to_list(0..(length(plan.disjuncts) - 1)) + unaffected = all_idxs -- impacted + + {candidate_sql, candidate_params, next_param} = + build_disjuncts_sql( + plan, + impacted, + views_after_move, + used_refs, + 1 + ) + + {impacted_before_sql, impacted_before_params, next_param} = + build_disjuncts_sql( + plan, + impacted, + views_before_move, + used_refs, + next_param + ) + + {unaffected_sql, unaffected_params, _} = + build_disjuncts_sql( + plan, + unaffected, + views_before_move, + used_refs, + next_param + ) + + where = + case join_sql(" OR ", [impacted_before_sql, unaffected_sql]) do + nil -> candidate_sql + excl -> "(#{candidate_sql}) AND NOT (#{excl})" + end + + {where, candidate_params ++ impacted_before_params ++ unaffected_params} + end + + def active_conditions_sql(plan) do + Enum.map(0..(plan.position_count - 1), fn pos -> + info = plan.positions[pos] + base_sql = info.sql + + if info.negated do + "(NOT COALESCE((#{base_sql})::boolean, false))::boolean" + else + "COALESCE((#{base_sql})::boolean, false)" + end + end) + end + + def active_conditions_sql_for_views(plan, views, used_refs, start_param_idx \\ 1) do + {sqls, params, next_param_idx} = + Enum.reduce(0..(plan.position_count - 1), {[], [], start_param_idx}, fn pos, + {sqls, params, + param_idx} -> + info = Map.fetch!(plan.positions, pos) + + {base_sql, sql_params, next_param_idx} = + position_to_sql(info, views, used_refs, param_idx) + + sql = + if info.negated do + "(NOT COALESCE((#{base_sql})::boolean, 
false))::boolean" + else + "COALESCE((#{base_sql})::boolean, false)" + end + + {[sql | sqls], params ++ sql_params, next_param_idx} + end) + + {Enum.reverse(sqls), params, next_param_idx} + end + + def tags_sql(plan, stack_id, shape_handle) do + Enum.map(plan.disjuncts, fn conj -> + positions_in_disjunct = MapSet.new(conj, &elem(&1, 0)) + + slot_sqls = + Enum.map(0..(plan.position_count - 1), fn pos -> + if MapSet.member?(positions_in_disjunct, pos) do + tag_slot_sql(plan.positions[pos], stack_id, shape_handle) + else + "''" + end + end) + + Enum.join(slot_sqls, " || '/' || ") + end) + end + + defp build_disjuncts_sql( + _plan, + [], + _views, + _used_refs, + pidx + ) do + {nil, [], pidx} + end + + defp build_disjuncts_sql( + plan, + disjunct_idxs, + views, + used_refs, + pidx + ) do + {sqls, params, next_pidx} = + Enum.reduce(disjunct_idxs, {[], [], pidx}, fn didx, {sqls, params, pi} -> + conj = Enum.at(plan.disjuncts, didx) + + {conj_sql, conj_params, next_pi} = + build_conjunction_sql(plan, conj, views, used_refs, pi) + + {[conj_sql | sqls], params ++ conj_params, next_pi} + end) + + sql = join_sql(" OR ", Enum.reverse(sqls)) + + {sql, params, next_pidx} + end + + defp build_conjunction_sql(plan, conj, views, used_refs, pidx) do + {parts, params, next_pi} = + Enum.reduce(conj, {[], [], pidx}, fn {pos, polarity}, {parts, params, pi} -> + info = plan.positions[pos] + + {sql, ps, next_pi} = position_to_sql(info, views, used_refs, pi) + + sql = if polarity == :negated, do: "NOT (#{sql})", else: sql + + {[sql | parts], params ++ ps, next_pi} + end) + + sql = parts |> Enum.reverse() |> Enum.join(" AND ") + {sql, params, next_pi} + end + + defp position_to_sql(%{is_subquery: false} = info, _, _, pidx) do + {info.sql, [], pidx} + end + + defp position_to_sql( + %{is_subquery: true} = info, + views, + used_refs, + pidx + ) do + lhs_sql = lhs_sql_from_ast(info.ast) + ref_type = Map.get(used_refs, info.subquery_ref) + values = Map.get(views, info.subquery_ref, 
MapSet.new()) |> MapSet.to_list() + + case ref_type do + {:array, {:row, col_types}} -> + casts = Enum.map(col_types, &Electric.Replication.Eval.type_to_pg_cast/1) + + params = + case values do + [] -> + Enum.map(casts, fn _ -> [] end) + + _ -> + values + |> Utils.unzip_any() + |> Tuple.to_list() + |> Enum.zip(col_types) + |> Enum.map(fn {col_vals, col_type} -> + Enum.map(col_vals, &value_to_postgrex(&1, col_type)) + end) + end + + sql = + casts + |> Enum.with_index(pidx) + |> Enum.map_join(", ", fn {col, index} -> "$#{index}::#{col}[]" end) + |> then(&"#{lhs_sql} IN (SELECT * FROM unnest(#{&1}))") + + {sql, params, pidx + length(casts)} + + {:array, element_type} -> + type_cast = Electric.Replication.Eval.type_to_pg_cast(element_type) + sql = "#{lhs_sql} = ANY ($#{pidx}::#{type_cast}[])" + {sql, [Enum.map(values, &value_to_postgrex(&1, element_type))], pidx + 1} + end + end + + defp value_to_postgrex(value, type) do + Electric.Replication.Eval.value_to_postgrex(value, type) + end + + defp lhs_sql_from_ast(%Func{name: "sublink_membership_check", args: [testexpr, _]}) do + SqlGenerator.to_sql(testexpr) + end + + defp tag_slot_sql(%{is_subquery: true, tag_columns: [col]}, stack_id, shape_handle) do + col_sql = pg_cast_column_to_text(col) + namespaced = pg_namespace_value_sql(col_sql) + ~s[md5('#{stack_id}#{shape_handle}' || #{namespaced})] + end + + defp tag_slot_sql( + %{is_subquery: true, tag_columns: {:hash_together, cols}}, + stack_id, + shape_handle + ) do + column_parts = + Enum.map(cols, fn col_name -> + col = pg_cast_column_to_text(col_name) + ~s['#{col_name}:' || #{pg_namespace_value_sql(col)}] + end) + + ~s[md5('#{stack_id}#{shape_handle}' || #{Enum.join(column_parts, " || ")})] + end + + defp tag_slot_sql(%{is_subquery: false}, _stack_id, _shape_handle) do + "'1'" + end + + defp join_sql(separator, sqls) do + case Enum.reject(sqls, &is_nil/1) do + [] -> nil + [single] -> single + multiple -> Enum.map_join(multiple, separator, &"(#{&1})") + end + end + + 
defp active_conditions_json_expr(%{active_conditions_sqls: nil, tags_sqls: tags_sqls}) do + List.duplicate(true, length(tags_sqls)) |> Jason.encode!() + end + + defp active_conditions_json_expr(%{active_conditions_sqls: sqls}) do + "' || to_json(ARRAY[" <> Enum.join(sqls, ", ") <> "]::boolean[])::text || '" + end + + defp tags_json_expr(tags_sqls) do + "' || to_json(ARRAY[" <> Enum.join(tags_sqls, ", ") <> "]::text[])::text || '" + end + # Generates SQL to namespace a value for tag hashing. - # This MUST produce identical output to SubqueryMoves.namespace_value/1 for + # This MUST produce identical output to SubqueryTags.namespace_value/1 for # the same input values, or Elixir-side and SQL-side tag computation will diverge. defp pg_namespace_value_sql(col_sql) do ~s[CASE WHEN #{col_sql} IS NULL THEN '#{@null_sentinel}' ELSE '#{@value_prefix}' || #{col_sql} END] diff --git a/packages/sync-service/lib/electric/shapes/shape.ex b/packages/sync-service/lib/electric/shapes/shape.ex index 5bb1aad06a..f146a9b038 100644 --- a/packages/sync-service/lib/electric/shapes/shape.ex +++ b/packages/sync-service/lib/electric/shapes/shape.ex @@ -2,11 +2,14 @@ defmodule Electric.Shapes.Shape do @moduledoc """ Struct describing the requested shape """ - alias Electric.Shapes.Shape.SubqueryMoves alias Electric.Replication.Eval.Expr + alias Electric.Replication.Eval.Runner alias Electric.Postgres.Inspector alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.Walker alias Electric.Replication.Changes + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.SubqueryTags alias Electric.Shapes.WhereClause alias Electric.Utils alias Electric.Shapes.Shape.Validators @@ -265,7 +268,7 @@ defmodule Electric.Shapes.Shape do end defp fill_tag_structure(shape) do - {tag_structure, comparison_expressions} = SubqueryMoves.move_in_tag_structure(shape) + {tag_structure, comparison_expressions} = SubqueryTags.move_in_tag_structure(shape) %{ shape @@ -280,16 +283,20 @@ defmodule 
Electric.Shapes.Shape do with {:ok, where} <- Parser.parse_query(where), {:ok, subqueries} <- Parser.extract_subqueries(where), :ok <- check_feature_flag(subqueries, opts), - {:ok, shape_dependencies} <- build_shape_dependencies(subqueries, opts), + {:ok, shape_dependencies, sublink_dependency_indexes} <- + build_shape_dependencies(subqueries, opts), {:ok, dependency_refs} <- build_dependency_refs(shape_dependencies, inspector), - all_refs = Map.merge(refs, dependency_refs), + all_refs = + Map.merge(refs, expand_dependency_refs(dependency_refs, sublink_dependency_indexes)), :ok <- Validators.validate_parameters(opts[:params]), {:ok, where} <- Parser.validate_where_ast(where, params: opts[:params], refs: all_refs, - sublink_queries: extract_sublink_queries(shape_dependencies) + sublink_queries: + expand_sublink_queries(shape_dependencies, sublink_dependency_indexes) ), + {:ok, where} <- canonicalize_where_sublink_refs(where, sublink_dependency_indexes), {:ok, where} <- Validators.validate_where_return_type(where) do {:ok, where, shape_dependencies} else @@ -317,13 +324,42 @@ defmodule Electric.Shapes.Shape do defp build_shape_dependencies(subqueries, opts) do shared_opts = Map.drop(opts, [:where, :columns, :relation]) - Utils.map_while_ok(subqueries, fn subquery -> - shared_opts - |> Map.put(:select, subquery) - |> Map.put(:autofill_pk_select?, true) - |> Map.put(:log_mode, :full) - |> new() + subqueries + |> Enum.with_index() + |> Utils.reduce_while_ok({[], %{}, %{}}, fn {subquery, occurrence_idx}, + {shape_dependencies, dependency_index_by_shape, + occurrence_to_dependency} -> + with {:ok, shape_dependency} <- + shared_opts + |> Map.put(:select, subquery) + |> Map.put(:autofill_pk_select?, true) + |> Map.put(:log_mode, :full) + |> new() do + comparable_shape = comparable(shape_dependency) + + case dependency_index_by_shape do + %{^comparable_shape => dependency_idx} -> + {:ok, + {shape_dependencies, dependency_index_by_shape, + Map.put(occurrence_to_dependency, 
occurrence_idx, dependency_idx)}} + + %{} -> + dependency_idx = length(shape_dependencies) + + {:ok, + {shape_dependencies ++ [shape_dependency], + Map.put(dependency_index_by_shape, comparable_shape, dependency_idx), + Map.put(occurrence_to_dependency, occurrence_idx, dependency_idx)}} + end + end end) + |> case do + {:ok, {shape_dependencies, _dependency_index_by_shape, occurrence_to_dependency}} -> + {:ok, shape_dependencies, occurrence_to_dependency} + + {:error, reason} -> + {:error, reason} + end end defp build_dependency_refs(shape_dependencies, inspector) do @@ -351,6 +387,46 @@ defmodule Electric.Shapes.Shape do end) end + defp expand_dependency_refs(dependency_refs, sublink_dependency_indexes) do + Map.new(sublink_dependency_indexes, fn {occurrence_idx, dependency_idx} -> + {["$sublink", "#{occurrence_idx}"], + Map.fetch!(dependency_refs, ["$sublink", "#{dependency_idx}"])} + end) + end + + defp expand_sublink_queries(shape_dependencies, sublink_dependency_indexes) do + canonical_queries = extract_sublink_queries(shape_dependencies) + + Map.new(sublink_dependency_indexes, fn {occurrence_idx, dependency_idx} -> + {occurrence_idx, Map.fetch!(canonical_queries, dependency_idx)} + end) + end + + defp canonicalize_where_sublink_refs(%Expr{} = where, sublink_dependency_indexes) do + with {:ok, eval} <- + Walker.fold( + where.eval, + fn + %Parser.Ref{path: ["$sublink", idx]} = ref, _children, occurrence_to_dependency -> + dependency_idx = + occurrence_to_dependency + |> Map.fetch!(String.to_integer(idx)) + |> Integer.to_string() + + {:ok, %{ref | path: ["$sublink", dependency_idx]}} + + node, children, _occurrence_to_dependency when map_size(children) == 0 -> + {:ok, node} + + node, children, _occurrence_to_dependency -> + {:ok, Map.merge(node, children)} + end, + sublink_dependency_indexes + ) do + {:ok, %{where | eval: eval, used_refs: Parser.find_refs(eval)}} + end + end + defp extract_sublink_queries(shapes) do Enum.with_index(shapes, fn %__MODULE__{} = 
shape, i -> columns = Enum.map_join(shape.explicitly_selected_columns, ", ", &Utils.quote_name/1) @@ -545,17 +621,20 @@ defmodule Electric.Shapes.Shape do Updates, on the other hand, may be converted to an "new record" or a "deleted record" if the previous/new version of the updated row isn't in the shape. """ - def convert_change(shape, change, opts \\ []) + def convert_change(shape, change, opts \\ []) do + opts = Map.new(opts) + do_convert_change(shape, change, opts) + end - def convert_change(%__MODULE__{root_table: table}, %{relation: relation}, _) - when table != relation, - do: [] + defp do_convert_change(%__MODULE__{root_table: table}, %{relation: relation}, _) + when table != relation, + do: [] - def convert_change( - %__MODULE__{where: nil, flags: %{selects_all_columns: true}} = shape, - change, - opts - ) do + defp do_convert_change( + %__MODULE__{where: nil, flags: %{selects_all_columns: true}} = shape, + change, + opts + ) do # If the change actually doesn't change any columns, we can skip it - this is possible on Postgres but we don't care for those. 
if is_struct(change, Changes.UpdatedRecord) and MapSet.size(change.changed_columns) == 0 do [] @@ -564,64 +643,131 @@ defmodule Electric.Shapes.Shape do end end - def convert_change(%__MODULE__{}, %Changes.TruncatedRelation{} = change, _), do: [change] + defp do_convert_change(%__MODULE__{}, %Changes.TruncatedRelation{} = change, _), + do: [change] - def convert_change( - %__MODULE__{where: where, selected_columns: selected_columns} = shape, - change, - opts - ) - when is_struct(change, Changes.NewRecord) - when is_struct(change, Changes.DeletedRecord) do - record = if is_struct(change, Changes.NewRecord), do: change.record, else: change.old_record + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.NewRecord{record: record} = change, + opts + ) do + {_old_refs, new_refs} = opts[:extra_refs] || {%{}, %{}} - # This is a pre-image and post-image of the value sets for subqueries. - # In case of a new record, we use the post-image, because we'll need to see the record, - # but in case of a deleted record, we use the pre-image, because we've never seen an insert - extra_refs = opts[:extra_refs] || {%{}, %{}} + case project_row_metadata(shape, record, new_refs, opts) do + {:ok, true, metadata} -> + [change |> put_row_metadata(metadata) |> filter_change_columns(selected_columns)] - used_extra_refs = - if is_struct(change, Changes.NewRecord), do: elem(extra_refs, 1), else: elem(extra_refs, 0) + {:ok, false, _metadata} -> + [] + end + end - if WhereClause.includes_record?(where, record, used_extra_refs) do - change - |> fill_move_tags(shape, opts[:stack_id], opts[:shape_handle]) - |> filter_change_columns(selected_columns) - |> List.wrap() - else - [] + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.DeletedRecord{old_record: record} = change, + opts + ) do + {old_refs, _new_refs} = opts[:extra_refs] || {%{}, %{}} + + case project_row_metadata(shape, record, old_refs, opts) do + {:ok, 
true, metadata} -> + [change |> put_row_metadata(metadata) |> filter_change_columns(selected_columns)] + + {:ok, false, _metadata} -> + [] end end - def convert_change( - %__MODULE__{where: where, selected_columns: selected_columns} = shape, - %Changes.UpdatedRecord{old_record: old_record, record: record} = change, - opts - ) do - {extra_refs_old, extra_refs_new} = opts[:extra_refs] || {%{}, %{}} - old_record_in_shape = WhereClause.includes_record?(where, old_record, extra_refs_old) - new_record_in_shape = WhereClause.includes_record?(where, record, extra_refs_new) + defp do_convert_change( + %__MODULE__{selected_columns: selected_columns} = shape, + %Changes.UpdatedRecord{old_record: old_record, record: record} = change, + opts + ) do + {old_refs, new_refs} = opts[:extra_refs] || {%{}, %{}} + + {:ok, old_included?, old_metadata} = project_row_metadata(shape, old_record, old_refs, opts) + {:ok, new_included?, new_metadata} = project_row_metadata(shape, record, new_refs, opts) converted_changes = - case {old_record_in_shape, new_record_in_shape} do - {true, true} -> [change] - {true, false} -> [Changes.convert_update(change, to: :deleted_record)] - {false, true} -> [Changes.convert_update(change, to: :new_record)] - {false, false} -> [] + case {old_included?, new_included?} do + {true, true} -> + [ + put_updated_metadata(change, new_metadata, + removed_move_tags: old_metadata.move_tags -- new_metadata.move_tags + ) + ] + + {true, false} -> + [ + Changes.convert_update(change, to: :deleted_record) + |> put_row_metadata(old_metadata) + ] + + {false, true} -> + [ + Changes.convert_update(change, to: :new_record) + |> put_row_metadata(new_metadata) + ] + + {false, false} -> + [] end converted_changes - |> Enum.map(&fill_move_tags(&1, shape, opts[:stack_id], opts[:shape_handle])) |> Enum.map(&filter_change_columns(&1, selected_columns)) |> Enum.filter(&should_keep_change?/1) end + defp project_row_metadata( + %__MODULE__{where: where}, + record, + refs, + %{dnf_plan: 
%DnfPlan{} = dnf_plan, stack_id: stack_id, shape_handle: shape_handle} + ) do + case get_row_metadata(dnf_plan, record, refs, where, stack_id, shape_handle) do + {:ok, included?, move_tags, active_conditions} -> + {:ok, included?, %{move_tags: move_tags, active_conditions: active_conditions}} + end + end + + defp project_row_metadata( + %__MODULE__{where: where, tag_structure: tag_structure}, + record, + refs, + opts + ) do + {:ok, + WhereClause.includes_record?(where, record, WhereClause.subquery_member_from_refs(refs)), + %{ + move_tags: + make_tags_from_pattern(tag_structure, record, opts[:stack_id], opts[:shape_handle]), + active_conditions: make_active_conditions(tag_structure) + }} + end + defp filter_change_columns(change, nil), do: change defp filter_change_columns(change, selected_columns) do Changes.filter_columns(change, selected_columns) end + defp put_row_metadata(change, %{move_tags: move_tags, active_conditions: active_conditions}) do + %{change | move_tags: move_tags, active_conditions: active_conditions} + end + + defp put_updated_metadata( + change, + %{move_tags: move_tags, active_conditions: active_conditions}, + opts + ) do + %{ + change + | move_tags: move_tags, + removed_move_tags: Keyword.get(opts, :removed_move_tags, []), + active_conditions: active_conditions + } + end + def fill_move_tags(change, %__MODULE__{tag_structure: []}, _, _), do: change def fill_move_tags(%Changes.NewRecord{move_tags: [_ | _]} = change, _, _, _), do: change @@ -637,7 +783,8 @@ defmodule Electric.Shapes.Shape do shape_handle ) do move_tags = make_tags_from_pattern(tag_structure, record, stack_id, shape_handle) - %{change | move_tags: move_tags} + active_conditions = make_active_conditions(tag_structure) + %{change | move_tags: move_tags, active_conditions: active_conditions} end def fill_move_tags( @@ -652,7 +799,14 @@ defmodule Electric.Shapes.Shape do make_tags_from_pattern(tag_structure, old_record, stack_id, shape_handle) -- move_tags - %{change | move_tags: 
move_tags, removed_move_tags: old_move_tags} + active_conditions = make_active_conditions(tag_structure) + + %{ + change + | move_tags: move_tags, + removed_move_tags: old_move_tags, + active_conditions: active_conditions + } end def fill_move_tags( @@ -663,22 +817,101 @@ defmodule Electric.Shapes.Shape do stack_id, shape_handle ) do - %{change | move_tags: make_tags_from_pattern(tag_structure, record, stack_id, shape_handle)} + active_conditions = make_active_conditions(tag_structure) + + %{ + change + | move_tags: make_tags_from_pattern(tag_structure, record, stack_id, shape_handle), + active_conditions: active_conditions + } end + def get_row_metadata(dnf_plan, record, views, where_expr, stack_id, shape_handle) do + with {:ok, ref_values} <- Runner.record_to_ref_values(where_expr.used_refs, record) do + refs = Map.merge(ref_values, views) + active_conditions = compute_active_conditions(dnf_plan, refs) + tags = compute_tags(dnf_plan, record, stack_id, shape_handle) + included? = compute_inclusion(dnf_plan, active_conditions) + {:ok, included?, tags, active_conditions} + end + end + + defp compute_active_conditions(dnf_plan, refs) do + Enum.map(0..(dnf_plan.position_count - 1), fn pos -> + info = dnf_plan.positions[pos] + pos_expr = Expr.wrap_parser_part(info.ast) + + base_result = + case Runner.execute(pos_expr, refs) do + {:ok, value} when value not in [nil, false] -> true + _ -> false + end + + if info.negated, do: not base_result, else: base_result + end) + end + + defp compute_tags(dnf_plan, record, stack_id, shape_handle) do + Enum.map(dnf_plan.disjuncts, fn conj -> + positions_in_disjunct = MapSet.new(conj, &elem(&1, 0)) + + Enum.map(0..(dnf_plan.position_count - 1), fn pos -> + if MapSet.member?(positions_in_disjunct, pos) do + compute_tag_slot(dnf_plan.positions[pos], record, stack_id, shape_handle) + else + "" + end + end) + |> Enum.join("/") + end) + end + + defp compute_tag_slot(%{is_subquery: true, tag_columns: [col]}, record, stack_id, shape_handle) 
do + SubqueryTags.make_value_hash(stack_id, shape_handle, Map.get(record, col)) + end + + defp compute_tag_slot( + %{is_subquery: true, tag_columns: {:hash_together, cols}}, + record, + stack_id, + shape_handle + ) do + parts = + Enum.map(cols, fn col -> + col <> ":" <> SubqueryTags.namespace_value(Map.get(record, col)) + end) + + SubqueryTags.make_value_hash_raw(stack_id, shape_handle, Enum.join(parts)) + end + + defp compute_tag_slot(%{is_subquery: false}, _record, _stack_id, _shape_handle) do + "1" + end + + defp compute_inclusion(dnf_plan, active_conditions) do + Enum.any?(dnf_plan.disjuncts, fn conj -> + Enum.all?(conj, fn {pos, _polarity} -> + Enum.at(active_conditions, pos) + end) + end) + end + + defp make_active_conditions([]), do: [] + defp make_active_conditions(tag_structure), do: List.duplicate(true, length(tag_structure)) + defp make_tags_from_pattern(patterns, record, stack_id, shape_handle) do Enum.map(patterns, fn pattern -> Enum.map(pattern, fn column_name when is_binary(column_name) -> - SubqueryMoves.make_value_hash(stack_id, shape_handle, Map.get(record, column_name)) + SubqueryTags.make_value_hash(stack_id, shape_handle, Map.get(record, column_name)) {:hash_together, columns} -> column_parts = Enum.map(columns, fn col -> - col <> ":" <> SubqueryMoves.namespace_value(Map.get(record, col)) + col <> ":" <> SubqueryTags.namespace_value(Map.get(record, col)) end) - SubqueryMoves.make_value_hash_raw(stack_id, shape_handle, Enum.join(column_parts)) + SubqueryTags.make_value_hash_raw(stack_id, shape_handle, Enum.join(column_parts)) end) |> Enum.join("/") end) @@ -687,6 +920,13 @@ defmodule Electric.Shapes.Shape do defp should_keep_change?(%Changes.UpdatedRecord{removed_move_tags: removed_move_tags}) when removed_move_tags != [], do: true + defp should_keep_change?(%Changes.UpdatedRecord{ + old_record: record, + record: record, + active_conditions: [_ | _] + }), + do: true + defp should_keep_change?(%Changes.UpdatedRecord{old_record: record, record: 
record}), do: false diff --git a/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex b/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex deleted file mode 100644 index cba66aecb2..0000000000 --- a/packages/sync-service/lib/electric/shapes/shape/subquery_moves.ex +++ /dev/null @@ -1,208 +0,0 @@ -defmodule Electric.Shapes.Shape.SubqueryMoves do - @moduledoc false - alias Electric.Replication.Eval - alias Electric.Replication.Eval.Walker - alias Electric.Shapes.Shape - - @value_prefix "v:" - @null_sentinel "NULL" - - def value_prefix, do: @value_prefix - def null_sentinel, do: @null_sentinel - - @doc """ - Given a shape with a where clause that contains a subquery, make a query that can use a - list of value in place of the subquery. - - When we're querying for new data, we're only querying for a subset of entire query. - To make that, we need to replace the subquery with a list of values. - - For example, if the shape has a where clause like this: - - ~S|WHERE parent_id IN (SELECT id FROM parent WHERE value = '1')| - - And we're querying for new data with a list of values like this: - - ["1", "2", "3"] - - Then the query will be transformed to: - - ~S|WHERE parent_id = ANY ($1::text[]::int8[])| - - And the parameters will be: - - [["1", "2", "3"]] - """ - def move_in_where_clause( - %Shape{ - where: %{query: query, used_refs: used_refs}, - shape_dependencies: shape_dependencies, - shape_dependencies_handles: shape_dependencies_handles - }, - shape_handle, - move_ins - ) do - index = Enum.find_index(shape_dependencies_handles, &(&1 == shape_handle)) - target_section = Enum.at(shape_dependencies, index) |> rebuild_subquery_section() - - case used_refs[["$sublink", "#{index}"]] do - {:array, {:row, cols}} -> - unnest_sections = - cols - |> Enum.map(&Electric.Replication.Eval.type_to_pg_cast/1) - |> Enum.with_index(fn col, index -> "$#{index + 1}::text[]::#{col}[]" end) - |> Enum.join(", ") - - {String.replace(query, target_section, "IN 
(SELECT * FROM unnest(#{unnest_sections}))"), - Electric.Utils.unzip_any(move_ins) |> Tuple.to_list()} - - col -> - type = Electric.Replication.Eval.type_to_pg_cast(col) - {String.replace(query, target_section, "= ANY ($1::text[]::#{type})"), [move_ins]} - end - end - - defp rebuild_subquery_section(shape) do - base = - ~s|IN (SELECT #{Enum.join(shape.explicitly_selected_columns, ", ")} FROM #{Electric.Utils.relation_to_sql(shape.root_table)}| - - where = if shape.where, do: " WHERE #{shape.where.query}", else: "" - base <> where <> ")" - end - - @doc """ - Generate a tag-removal control message for a shape. - - Patterns are a list of lists, where each inner list represents a pattern (and is functionally a tuple, but - JSON can't directly represent tuples). This pattern is filled with actual values that have been removed. - """ - @spec make_move_out_control_message(Shape.t(), String.t(), String.t(), [ - {dep_handle :: String.t(), gone_values :: String.t()}, - ... - ]) :: map() - # Stub guard to allow only one dependency for now. - def make_move_out_control_message(shape, stack_id, shape_handle, [_] = move_outs) do - %{ - headers: %{ - event: "move-out", - patterns: - Enum.flat_map(move_outs, &make_move_out_pattern(shape, stack_id, shape_handle, &1)) - } - } - end - - # This is a stub implementation valid only for when there is exactly one dependency. - defp make_move_out_pattern( - %{tag_structure: patterns}, - stack_id, - shape_handle, - {_dep_handle, gone_values} - ) do - # TODO: This makes the assumption of only one column per pattern. 
- Enum.flat_map(patterns, fn [column_or_expr] -> - case column_or_expr do - column_name when is_binary(column_name) -> - Enum.map( - gone_values, - &%{pos: 0, value: make_value_hash(stack_id, shape_handle, elem(&1, 1))} - ) - - {:hash_together, columns} -> - column_parts = - &(Enum.zip_with(&1, columns, fn value, column -> - column <> ":" <> namespace_value(value) - end) - |> Enum.join()) - - Enum.map( - gone_values, - &%{ - pos: 0, - value: - make_value_hash_raw( - stack_id, - shape_handle, - column_parts.(Tuple.to_list(elem(&1, 1))) - ) - } - ) - end - end) - end - - def make_value_hash(stack_id, shape_handle, value) do - make_value_hash_raw(stack_id, shape_handle, namespace_value(value)) - end - - @doc """ - Hash a pre-namespaced value. Use `make_value_hash/3` for single values that need namespacing. - """ - def make_value_hash_raw(stack_id, shape_handle, namespaced_value) do - :crypto.hash(:md5, "#{stack_id}#{shape_handle}#{namespaced_value}") - |> Base.encode16(case: :lower) - end - - @doc """ - Namespace a value for hashing. - - To distinguish NULL from the literal string 'NULL', values are prefixed with - 'v:' and NULL becomes 'NULL' (no prefix). This MUST match the SQL logic in - `Querying.pg_namespace_value_sql/1` - see lib/electric/shapes/querying.ex. - """ - def namespace_value(nil), do: @null_sentinel - def namespace_value(value), do: @value_prefix <> value - - @doc """ - Generate a tag structure for a shape. - - A tag structure is a list of lists, where each inner list represents a tag (and is functionally a tuple, but - JSON can't directly represent tuples). The structure is used to generate actual tags for each row, that act - as a refenence as to why this row is part of the shape. 
- - Tag structure then is essentially a list of column names in correct positions that will get filled in - with actual values from the row - """ - @spec move_in_tag_structure(Shape.t()) :: - list(list(String.t() | {:hash_together, [String.t(), ...]})) - def move_in_tag_structure(%Shape{} = shape) - when is_nil(shape.where) - when shape.shape_dependencies == [], - do: {[], %{}} - - def move_in_tag_structure(shape) do - # TODO: For multiple subqueries this should be a DNF form - # and this walking overrides the comparison expressions - {:ok, {tag_structure, comparison_expressions}} = - Walker.reduce( - shape.where.eval, - fn - %Eval.Parser.Func{name: "sublink_membership_check", args: [testexpr, sublink_ref]}, - {[current_tag | others], comparison_expressions}, - _ -> - tags = - case testexpr do - %Eval.Parser.Ref{path: [column_name]} -> - [[column_name | current_tag] | others] - - %Eval.Parser.RowExpr{elements: elements} -> - elements = - Enum.map(elements, fn %Eval.Parser.Ref{path: [column_name]} -> - column_name - end) - - [[{:hash_together, elements} | current_tag] | others] - end - - {:ok, {tags, Map.put(comparison_expressions, sublink_ref.path, testexpr)}} - - _, acc, _ -> - {:ok, acc} - end, - {[[]], %{}} - ) - - comparison_expressions - |> Map.new(fn {path, expr} -> {path, Eval.Expr.wrap_parser_part(expr)} end) - |> then(&{tag_structure, &1}) - end -end diff --git a/packages/sync-service/lib/electric/shapes/subquery_tags.ex b/packages/sync-service/lib/electric/shapes/subquery_tags.ex new file mode 100644 index 0000000000..d78254c7c1 --- /dev/null +++ b/packages/sync-service/lib/electric/shapes/subquery_tags.ex @@ -0,0 +1,70 @@ +defmodule Electric.Shapes.SubqueryTags do + @moduledoc false + + alias Electric.Replication.Eval + alias Electric.Replication.Eval.Walker + alias Electric.Shapes.Shape + + @value_prefix "v:" + @null_sentinel "NULL" + + def value_prefix, do: @value_prefix + def null_sentinel, do: @null_sentinel + + @spec move_in_tag_structure(Shape.t()) 
:: + {list(list(String.t() | {:hash_together, [String.t(), ...]})), map()} + def move_in_tag_structure(%Shape{} = shape) + when is_nil(shape.where) + when shape.shape_dependencies == [], + do: {[], %{}} + + def move_in_tag_structure(shape) do + {:ok, {tag_structure, comparison_expressions}} = + Walker.reduce( + shape.where.eval, + fn + %Eval.Parser.Func{name: "sublink_membership_check", args: [testexpr, sublink_ref]}, + {[current_tag | others], comparison_expressions}, + _ -> + tags = + case testexpr do + %Eval.Parser.Ref{path: [column_name]} -> + [[column_name | current_tag] | others] + + %Eval.Parser.RowExpr{elements: elements} -> + elements = + Enum.map(elements, fn %Eval.Parser.Ref{path: [column_name]} -> + column_name + end) + + [[{:hash_together, elements} | current_tag] | others] + end + + {:ok, {tags, Map.put(comparison_expressions, sublink_ref.path, testexpr)}} + + _, acc, _ -> + {:ok, acc} + end, + {[[]], %{}} + ) + + comparison_expressions + |> Map.new(fn {path, expr} -> {path, Eval.Expr.wrap_parser_part(expr)} end) + |> then(&{tag_structure, &1}) + end + + @spec namespace_value(nil | binary()) :: binary() + def namespace_value(nil), do: @null_sentinel + def namespace_value(value), do: @value_prefix <> value + + @spec make_value_hash(binary(), binary(), nil | binary()) :: binary() + def make_value_hash(stack_id, shape_handle, value) do + make_value_hash_raw(stack_id, shape_handle, namespace_value(value)) + end + + @spec make_value_hash_raw(binary(), binary(), binary()) :: binary() + def make_value_hash_raw(stack_id, shape_handle, namespaced_value) do + :crypto.hash(:md5, "#{stack_id}#{shape_handle}#{namespaced_value}") + |> Base.encode16(case: :lower) + end +end diff --git a/packages/sync-service/lib/electric/shapes/where_clause.ex b/packages/sync-service/lib/electric/shapes/where_clause.ex index 15b5688ad9..98d7cc8e8c 100644 --- a/packages/sync-service/lib/electric/shapes/where_clause.ex +++ b/packages/sync-service/lib/electric/shapes/where_clause.ex @@ 
-1,15 +1,60 @@ defmodule Electric.Shapes.WhereClause do + alias PgInterop.Sublink alias Electric.Replication.Eval.Runner + alias Electric.Shapes.Filter.Indexes.SubqueryIndex - def includes_record?(where_clause, record, extra_refs \\ %{}) - def includes_record?(nil = _where_clause, _record, _), do: true + @spec includes_record_result( + Electric.Replication.Eval.Expr.t() | nil, + map(), + ([String.t()], term() -> boolean()) + ) :: {:ok, boolean()} | :error + def includes_record_result(where_clause, record, subquery_member? \\ fn _, _ -> false end) + def includes_record_result(nil = _where_clause, _record, _), do: {:ok, true} - def includes_record?(where_clause, record, extra_refs) do + def includes_record_result(where_clause, record, subquery_member?) + when is_function(subquery_member?, 2) do with {:ok, refs} <- Runner.record_to_ref_values(where_clause.used_refs, record), - {:ok, evaluated} <- Runner.execute(where_clause, Map.merge(refs, extra_refs)) do - if is_nil(evaluated), do: false, else: evaluated + {:ok, evaluated} <- + Runner.execute(where_clause, refs, subquery_member?: subquery_member?) do + {:ok, not is_nil(evaluated) and evaluated != false} else - _ -> false + _ -> :error + end + end + + @spec includes_record?(Electric.Replication.Eval.Expr.t() | nil, map(), ([String.t()], term() -> + boolean())) :: + boolean() + def includes_record?(where_clause, record, subquery_member? \\ fn _, _ -> false end) + def includes_record?(nil = _where_clause, _record, _), do: true + + def includes_record?(where_clause, record, subquery_member?) + when is_function(subquery_member?, 2) do + case includes_record_result(where_clause, record, subquery_member?) do + {:ok, included?} -> included? 
+ :error -> false + end + end + + @spec subquery_member_from_refs(map()) :: ([String.t()], term() -> boolean()) + def subquery_member_from_refs(extra_refs) when is_map(extra_refs) do + fn subquery_ref, typed_value -> + typed_value + |> Sublink.member?(Map.get(extra_refs, subquery_ref, [])) + end + end + + @doc """ + Build a subquery_member? callback that queries the SubqueryIndex. + + Used for filter-side exact verification: checks whether a specific + shape currently contains a typed value for a canonical subquery ref. + """ + @spec subquery_member_from_index(SubqueryIndex.t(), term()) :: + ([String.t()], term() -> boolean()) + def subquery_member_from_index(index, shape_handle) do + fn subquery_ref, typed_value -> + SubqueryIndex.membership_or_fallback?(index, shape_handle, subquery_ref, typed_value) end end end diff --git a/packages/sync-service/test/electric/lsn_tracker_test.exs b/packages/sync-service/test/electric/lsn_tracker_test.exs index 43994ba070..2accecea9d 100644 --- a/packages/sync-service/test/electric/lsn_tracker_test.exs +++ b/packages/sync-service/test/electric/lsn_tracker_test.exs @@ -1,7 +1,9 @@ defmodule Electric.LsnTrackerTest do use ExUnit.Case, async: true - import Support.ComponentSetup, only: [with_registry: 1, with_stack_id_from_test: 1] + import Support.ComponentSetup, + only: [with_lsn_tracker: 1, with_registry: 1, with_stack_id_from_test: 1] + alias Electric.LsnTracker alias Electric.Postgres.Lsn @@ -79,7 +81,7 @@ defmodule Electric.LsnTrackerTest do end describe "broadcast_last_seen_lsn/2" do - setup [:with_registry] + setup [:with_registry, :with_lsn_tracker] test "delivers messages to processes registered for global_lsn_updates", ctx do LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) @@ -89,6 +91,14 @@ defmodule Electric.LsnTrackerTest do assert_receive {:global_last_seen_lsn, 42} end + test "replays the most recent broadcast to newly registered processes", ctx do + :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 42) 
+ + assert {:ok, _} = LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) + + assert_receive {:global_last_seen_lsn, 42} + end + test "delivers to multiple registered processes", ctx do test_pid = self() LsnTracker.subscribe_to_global_lsn_updates(ctx.stack_id) diff --git a/packages/sync-service/test/electric/plug/router_test.exs b/packages/sync-service/test/electric/plug/router_test.exs index 267a872ae1..e717f0050e 100644 --- a/packages/sync-service/test/electric/plug/router_test.exs +++ b/packages/sync-service/test/electric/plug/router_test.exs @@ -2377,21 +2377,41 @@ defmodule Electric.Plug.RouterTest do end @tag with_sql: [ - "CREATE TABLE parent (id INT PRIMARY KEY, value INT NOT NULL)", - "CREATE TABLE child (id INT PRIMARY KEY, parent_id INT NOT NULL REFERENCES parent(id), value INT NOT NULL)", - "INSERT INTO parent (id, value) VALUES (1, 1), (2, 2)", - "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20)" + "CREATE TABLE inner_table (id INT PRIMARY KEY, value INT NOT NULL)", + "CREATE TABLE outer_table (id INT PRIMARY KEY, inner_id INT NOT NULL REFERENCES inner_table(id), value INT NOT NULL)" + ] + test "return 400 if same subquery is used with both positive and negative polarity", %{ + opts: opts + } do + assert %{status: 400} = + conn("GET", "/v1/shape", %{ + table: "outer_table", + offset: "-1", + where: + "inner_id IN (SELECT id FROM inner_table) OR NOT inner_id IN (SELECT id FROM inner_table)" + }) + |> Router.call(opts) + end + + @tag with_sql: [ + "CREATE TABLE inner_table (id INT PRIMARY KEY, value INT NOT NULL)", + "CREATE TABLE outer_table (id INT PRIMARY KEY, inner_id INT NOT NULL REFERENCES inner_table(id), value INT NOT NULL)", + "INSERT INTO inner_table (id, value) VALUES (1, 1), (2, 2)", + "INSERT INTO outer_table (id, inner_id, value) VALUES (1, 1, 10), (2, 2, 20)" ] test "a move-out from the inner shape is propagated to the outer shape", %{ opts: opts, db_conn: db_conn } do - req = make_shape_req("child", where: 
"parent_id in (SELECT id FROM parent WHERE value = 1)") + req = + make_shape_req("outer_table", + where: "inner_id in (SELECT id FROM inner_table WHERE value = 1)" + ) assert {req, 200, [data, snapshot_end]} = shape_req(req, opts) assert %{ - "value" => %{"id" => "1", "parent_id" => "1", "value" => "10"}, + "value" => %{"id" => "1", "inner_id" => "1", "value" => "10"}, "headers" => %{"operation" => "insert", "tags" => [tag]} } = data @@ -2399,7 +2419,7 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, opts) - Postgrex.query!(db_conn, "UPDATE parent SET value = 3 WHERE id = 1", []) + Postgrex.query!(db_conn, "UPDATE inner_table SET value = 3 WHERE id = 1", []) assert {_req, 200, [data, %{"headers" => %{"control" => "up-to-date"}}]} = Task.await(task) @@ -2412,68 +2432,79 @@ defmodule Electric.Plug.RouterTest do end @tag with_sql: [ - "CREATE TABLE parent (id INT PRIMARY KEY, value INT NOT NULL)", - "CREATE TABLE child (id INT PRIMARY KEY, parent_id INT NOT NULL REFERENCES parent(id), value INT NOT NULL)", - "INSERT INTO parent (id, value) VALUES (1, 1), (2, 2)", - "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20)" + "CREATE TABLE inner_table (id INT PRIMARY KEY, value INT NOT NULL)", + "CREATE TABLE outer_table (id INT PRIMARY KEY, inner_id INT NOT NULL REFERENCES inner_table(id), value INT NOT NULL)", + "INSERT INTO inner_table (id, value) VALUES (1, 1), (2, 2)", + "INSERT INTO outer_table (id, inner_id, value) VALUES (1, 1, 10), (2, 2, 20)" ] test "a move-in from the inner shape causes a query and new entries in the outer shape", %{ opts: opts, db_conn: db_conn, stack_id: stack_id } do - req = make_shape_req("child", where: "parent_id in (SELECT id FROM parent WHERE value = 1)") + req = + make_shape_req("outer_table", + where: "inner_id in (SELECT id FROM inner_table WHERE value = 1)" + ) + assert {req, 200, [data, snapshot_end]} = shape_req(req, opts) tag = :crypto.hash(:md5, stack_id <> req.handle <> "v:1") |> 
Base.encode16(case: :lower) - assert %{"id" => "1", "parent_id" => "1", "value" => "10"} = data["value"] + assert %{"id" => "1", "inner_id" => "1", "value" => "10"} = data["value"] assert %{"operation" => "insert", "tags" => [^tag]} = data["headers"] assert %{"headers" => %{"control" => "snapshot-end"}} = snapshot_end task = live_shape_req(req, opts) # Move in reflects in the new shape without invalidating it - Postgrex.query!(db_conn, "UPDATE parent SET value = 1 WHERE id = 2", []) + Postgrex.query!(db_conn, "UPDATE inner_table SET value = 1 WHERE id = 2", []) tag2 = :crypto.hash(:md5, stack_id <> req.handle <> "v:2") |> Base.encode16(case: :lower) - assert {_, 200, [data, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl()]} = + assert {_, 200, + [ + %{"headers" => %{"event" => "move-in"}}, + data, + %{"headers" => %{"control" => "snapshot-end"}}, + up_to_date_ctl() + ]} = Task.await(task) - assert %{"id" => "2", "parent_id" => "2", "value" => "20"} = data["value"] + assert %{"id" => "2", "inner_id" => "2", "value" => "20"} = data["value"] assert %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag2]} = data["headers"] end @tag with_sql: [ - "CREATE TABLE parent (id INT PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", - "CREATE TABLE child (id INT PRIMARY KEY, parent_id INT NOT NULL REFERENCES parent(id), value INT NOT NULL)", - "INSERT INTO parent (id, excluded) VALUES (1, false), (2, true)", - "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20)" + "CREATE TABLE inner_table (id INT PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", + "CREATE TABLE outer_table (id INT PRIMARY KEY, inner_id INT NOT NULL REFERENCES inner_table(id), value INT NOT NULL)", + "INSERT INTO inner_table (id, excluded) VALUES (1, false), (2, true)", + "INSERT INTO outer_table (id, inner_id, value) VALUES (1, 1, 10), (2, 2, 20)" ] - test "NOT IN subquery should return 409 on move-in to subquery", %{ + test "NOT IN subquery emits a 
move-out when a dependency value moves in", %{ opts: opts, - db_conn: db_conn + db_conn: db_conn, + stack_id: stack_id } do - # Child rows where parent_id is NOT IN the set of excluded parents - # Initially: parent 1 is not excluded, so child 1 is in the shape - # parent 2 is excluded, so child 2 is NOT in the shape + # Outer rows where inner_id is NOT IN the set of excluded inner rows. + # Initially: inner row 1 is not excluded, so outer row 1 is in the shape. + # Inner row 2 is excluded, so outer row 2 is not in the shape. req = - make_shape_req("child", - where: "parent_id NOT IN (SELECT id FROM parent WHERE excluded = true)" + make_shape_req("outer_table", + where: "inner_id NOT IN (SELECT id FROM inner_table WHERE excluded = true)" ) assert {req, 200, [data, snapshot_end]} = shape_req(req, opts) - # Only child 1 should be in the shape (parent 1 is not excluded) + # Only outer row 1 should be in the shape. assert %{ - "value" => %{"id" => "1", "parent_id" => "1", "value" => "10"}, + "value" => %{"id" => "1", "inner_id" => "1", "value" => "10"}, "headers" => %{"operation" => "insert"} } = data @@ -2481,13 +2512,60 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, opts) - # Now set parent 1 to excluded = true - # This causes parent 1 to move INTO the subquery result - # Which should cause child 1 to move OUT of the outer shape - # Since NOT IN subquery move-out isn't implemented, we expect a 409 - Postgrex.query!(db_conn, "UPDATE parent SET excluded = true WHERE id = 1", []) + tag = + :crypto.hash(:md5, stack_id <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + + # Now set inner row 1 to excluded = true. + # This moves it into the subquery result and forces outer row 1 out. 
+ Postgrex.query!(db_conn, "UPDATE inner_table SET excluded = true WHERE id = 1", []) + + assert {_req, 200, [data, up_to_date_ctl()]} = Task.await(task) + + assert %{ + "headers" => %{ + "event" => "move-out", + "patterns" => [%{"pos" => 0, "value" => ^tag}] + } + } = data + end + + @tag with_sql: [ + "CREATE TABLE inner_table (id INT PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", + "CREATE TABLE outer_table (id INT PRIMARY KEY, inner_id INT NOT NULL REFERENCES inner_table(id), value INT NOT NULL)", + "INSERT INTO inner_table (id, excluded) VALUES (1, true), (2, true)", + "INSERT INTO outer_table (id, inner_id, value) VALUES (1, 1, 10), (2, 2, 20)" + ] + test "NOT IN subquery emits a move-in query when a dependency value moves out", %{ + opts: opts, + db_conn: db_conn, + stack_id: stack_id + } do + req = + make_shape_req("outer_table", + where: "inner_id NOT IN (SELECT id FROM inner_table WHERE excluded = true)" + ) + + assert {req, 200, [%{"headers" => %{"control" => "snapshot-end"}}]} = shape_req(req, opts) + + task = live_shape_req(req, opts) + + Postgrex.query!(db_conn, "UPDATE inner_table SET excluded = false WHERE id = 1", []) + + tag = + :crypto.hash(:md5, stack_id <> req.handle <> "v:1") + |> Base.encode16(case: :lower) + + assert {_req, 200, + [ + %{"headers" => %{"event" => "move-in"}}, + data, + %{"headers" => %{"control" => "snapshot-end"}}, + up_to_date_ctl() + ]} = Task.await(task) - assert {_req, 409, _response} = Task.await(task) + assert %{"id" => "1", "inner_id" => "1", "value" => "10"} = data["value"] + assert %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag]} = data["headers"] end @tag with_sql: [ @@ -2540,6 +2618,7 @@ defmodule Electric.Plug.RouterTest do assert {_, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{"value" => %{"id" => "2", "other_value" => "4"}}, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl() @@ -2587,6 +2666,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + 
 %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "2", "value" => "20"}, "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [tag]} @@ -2618,7 +2698,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, include_parent) VALUES (1, true)", "INSERT INTO child (id, parent_id, include_child) VALUES (1, 1, true)" ] - test "subquery combined with OR should return a 409 on move-out", %{ + test "subquery combined with OR handles move-out via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2632,16 +2712,33 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - assert %{"value" => %{"id" => "1", "include_child" => "true"}} = + tag = + :crypto.hash(:md5, opts[:stack_id] <> req.handle <> "v:1") + |> then(&(Base.encode16(&1, case: :lower) <> "/")) + + assert %{ + "value" => %{"id" => "1"}, + "headers" => %{ + "tags" => [^tag, "/1"], + "active_conditions" => [true, true] + } + } = Enum.find(response, &Map.has_key?(&1, "key")) task = live_shape_req(req, opts) - # Setting include_parent to false may cause a move out, but it doesn't in this case because include_child is still true + # Toggling the inner-side condition to false causes a move out on the + # subquery position, but the row stays because the outer-side predicate + # remains true on the second disjunct. + # With DNF runtime, this is handled as a position flip, not invalidation. 
Postgrex.query!(db_conn, "UPDATE parent SET include_parent = false WHERE id = 1", []) - # Rather than working out whether this is a move out or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + assert {_req, 200, response} = Task.await(task) + + assert [%{"headers" => %{"event" => "move-out", "patterns" => [%{"pos" => 0}]}}] = + Enum.filter(response, &match?(%{"headers" => %{"event" => _}}, &1)) + + refute Enum.any?(response, &Map.has_key?(&1, "key")) end @tag with_sql: [ @@ -2650,7 +2747,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, include_parent) VALUES (1, false)", "INSERT INTO child (id, parent_id, include_child) VALUES (1, 1, true)" ] - test "subquery combined with OR should return a 409 on move-in", %{ + test "subquery combined with OR handles move-in via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2664,16 +2761,35 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - assert %{"value" => %{"id" => "1", "include_child" => "true"}} = + tag = + :crypto.hash(:md5, opts[:stack_id] <> req.handle <> "v:1") + |> then(&(Base.encode16(&1, case: :lower) <> "/")) + + assert %{ + "value" => %{"id" => "1"}, + "headers" => %{ + "tags" => [^tag, "/1"], + "active_conditions" => [false, true] + } + } = Enum.find(response, &Map.has_key?(&1, "key")) task = live_shape_req(req, opts) - # Setting include_parent to true may cause a move in, but it doesn't in this case because include_child is already true + # Toggling the inner-side condition to true causes a move in on the + # subquery position. The row is already present via the outer-side + # predicate on the second disjunct. + # With DNF runtime, the move-in is handled as a position flip, not invalidation. 
Postgrex.query!(db_conn, "UPDATE parent SET include_parent = true WHERE id = 1", []) - # Rather than working out whether this is a move in or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + assert {_req, 200, response} = Task.await(task) + + # Move-in control message with move-in query rows + move_in_events = + Enum.filter(response, &match?(%{"headers" => %{"event" => "move-in"}}, &1)) + + assert length(move_in_events) >= 1 + refute Enum.any?(response, &Map.has_key?(&1, "key")) end @tag with_sql: [ @@ -2684,7 +2800,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, grandparent_id, include_parent) VALUES (1, 1, true)", "INSERT INTO child (id, parent_id) VALUES (1, 1)" ] - test "nested subquery combined with OR should return a 409 on move-in", %{ + test "nested subquery combined with OR handles move-in via DNF without invalidation", %{ opts: opts, db_conn: db_conn } do @@ -2703,15 +2819,25 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, opts) - # Setting include_grandparent to true may cause a move in, but it doesn't in this case because include_parent is already true + # Toggling the innermost condition to true causes a move-in in the middle + # shape's subquery, but middle row 1 is already present because its direct + # predicate is still true. With DNF on the middle shape, this becomes a + # position flip with no row-level change. + # The outer shape sees no change and stays live. Postgrex.query!( db_conn, "UPDATE grandparent SET include_grandparent = true WHERE id = 1", [] ) - # Rather than working out whether this is a move in or not we return a 409 - assert {_req, 409, _response} = Task.await(task) + # The middle shape handles the move-in via DNF. The outer dependency does + # not change, so no move event is triggered on the outer shape. + # The live request should receive the middle shape's move-in broadcast + # (propagated via the dependency materializer). 
+ assert {_req, 200, response} = Task.await(task) + + # Verify we got a response without invalidation. + assert is_list(response) end @tag with_sql: [ @@ -2755,6 +2881,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "2", "name" => "Team B"}, "headers" => %{"tags" => [^tag], "is_move_in" => true} @@ -2832,6 +2959,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"tags" => [^tag]}, "value" => %{"id" => "2", "role" => "Member"} @@ -2897,12 +3025,16 @@ defmodule Electric.Plug.RouterTest do task = live_shape_req(req, ctx.opts) Postgrex.query!(ctx.db_conn, "UPDATE parent SET other_value = 10 WHERE id = 2") - tag = + tag_hash = :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:20") |> Base.encode16(case: :lower) + # DNF tags: "subquery_hash/row_predicate_slot" + tag = "#{tag_hash}/1" + assert {_, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{"headers" => %{"tags" => [^tag]}, "value" => %{"id" => "3"}}, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl() @@ -2928,7 +3060,11 @@ defmodule Electric.Plug.RouterTest do # Should contain the data record and the snapshot-end control message assert length(response) == 2 - tag = :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:1") |> Base.encode16(case: :lower) + tag_hash = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:1") |> Base.encode16(case: :lower) + + # DNF tags: "subquery_hash/row_predicate_slot" + tag = "#{tag_hash}/1" assert %{ "value" => %{"id" => "1", "parentId" => "1", "Value" => "10"}, @@ -2974,11 +3110,13 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ %{"headers" => %{"event" => "move-out"}}, + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [tag2]}, "value" => %{"parent_id" => "2", "value" => "12"} }, %{"headers" => %{"control" => "snapshot-end"}}, + 
%{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag]}, "value" => %{"id" => "1", "parent_id" => "1", "value" => "13"} @@ -3013,7 +3151,8 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO parent (id, value) VALUES (1, 1), (2, 2), (3, 3)", "INSERT INTO child (id, parent_id, value) VALUES (1, 1, 10), (2, 2, 20), (3, 3, 30)" ] - test "move-in into move-out into move-in of the same parent results in a ", ctx do + test "move-in into move-out into move-in of the same dependency row collapses queued oscillations", + ctx do req = make_shape_req("child", where: "parent_id in (SELECT id FROM parent WHERE value = 1)") assert {req, 200, [data, _snapshot_end]} = shape_req(req, ctx.opts) @@ -3038,19 +3177,33 @@ defmodule Electric.Plug.RouterTest do # Hard to wait exactly what we want, so this should be OK Process.sleep(1000) - # We're essentially guaranteed, in this test environment, to see move-out before move-in resolves. - # It's safe to propagate a move-out even for stuff client hasn't seen (because of hashing in the pattern) - # as it's just a no-op. - # So we should see 2 move-outs and a move-in but only for the 3rd parent. The move-in should be filtered despite - # being triggered for 2 moved in parents initially + tag2 = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:2") |> Base.encode16(case: :lower) + + tag3 = + :crypto.hash(:md5, ctx.stack_id <> req.handle <> "v:3") |> Base.encode16(case: :lower) + + # The reduced move queue keeps the first move-in/move-out pair for + # dependency row 2, then drops the later move-in/move-out oscillation + # before dependency row 3 moves in. 
assert {_req, 200, [ - %{"headers" => %{"event" => "move-out", "patterns" => p1}}, - %{"headers" => %{"event" => "move-out", "patterns" => p1}}, + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag2]}, + "value" => %{"id" => "2", "value" => "20"} + }, %{"headers" => %{"control" => "snapshot-end"}}, %{ - "headers" => %{"operation" => "insert", "is_move_in" => true}, - "value" => %{"id" => "3", "parent_id" => "3", "value" => "30"} + "headers" => %{ + "event" => "move-out", + "patterns" => [%{"pos" => 0, "value" => ^tag2}] + } + }, + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert", "is_move_in" => true, "tags" => [^tag3]}, + "value" => %{"id" => "3", "value" => "30"} }, %{"headers" => %{"control" => "snapshot-end"}}, up_to_date_ctl() @@ -3074,6 +3227,7 @@ defmodule Electric.Plug.RouterTest do assert {req, 200, [ + %{"headers" => %{"event" => "move-in"}}, %{ "value" => %{"id" => "1", "parent_id" => "1", "value" => "10"}, "headers" => %{"operation" => "insert", "tags" => [tag]} @@ -3108,7 +3262,7 @@ defmodule Electric.Plug.RouterTest do "INSERT INTO project_members (project_id, user_id) VALUES (1, 100), (3, 100)", "INSERT INTO projects (id, workspace_id, name) VALUES (1, 1, 'project 1'), (2, 1, 'project 2')" ] - test "supports two subqueries at the same level but returns 409 on move-in", %{ + test "supports two subqueries at the same level with move-in", %{ opts: opts, db_conn: db_conn } do @@ -3176,8 +3330,17 @@ defmodule Electric.Plug.RouterTest do [] ) - # Should get a 409 because multiple same-level subqueries cannot currently correctly handle move-ins - assert %{status: 409} = Task.await(task) + # With DNF runtime, multiple same-level subqueries now handle move-ins correctly + assert %{status: 200} = conn = Task.await(task) + + body = Jason.decode!(conn.resp_body) + + assert [%{"headers" => %{"event" => "move-in"}} | rest] = body + + assert 
Enum.any?(rest, fn + %{"value" => %{"id" => "2", "name" => "project 2"}} -> true + _ -> false + end) end end diff --git a/packages/sync-service/test/electric/replication/eval/decomposer_test.exs b/packages/sync-service/test/electric/replication/eval/decomposer_test.exs new file mode 100644 index 0000000000..223f1a7fe3 --- /dev/null +++ b/packages/sync-service/test/electric/replication/eval/decomposer_test.exs @@ -0,0 +1,487 @@ +defmodule Electric.Replication.Eval.DecomposerTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Replication.Eval.Decomposer + + @refs %{ + ["a"] => :int4, + ["b"] => :int4, + ["c"] => :int4, + ["d"] => :int4, + ["e"] => :int4, + ["f"] => :int4, + ["g"] => :int4, + ["name"] => :text + } + + describe "decompose/1" do + test "should decompose a DNF query with shared subexpressions" do + # (a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3) + # Disjunct 1: positions 0-1 + # Disjunct 2: positions 2-3 + # Disjunct 3: positions 4-5 (reuses r1 for a=1, r3 for c=3) + ~S"(a = 1 AND b = 2) OR (c = 3 AND d = 4) OR (a = 1 AND c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil, nil, nil], + [nil, nil, ~s|"c" = 3|, ~s|"d" = 4|, nil, nil], + [nil, nil, nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should handle a single comparison without AND/OR" do + ~S"a = 1" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|]], + expected_subexpressions: [~s|"a" = 1|] + ) + end + + test "should handle all ANDs as a single disjunct" do + ~S"a = 1 AND b = 2 AND c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|]], + expected_subexpressions: [~s|"a" = 
1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle all ORs as N disjuncts with 1 expression each" do + # a = 1 OR b = 2 OR c = 3 + # Each OR branch is its own disjunct with 1 expression + # Total positions: 3 (one per disjunct) + ~S"a = 1 OR b = 2 OR c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, nil, nil], + [nil, ~s|"b" = 2|, nil], + [nil, nil, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should distribute AND over OR with subexpression reuse" do + # a = 1 AND (b = 2 OR c = 3) => (a = 1 AND b = 2) OR (a = 1 AND c = 3) + # After distribution, we get 2 disjuncts with 2 expressions each + # The "a = 1" subexpression should be deduplicated (same reference) + ~S"a = 1 AND (b = 2 OR c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle subquery expressions as atomic subexpressions" do + ~S"a = 1 AND (b IN (SELECT id FROM test_table) OR c = 3)" + |> prepare_with_sublinks( + %{["$sublink", "0"] => {:array, :int4}}, + %{0 => "SELECT id FROM test_table"} + ) + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" IN (SELECT $sublink.0)|, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|] + ], + expected_subexpressions: [ + ~s|"a" = 1|, + ~s|"b" IN (SELECT $sublink.0)|, + ~s|"c" = 3| + ] + ) + end + + test "should handle deeply nested distribution ((a OR b) AND (c OR d))" do + # (a OR b) AND (c OR d) => (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) + # 4 disjuncts, each with 2 expressions + ~S"(a = 1 OR b = 2) AND (c = 3 OR d = 4)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"c" = 3|, nil, nil, 
nil, nil, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"d" = 4|, nil, nil, nil, nil], + [nil, nil, nil, nil, ~s|"b" = 2|, ~s|"c" = 3|, nil, nil], + [nil, nil, nil, nil, nil, nil, ~s|"b" = 2|, ~s|"d" = 4|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should push NOT down to leaf expressions" do + # NOT a = 1 AND b = 2 parses as (NOT a = 1) AND b = 2 + # The NOT is already at the leaf, so it becomes {:not, ref} + ~S"NOT a = 1 AND b = 2" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[{:not, ~s|"a" = 1|}, ~s|"b" = 2|]], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should apply De Morgan's law for NOT over OR" do + # NOT (a = 1 OR b = 2) => (NOT a = 1) AND (NOT b = 2) + # Single disjunct with two negated terms + ~S"NOT (a = 1 OR b = 2)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[{:not, ~s|"a" = 1|}, {:not, ~s|"b" = 2|}]], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should apply De Morgan's law for NOT over AND" do + # NOT (a = 1 AND b = 2) => (NOT a = 1) OR (NOT b = 2) + # Two disjuncts, each with one negated term + ~S"NOT (a = 1 AND b = 2)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, nil], + [nil, {:not, ~s|"b" = 2|}] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|] + ) + end + + test "should handle double negation" do + # NOT NOT a = 1 => a = 1 (double negation elimination) + ~S"NOT NOT a = 1" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [[~s|"a" = 1|]], + expected_subexpressions: [~s|"a" = 1|] + ) + end + + test "should handle function calls as atomic subexpressions" do + ~S"lower(name) = 'test' OR upper(name) = 'TEST'" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|lower("name") = 
'test'|, nil], + [nil, ~s|upper("name") = 'TEST'|] + ], + expected_subexpressions: [~s|lower("name") = 'test'|, ~s|upper("name") = 'TEST'|] + ) + end + + test "should handle mixed-width disjuncts (multi-term AND with single-term OR)" do + # (a = 1 AND b = 2 AND c = 3) OR d = 4 + # Disjunct 1 has 3 terms, disjunct 2 has 1 term, total width = 4 + ~S"(a = 1 AND b = 2 AND c = 3) OR d = 4" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, nil], + [nil, nil, nil, ~s|"d" = 4|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|, ~s|"d" = 4|] + ) + end + + test "should combine De Morgan with distribution" do + # NOT (a = 1 AND b = 2) AND c = 3 + # De Morgan: NOT(AND(a,b)) => OR(NOT a, NOT b) + # Then: AND(OR(NOT a, NOT b), c) distributes to: + # (NOT a AND c) OR (NOT b AND c) + ~S"NOT (a = 1 AND b = 2) AND c = 3" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, ~s|"c" = 3|, nil, nil], + [nil, nil, {:not, ~s|"b" = 2|}, ~s|"c" = 3|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should apply De Morgan recursively over nested AND within OR" do + # NOT ((a = 1 AND b = 2) OR c = 3) + # De Morgan over OR: AND(NOT(AND(a,b)), NOT c) + # Inner De Morgan over AND: NOT(AND(a,b)) => OR(NOT a, NOT b) + # Distribution: AND(OR(NOT a, NOT b), NOT c) => + # (NOT a AND NOT c) OR (NOT b AND NOT c) + ~S"NOT ((a = 1 AND b = 2) OR c = 3)" + |> prepare() + |> Decomposer.decompose() + |> assert_expanded_dnf( + expected_disjuncts: [ + [{:not, ~s|"a" = 1|}, {:not, ~s|"c" = 3|}, nil, nil], + [nil, nil, {:not, ~s|"b" = 2|}, {:not, ~s|"c" = 3|}] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + end + + test "should handle double cross-product with deduplication" do + # ((a AND b) OR (c AND d)) AND ((d AND e) OR (f AND g)) + # Left OR: 2 disjuncts [ab, cd] + # 
Right OR: 2 disjuncts [de, fg] + # Cross-product: 2 × 2 = 4 disjuncts, each with 4 terms, expanded to width 16 + # d = 4 appears in left's 2nd disjunct AND right's 1st disjunct — shared ref + {:ok, decomposition} = + ~S"((a = 1 AND b = 2) OR (c = 3 AND d = 4)) AND ((d = 4 AND e = 5) OR (f = 6 AND g = 7))" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + # ab × de + [ + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"d" = 4|, + ~s|"e" = 5|, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil + ], + # ab × fg + [ + nil, + nil, + nil, + nil, + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"f" = 6|, + ~s|"g" = 7|, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil + ], + # cd × de + [ + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"d" = 4|, + ~s|"e" = 5|, + nil, + nil, + nil, + nil + ], + # cd × fg + [ + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"f" = 6|, + ~s|"g" = 7| + ] + ], + expected_subexpressions: [ + ~s|"a" = 1|, + ~s|"b" = 2|, + ~s|"c" = 3|, + ~s|"d" = 4|, + ~s|"e" = 5|, + ~s|"f" = 6|, + ~s|"g" = 7| + ] + ) + + # Verify d = 4 appears at 4 positions (shared across disjuncts) + d_eq_4_count = + decomposition.subexpressions + |> Enum.count(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"d" = 4| end) + + assert d_eq_4_count == 4 + end + + test "should share refs when same subexpression appears positive and negated" do + # (a = 1 AND b = 2) OR (NOT a = 1 AND c = 3) + # a = 1 appears positive in disjunct 1, negated in disjunct 2 + # The subexpressions map should have only 3 unique expressions + {:ok, decomposition} = + ~S"(a = 1 AND b = 2) OR (NOT a = 1 AND c = 3)" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil], + [nil, nil, {:not, ~s|"a" = 1|}, ~s|"c" = 3|] + ], + 
expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + + # Verify a = 1 appears at one positive and one negated position + a_eq_1_entries = + decomposition.subexpressions + |> Enum.filter(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"a" = 1| end) + + assert length(a_eq_1_entries) == 2 + polarities = a_eq_1_entries |> Enum.map(fn {_, s} -> s.negated end) |> Enum.sort() + assert polarities == [false, true] + end + + test "should deduplicate references for identical subexpressions" do + # All three disjuncts contain `a = 1` - should use same reference + {:ok, decomposition} = + ~S"(a = 1 AND b = 2) OR (a = 1 AND c = 3) OR a = 1" + |> prepare() + |> Decomposer.decompose() + + assert_expanded_dnf({:ok, decomposition}, + expected_disjuncts: [ + [~s|"a" = 1|, ~s|"b" = 2|, nil, nil, nil], + [nil, nil, ~s|"a" = 1|, ~s|"c" = 3|, nil], + [nil, nil, nil, nil, ~s|"a" = 1|] + ], + expected_subexpressions: [~s|"a" = 1|, ~s|"b" = 2|, ~s|"c" = 3|] + ) + + # a = 1 should appear at 3 positions (one per disjunct) + a_eq_1_count = + decomposition.subexpressions + |> Enum.count(fn {_pos, subexpr} -> deparse(subexpr.ast) == ~s|"a" = 1| end) + + assert a_eq_1_count == 3 + end + + test "should return error when disjunct count exceeds limit" do + # Build a WHERE clause with >100 disjuncts: a = 1 OR a = 2 OR ... 
OR a = 101 + clause = Enum.map_join(1..101, " OR ", &"a = #{&1}") + + result = + clause + |> prepare() + |> Decomposer.decompose() + + assert {:error, message} = result + assert message =~ "too complex" + assert message =~ "101 disjuncts" + assert message =~ "limit of 100" + end + end + + # Helper to prepare a WHERE clause string into a Parser AST + defp prepare(where_clause) do + {:ok, pgquery} = Parser.parse_query(where_clause) + {:ok, expr} = Parser.validate_where_ast(pgquery, refs: @refs) + expr.eval + end + + # Helper for WHERE clauses containing subqueries (IN (SELECT ...)) + defp prepare_with_sublinks(where_clause, sublink_refs, sublink_queries) do + {:ok, pgquery} = Parser.parse_query(where_clause) + all_refs = Map.merge(@refs, sublink_refs) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, refs: all_refs, sublink_queries: sublink_queries) + + expr.eval + end + + # Helper to deparse an AST node back to SQL string + defp deparse(ast) do + SqlGenerator.to_sql(ast) + end + + # Assertion helper that verifies: + # 1. Position count matches expected width + # 2. Correct number of disjuncts + # 3. Subexpressions map contains exactly the expected unique expressions + # 4. Reconstructed expanded format matches expected disjuncts + defp assert_expanded_dnf({:ok, decomposition}, opts) do + expected_disjuncts = Keyword.fetch!(opts, :expected_disjuncts) + expected_subexpressions = Keyword.fetch!(opts, :expected_subexpressions) + + %{ + disjuncts: disjuncts, + subexpressions: subexpressions, + position_count: position_count + } = decomposition + + # 1. Verify position count matches expected width + expected_width = expected_disjuncts |> hd() |> length() + + assert position_count == expected_width, + "Position count (#{position_count}) must equal expected width (#{expected_width})" + + # 2. 
Verify correct number of disjuncts + assert length(disjuncts) == length(expected_disjuncts), + "Expected #{length(expected_disjuncts)} disjuncts, got #{length(disjuncts)}" + + # 3. Verify subexpressions map contains exactly the expected unique expressions + actual_subexprs = subexpressions |> Map.values() |> Enum.map(&deparse(&1.ast)) |> MapSet.new() + expected_subexprs = MapSet.new(expected_subexpressions) + + assert actual_subexprs == expected_subexprs, + "Subexpressions mismatch. Expected: #{inspect(expected_subexprs)}, got: #{inspect(actual_subexprs)}" + + # 4. Reconstruct expanded format from sparse disjuncts for comparison + actual_expanded = + MapSet.new(disjuncts, fn conj -> + row = List.duplicate(nil, position_count) + + Enum.reduce(conj, row, fn {pos, polarity}, row -> + subexpr = Map.fetch!(subexpressions, pos) + sql = deparse(subexpr.ast) + term = if polarity == :negated, do: {:not, sql}, else: sql + List.replace_at(row, pos, term) + end) + end) + + assert actual_expanded == MapSet.new(expected_disjuncts) + end +end diff --git a/packages/sync-service/test/electric/replication/eval/runner_test.exs b/packages/sync-service/test/electric/replication/eval/runner_test.exs index 66da1ebb1d..26ee3e8a7f 100644 --- a/packages/sync-service/test/electric/replication/eval/runner_test.exs +++ b/packages/sync-service/test/electric/replication/eval/runner_test.exs @@ -254,6 +254,26 @@ defmodule Electric.Replication.Eval.RunnerTest do |> Runner.execute(%{["test"] => 4, ["$sublink", "0"] => MapSet.new([2, 3, 4])}) end + test "subquery with callback-backed membership" do + expr = + ~S|test IN (SELECT val FROM tester)| + |> Parser.parse_and_validate_expression!( + refs: %{["test"] => :int4, ["$sublink", "0"] => {:array, :int4}}, + sublink_queries: %{0 => "SELECT val FROM tester"} + ) + + subquery_member? 
= fn + ["$sublink", "0"], 4 -> true + ["$sublink", "0"], _ -> false + end + + assert {:ok, true} = + Runner.execute(expr, %{["test"] => 4}, subquery_member?: subquery_member?) + + assert {:ok, false} = + Runner.execute(expr, %{["test"] => 5}, subquery_member?: subquery_member?) + end + test "subquery with row expression" do assert {:ok, true} = ~S|(test1, test2) IN (SELECT val1, val2 FROM tester)| diff --git a/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs b/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs new file mode 100644 index 0000000000..6d6fae6fcf --- /dev/null +++ b/packages/sync-service/test/electric/replication/eval/sql_generator_test.exs @@ -0,0 +1,621 @@ +defmodule Electric.Replication.Eval.SqlGeneratorTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.SqlGenerator + alias Electric.Replication.Eval.Parser.{Const, Ref, Func, Array, RowExpr} + + describe "comparison operators" do + test "equals" do + ast = %Func{name: "\"=\"", args: [%Ref{path: ["status"]}, %Const{value: "active"}]} + assert SqlGenerator.to_sql(ast) == ~s|"status" = 'active'| + end + + test "not equals" do + ast = %Func{name: "\"<>\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" <> 1| + end + + test "less than" do + ast = %Func{name: "\"<\"", args: [%Ref{path: ["age"]}, %Const{value: 30}]} + assert SqlGenerator.to_sql(ast) == ~s|"age" < 30| + end + + test "greater than" do + ast = %Func{name: "\">\"", args: [%Ref{path: ["score"]}, %Const{value: 100}]} + assert SqlGenerator.to_sql(ast) == ~s|"score" > 100| + end + + test "less than or equal" do + ast = %Func{name: "\"<=\"", args: [%Ref{path: ["x"]}, %Const{value: 5}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" <= 5| + end + + test "greater than or equal" do + ast = %Func{name: "\">=\"", args: [%Ref{path: ["y"]}, %Const{value: 10}]} + assert SqlGenerator.to_sql(ast) == ~s|"y" >= 10| + end + end + + describe "pattern 
matching" do + test "LIKE" do + ast = %Func{name: "\"~~\"", args: [%Ref{path: ["name"]}, %Const{value: "%foo%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" LIKE '%foo%'| + end + + test "ILIKE" do + ast = %Func{name: "\"~~*\"", args: [%Ref{path: ["name"]}, %Const{value: "%bar%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" ILIKE '%bar%'| + end + + test "NOT LIKE" do + ast = %Func{name: "\"!~~\"", args: [%Ref{path: ["name"]}, %Const{value: "%baz%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" NOT LIKE '%baz%'| + end + + test "NOT ILIKE" do + ast = %Func{name: "\"!~~*\"", args: [%Ref{path: ["name"]}, %Const{value: "%qux%"}]} + assert SqlGenerator.to_sql(ast) == ~s|"name" NOT ILIKE '%qux%'| + end + end + + describe "nullability" do + test "IS NULL" do + ast = %Func{name: "is null", args: [%Ref{path: ["deleted_at"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"deleted_at" IS NULL| + end + + test "IS NOT NULL" do + ast = %Func{name: "is not null", args: [%Ref{path: ["email"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"email" IS NOT NULL| + end + end + + describe "boolean tests" do + test "IS TRUE" do + ast = %Func{name: "IS_TRUE", args: [%Ref{path: ["active"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"active" IS TRUE| + end + + test "IS NOT TRUE" do + ast = %Func{name: "IS_NOT_TRUE", args: [%Ref{path: ["active"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"active" IS NOT TRUE| + end + + test "IS FALSE" do + ast = %Func{name: "IS_FALSE", args: [%Ref{path: ["deleted"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"deleted" IS FALSE| + end + + test "IS NOT FALSE" do + ast = %Func{name: "IS_NOT_FALSE", args: [%Ref{path: ["enabled"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"enabled" IS NOT FALSE| + end + + test "IS UNKNOWN" do + ast = %Func{name: "IS_UNKNOWN", args: [%Ref{path: ["flag"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"flag" IS UNKNOWN| + end + + test "IS NOT UNKNOWN" do + ast = %Func{name: "IS_NOT_UNKNOWN", args: [%Ref{path: ["flag"]}]} + assert 
SqlGenerator.to_sql(ast) == ~s|"flag" IS NOT UNKNOWN| + end + end + + describe "membership" do + test "IN with literal array" do + ast = %Func{ + name: "in", + args: [ + %Ref{path: ["status"]}, + %Array{elements: [%Const{value: "a"}, %Const{value: "b"}, %Const{value: "c"}]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"status" IN ('a', 'b', 'c')| + end + + test "IN with integer array" do + ast = %Func{ + name: "in", + args: [ + %Ref{path: ["id"]}, + %Array{elements: [%Const{value: 1}, %Const{value: 2}, %Const{value: 3}]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"id" IN (1, 2, 3)| + end + end + + describe "sublink membership check" do + test "renders sublink reference" do + ast = %Func{ + name: "sublink_membership_check", + args: [ + %Ref{path: ["parent_id"]}, + %Ref{path: ["$sublink", "0"]} + ] + } + + assert SqlGenerator.to_sql(ast) == ~s|"parent_id" IN (SELECT $sublink.0)| + end + end + + describe "logical operators" do + test "NOT" do + inner = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + ast = %Func{name: "not", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|NOT "x" = 1| + end + + test "AND" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + ast = %Func{name: "and", args: [a, b]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 AND "y" = 2| + end + + test "OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + ast = %Func{name: "or", args: [a, b]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 OR "y" = 2| + end + + test "nested AND within OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + c = %Func{name: "\"=\"", args: [%Ref{path: ["z"]}, %Const{value: 3}]} + ast = %Func{name: "or", args: [%Func{name: "and", args: [a, b]}, 
c]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = 1 AND "y" = 2 OR "z" = 3| + end + + test "nested OR within AND" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + c = %Func{name: "\"=\"", args: [%Ref{path: ["z"]}, %Const{value: 3}]} + ast = %Func{name: "and", args: [%Func{name: "or", args: [a, b]}, c]} + assert SqlGenerator.to_sql(ast) == ~s|("x" = 1 OR "y" = 2) AND "z" = 3| + end + + test "deeply nested logical expression" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["a"]}, %Const{value: 1}]} + b = %Func{name: "\">\"", args: [%Ref{path: ["b"]}, %Const{value: 2}]} + c = %Func{name: "\"<\"", args: [%Ref{path: ["c"]}, %Const{value: 3}]} + d = %Func{name: "is null", args: [%Ref{path: ["d"]}]} + + ast = + %Func{ + name: "or", + args: [ + %Func{name: "and", args: [a, b]}, + %Func{name: "and", args: [c, %Func{name: "not", args: [d]}]} + ] + } + + assert SqlGenerator.to_sql(ast) == + ~s|"a" = 1 AND "b" > 2 OR "c" < 3 AND NOT "d" IS NULL| + end + end + + describe "DISTINCT / NOT DISTINCT" do + test "IS DISTINCT FROM" do + left = %Ref{path: ["x"]} + right = %Const{value: 1} + comparison = %Func{name: "\"<>\"", args: [left, right]} + ast = %Func{name: "values_distinct?", args: [left, right, comparison]} + assert SqlGenerator.to_sql(ast) == ~s|"x" IS DISTINCT FROM 1| + end + + test "IS NOT DISTINCT FROM" do + left = %Ref{path: ["x"]} + right = %Const{value: nil} + comparison = %Func{name: "\"<>\"", args: [left, right]} + ast = %Func{name: "values_not_distinct?", args: [left, right, comparison]} + assert SqlGenerator.to_sql(ast) == ~s|"x" IS NOT DISTINCT FROM NULL| + end + end + + describe "ANY / ALL" do + test "ANY with equals" do + inner = %Func{ + name: "\"=\"", + args: [%Ref{path: ["x"]}, %Ref{path: ["arr"]}], + map_over_array_in_pos: 1 + } + + ast = %Func{name: "any", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|"x" = ANY("arr")| + end + + test "ALL with less 
than" do + inner = %Func{ + name: "\"<\"", + args: [%Ref{path: ["x"]}, %Ref{path: ["arr"]}], + map_over_array_in_pos: 1 + } + + ast = %Func{name: "all", args: [inner]} + assert SqlGenerator.to_sql(ast) == ~s|"x" < ALL("arr")| + end + end + + describe "arithmetic operators" do + test "addition" do + ast = %Func{name: "\"+\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" + 1| + end + + test "subtraction" do + ast = %Func{name: "\"-\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" - 1| + end + + test "multiplication" do + ast = %Func{name: "\"*\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" * 2| + end + + test "division" do + ast = %Func{name: "\"/\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" / 2| + end + + test "exponentiation" do + ast = %Func{name: "\"^\"", args: [%Ref{path: ["x"]}, %Const{value: 2}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" ^ 2| + end + + test "unary plus" do + ast = %Func{name: "\"+\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|+ "x"| + end + + test "unary minus" do + ast = %Func{name: "\"-\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|- "x"| + end + + test "square root" do + ast = %Func{name: "\"|/\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s(\|/ "x") + end + + test "absolute value" do + ast = %Func{name: "\"@\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|@ "x"| + end + end + + describe "bitwise operators" do + test "bitwise AND" do + ast = %Func{name: "\"&\"", args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" & 3| + end + + test "bitwise OR" do + ast = %Func{name: "\"|\"", args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s("x" | 3) + end + + test "bitwise XOR" do + ast = %Func{name: "\"#\"", 
args: [%Ref{path: ["x"]}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == ~s|"x" # 3| + end + + test "bitwise NOT" do + ast = %Func{name: "\"~\"", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|~ "x"| + end + end + + describe "string concatenation" do + test "||" do + ast = %Func{name: "\"||\"", args: [%Ref{path: ["first"]}, %Ref{path: ["last"]}]} + assert SqlGenerator.to_sql(ast) == ~s("first" || "last") + end + end + + describe "array operators" do + test "contains (@>)" do + ast = %Func{name: "\"@>\"", args: [%Ref{path: ["tags"]}, %Ref{path: ["required"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"tags" @> "required"| + end + + test "contained by (<@)" do + ast = %Func{name: "\"<@\"", args: [%Ref{path: ["tags"]}, %Ref{path: ["allowed"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"tags" <@ "allowed"| + end + + test "overlap (&&)" do + ast = %Func{name: "\"&&\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" && "b"| + end + end + + describe "named functions" do + test "lower" do + ast = %Func{name: "lower", args: [%Ref{path: ["name"]}]} + assert SqlGenerator.to_sql(ast) == ~s|lower("name")| + end + + test "upper" do + ast = %Func{name: "upper", args: [%Ref{path: ["name"]}]} + assert SqlGenerator.to_sql(ast) == ~s|upper("name")| + end + + test "array_ndims" do + ast = %Func{name: "array_ndims", args: [%Ref{path: ["arr"]}]} + assert SqlGenerator.to_sql(ast) == ~s|array_ndims("arr")| + end + end + + describe "type casts" do + test "cast with _to_ naming convention" do + ast = %Func{name: "int4_to_bool", args: [%Ref{path: ["x"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"x"::bool| + end + + test "another cast" do + ast = %Func{name: "text_to_int4", args: [%Ref{path: ["val"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"val"::int4| + end + end + + describe "column references" do + test "simple column" do + assert SqlGenerator.to_sql(%Ref{path: ["status"]}) == ~s|"status"| + end + + test 
"schema-qualified column" do + assert SqlGenerator.to_sql(%Ref{path: ["public", "users", "id"]}) == + ~s|"public"."users"."id"| + end + end + + describe "constants" do + test "NULL" do + assert SqlGenerator.to_sql(%Const{value: nil}) == "NULL" + end + + test "true" do + assert SqlGenerator.to_sql(%Const{value: true}) == "true" + end + + test "false" do + assert SqlGenerator.to_sql(%Const{value: false}) == "false" + end + + test "string" do + assert SqlGenerator.to_sql(%Const{value: "hello"}) == "'hello'" + end + + test "string with single quote escaping" do + assert SqlGenerator.to_sql(%Const{value: "it's"}) == "'it''s'" + end + + test "integer" do + assert SqlGenerator.to_sql(%Const{value: 42}) == "42" + end + + test "float" do + assert SqlGenerator.to_sql(%Const{value: 3.14}) == "3.14" + end + + test "negative integer" do + assert SqlGenerator.to_sql(%Const{value: -1}) == "-1" + end + end + + describe "array literals" do + test "simple array" do + ast = %Array{elements: [%Const{value: 1}, %Const{value: 2}, %Const{value: 3}]} + assert SqlGenerator.to_sql(ast) == "ARRAY[1, 2, 3]" + end + + test "string array" do + ast = %Array{elements: [%Const{value: "a"}, %Const{value: "b"}]} + assert SqlGenerator.to_sql(ast) == "ARRAY['a', 'b']" + end + + test "empty array" do + ast = %Array{elements: []} + assert SqlGenerator.to_sql(ast) == "ARRAY[]" + end + end + + describe "row expressions" do + test "simple row" do + ast = %RowExpr{elements: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + assert SqlGenerator.to_sql(ast) == ~s|ROW("a", "b")| + end + + test "row in sublink membership check" do + row = %RowExpr{elements: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + + ast = %Func{ + name: "sublink_membership_check", + args: [row, %Ref{path: ["$sublink", "0"]}] + } + + assert SqlGenerator.to_sql(ast) == ~s|ROW("a", "b") IN (SELECT $sublink.0)| + end + end + + describe "date/time/interval constants" do + test "date" do + ast = %Const{value: ~D[2024-01-15]} + assert SqlGenerator.to_sql(ast) 
== "'2024-01-15'::date" + end + + test "time" do + ast = %Const{value: ~T[13:45:00]} + assert SqlGenerator.to_sql(ast) == "'13:45:00'::time" + end + + test "timestamp (NaiveDateTime)" do + ast = %Const{value: ~N[2024-01-15 13:45:00]} + assert SqlGenerator.to_sql(ast) == "'2024-01-15T13:45:00'::timestamp" + end + + test "timestamptz (DateTime)" do + ast = %Const{value: DateTime.from_naive!(~N[2024-01-15 13:45:00], "Etc/UTC")} + assert SqlGenerator.to_sql(ast) == "'2024-01-15T13:45:00Z'::timestamptz" + end + + test "interval" do + ast = %Const{value: PgInterop.Interval.parse!("1 year 2 months 3 days")} + result = SqlGenerator.to_sql(ast) + assert result =~ ~r/^'.*'::interval$/ + end + end + + describe "error handling" do + test "raises ArgumentError for unsupported AST node" do + assert_raise ArgumentError, ~r/unsupported AST node/, fn -> + SqlGenerator.to_sql(%{unexpected: :node}) + end + end + + test "raises ArgumentError for unknown function name" do + assert_raise ArgumentError, ~r/unsupported AST node/, fn -> + SqlGenerator.to_sql(%Func{name: "totally_unknown_func", args: [%Const{value: 1}]}) + end + end + end + + describe "complex nested expressions" do + test "WHERE clause with AND, OR, comparisons and NULL check" do + status_check = %Func{ + name: "\"=\"", + args: [%Ref{path: ["status"]}, %Const{value: "active"}] + } + + age_check = %Func{name: "\">=\"", args: [%Ref{path: ["age"]}, %Const{value: 18}]} + email_check = %Func{name: "is not null", args: [%Ref{path: ["email"]}]} + + ast = + %Func{ + name: "and", + args: [ + %Func{name: "or", args: [status_check, age_check]}, + email_check + ] + } + + assert SqlGenerator.to_sql(ast) == + ~s|("status" = 'active' OR "age" >= 18) AND "email" IS NOT NULL| + end + + test "NOT with nested OR" do + a = %Func{name: "\"=\"", args: [%Ref{path: ["x"]}, %Const{value: 1}]} + b = %Func{name: "\"=\"", args: [%Ref{path: ["y"]}, %Const{value: 2}]} + + ast = %Func{name: "not", args: [%Func{name: "or", args: [a, b]}]} + + assert 
SqlGenerator.to_sql(ast) == ~s|NOT ("x" = 1 OR "y" = 2)| + end + + test "comparison with string concatenation" do + concat = %Func{name: "\"||\"", args: [%Ref{path: ["first"]}, %Ref{path: ["last"]}]} + ast = %Func{name: "\"=\"", args: [concat, %Const{value: "JohnDoe"}]} + assert SqlGenerator.to_sql(ast) == ~s("first" || "last" = 'JohnDoe') + end + + test "precedence: multiplication inside addition" do + # (a * b) + c — no parens needed since * binds tighter + mul = %Func{name: "\"*\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"+\"", args: [mul, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" * "b" + "c"| + end + + test "precedence: addition inside multiplication" do + # a * (b + c) — parens needed since + binds looser + add = %Func{name: "\"+\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"*\"", args: [%Ref{path: ["a"]}, add]} + assert SqlGenerator.to_sql(ast) == ~s|"a" * ("b" + "c")| + end + + test "precedence: left-associative subtraction" do + # a - (b - c) — parens needed on right child + inner = %Func{name: "\"-\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"-\"", args: [%Ref{path: ["a"]}, inner]} + assert SqlGenerator.to_sql(ast) == ~s|"a" - ("b" - "c")| + end + + test "precedence: left-associative subtraction, left child" do + # (a - b) - c — no parens needed (left-associative) + inner = %Func{name: "\"-\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"-\"", args: [inner, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|"a" - "b" - "c"| + end + + test "precedence: right-associative exponentiation" do + # a ^ (b ^ c) — no parens needed (right-associative) + inner = %Func{name: "\"^\"", args: [%Ref{path: ["b"]}, %Ref{path: ["c"]}]} + ast = %Func{name: "\"^\"", args: [%Ref{path: ["a"]}, inner]} + assert SqlGenerator.to_sql(ast) == ~s|"a" ^ "b" ^ "c"| + end + + test "precedence: right-associative exponentiation, left child" do + # (a ^ 
b) ^ c — parens needed on left child + inner = %Func{name: "\"^\"", args: [%Ref{path: ["a"]}, %Ref{path: ["b"]}]} + ast = %Func{name: "\"^\"", args: [inner, %Ref{path: ["c"]}]} + assert SqlGenerator.to_sql(ast) == ~s|("a" ^ "b") ^ "c"| + end + end + + describe "to_sql is the inverse of parse" do + use ExUnitProperties + + alias Electric.Replication.Eval.Parser + alias Support.PgExpressionGenerator + + property "to_sql output is parseable for any parseable WHERE clause" do + check all( + {sql, refs} <- PgExpressionGenerator.where_clause_generator(), + max_runs: 1_000, + max_run_time: 10_000 + ) do + assert_to_sql_inverts_parse(sql, refs) + end + end + + defp assert_to_sql_inverts_parse(sql, refs) do + # The parser may raise on some generated expressions (pre-existing parser + # limitations). We rescue those and skip — we only care that successfully + # parsed expressions produce valid SQL via to_sql. + parsed = + try do + Parser.parse_and_validate_expression(sql, refs: refs) + rescue + _ -> :skip + end + + case parsed do + {:ok, %{eval: ast}} -> + regenerated = SqlGenerator.to_sql(ast) + + reparsed = + try do + Parser.parse_and_validate_expression(regenerated, refs: refs) + rescue + e -> + flunk( + "to_sql output raised #{inspect(e)} when re-parsing: #{regenerated} (from: #{sql})" + ) + end + + assert {:ok, _} = reparsed, + "to_sql output is not valid SQL: #{regenerated} (from: #{sql})" + + {:error, _reason} -> + :ok + + :skip -> + :ok + end + end + end +end diff --git a/packages/sync-service/test/electric/replication/shape_log_collector_test.exs b/packages/sync-service/test/electric/replication/shape_log_collector_test.exs index a90f4b57c5..60fb9d9710 100644 --- a/packages/sync-service/test/electric/replication/shape_log_collector_test.exs +++ b/packages/sync-service/test/electric/replication/shape_log_collector_test.exs @@ -40,6 +40,16 @@ defmodule Electric.Replication.ShapeLogCollectorTest do @shape Shape.new!("test_table", inspector: @inspector) @shape_handle 
"the-shape-handle" + @subquery_inspector Support.StubInspector.new( + tables: [{1234, {"public", "test_table"}}, {5678, {"public", "parent"}}], + columns: [%{name: "id", type: "int8", type_id: {20, 1}, pk_position: 0}] + ) + @subquery_shape Shape.new!("test_table", + inspector: @subquery_inspector, + where: "id IN (SELECT id FROM public.parent)" + ) + @subquery_shape_handle "subquery-shape-handle" + def setup_log_collector(ctx) do %{stack_id: stack_id} = ctx # Start a test Registry @@ -144,6 +154,60 @@ defmodule Electric.Replication.ShapeLogCollectorTest do xids = Support.TransactionConsumer.assert_consume([{1, consumer}], [txn]) assert xids == [xmin] end + + @tag restore_shapes: [{@subquery_shape_handle, @subquery_shape}], + inspector: @subquery_inspector + test "restored subquery shape routes via fallback before consumer seeds index", ctx do + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + # After restore, the subquery shape should be in fallback because + # no consumer has seeded the SubqueryIndex yet. + index = SubqueryIndex.for_stack(ctx.stack_id) + assert index != nil + assert SubqueryIndex.fallback?(index, @subquery_shape_handle) + + parent = self() + + consumer = + start_link_supervised!( + {Support.TransactionConsumer, + [ + id: 1, + stack_id: ctx.stack_id, + parent: parent, + shape: @subquery_shape, + shape_handle: @subquery_shape_handle, + action: :restore + ]} + ) + + :ok = + Electric.Shapes.ConsumerRegistry.register_consumer( + consumer, + @subquery_shape_handle, + ctx.stack_id + ) + + xmin = 100 + lsn = Lsn.from_string("0/10") + last_log_offset = LogOffset.new(lsn, 0) + + # Any root-table change should route to the shape via fallback, + # even if the record wouldn't match the subquery membership. 
+ txn = + transaction(xmin, lsn, [ + %Changes.NewRecord{ + relation: {"public", "test_table"}, + record: %{"id" => "999"}, + log_offset: last_log_offset + } + ]) + + assert :ok = ShapeLogCollector.handle_event(txn, ctx.stack_id) + + xids = Support.TransactionConsumer.assert_consume([{1, consumer}], [txn]) + assert xids == [xmin] + end end describe "lazy consumer initialization" do diff --git a/packages/sync-service/test/electric/shape_cache_test.exs b/packages/sync-service/test/electric/shape_cache_test.exs index 320bba5e29..c53dd01f61 100644 --- a/packages/sync-service/test/electric/shape_cache_test.exs +++ b/packages/sync-service/test/electric/shape_cache_test.exs @@ -1315,6 +1315,34 @@ defmodule Electric.ShapeCacheTest do assert [{^dep_handle, _}, {^shape_handle, _}] = ShapeCache.list_shapes(ctx.stack_id) end + test "restarted subquery shape reseeds the subquery index after restart", ctx do + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + # Before restart: shape should have positions in the SubqueryIndex + index_before = SubqueryIndex.for_stack(ctx.stack_id) + assert index_before != nil + assert SubqueryIndex.has_positions?(index_before, shape_handle) + + restart_shape_cache(ctx) + + # After restart: the SubqueryIndex is recreated by the ShapeLogCollector. + # The consumer re-initializes and reseeds the index. + # Wait for the consumer to finish restoring. 
+ :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index_after = SubqueryIndex.for_stack(ctx.stack_id) + assert index_after != nil + + assert wait_until(200, fn -> + SubqueryIndex.has_positions?(index_after, shape_handle) + end) + end + test "restores shapes with subqueries and their materializers when backup missing", ctx do {shape_handle, _} = ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) @@ -1361,6 +1389,22 @@ defmodule Electric.ShapeCacheTest do :ok = stop_supervised(name) end end + + defp wait_until(timeout_ms, fun, started_at \\ System.monotonic_time(:millisecond)) + + defp wait_until(timeout_ms, fun, started_at) do + cond do + fun.() -> + true + + System.monotonic_time(:millisecond) - started_at >= timeout_ms -> + false + + true -> + Process.sleep(10) + wait_until(timeout_ms, fun, started_at) + end + end end describe "start_consumer_for_handle/2" do diff --git a/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs b/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs deleted file mode 100644 index 990a46c2d5..0000000000 --- a/packages/sync-service/test/electric/shapes/consumer/change_handling_test.exs +++ /dev/null @@ -1,419 +0,0 @@ -defmodule Electric.Shapes.Consumer.ChangeHandlingTest do - use ExUnit.Case, async: true - - alias Electric.Replication.Changes.NewRecord - alias Electric.Replication.Changes.UpdatedRecord - alias Electric.Replication.LogOffset - alias Electric.Shapes.Consumer.ChangeHandling - alias Electric.Shapes.Consumer.MoveIns - alias Electric.Shapes.Consumer.State - alias Electric.Shapes.Shape - - import Support.ComponentSetup - - @moduletag :tmp_dir - - @inspector Support.StubInspector.new( - tables: ["users"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, - %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}} - ] - ) - 
- describe "process_changes/3 with move-ins" do - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - # Create a shape with dependencies (subquery) - shape = - Shape.new!("users", where: "parent_id IN (SELECT id FROM users)", inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "skips change when value is in unresolved move-in with nil snapshot", %{state: state} do - # Set up move-in state with a waiting move-in that has nil snapshot - # This simulates a move-in that was triggered but query hasn't started yet - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "a96441e4-59bd-426d-aefe-66c7fef4ddd2", - {["$sublink", "0"], MapSet.new([1])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"child\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - # The change should be skipped because: - # 1. Its parent_id=1 matches the in-flight moved value - # 2. The move-in has nil snapshot, meaning we don't know when it will be visible yet - # 3. 
Therefore we should skip to avoid duplicates when move-in results arrive - result = ChangeHandling.process_changes([change], state, ctx) - - # Should return empty changes since it should be skipped - {filtered_changes, _state, count, _offset} = result - - assert filtered_changes == [] - - assert count == 0 - end - - test "skips change when value is in unresolved move-in with known snapshot and xid is visible", - %{state: state} do - # Set up move-in state with a waiting move-in that has a known snapshot - # xid 962 should be visible in snapshot {963, 963, []} (since 962 < 963) - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "ab234061-eb07-4ef7-97c5-301ad2056280", - {["$sublink", "0"], MapSet.new([1])} - ) - |> MoveIns.set_snapshot("ab234061-eb07-4ef7-97c5-301ad2056280", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - # xid 962 is visible in snapshot {963, 963, []} - ctx = %{xid: 962, extra_refs: %{}} - - result = ChangeHandling.process_changes([change], state, ctx) - - {filtered_changes, _state, count, _offset} = result - - assert filtered_changes == [], - "Change should be skipped when value is in unresolved move-in and xid is visible" - - assert count == 0 - end - - test "keeps change but converts it to an insert if it covers the snapshot, and adds it to touched keys", - %{state: state} do - # Set up move-in state with a waiting move-in that has a known snapshot - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "ab234061-eb07-4ef7-97c5-301ad2056280", - {["$sublink", "0"], MapSet.new([1])} - ) - |> 
MoveIns.set_snapshot("ab234061-eb07-4ef7-97c5-301ad2056280", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Create a change that references the moved-in value (parent_id = 1) - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "1", "parent_id" => "1", "value" => "11"}, - record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"1\"", - changed_columns: MapSet.new(["value"]) - } - - # xid 970 covers the snapshot - ctx = %{ - xid: 970, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - - assert {[change], state, 1, _offset} = result - - assert %NewRecord{record: %{"id" => "1", "parent_id" => "1", "value" => "13"}, key: key} = - change - - assert state.move_handling_state.touch_tracker == %{key => 970} - end - end - - describe "process_changes/3 with subquery combined with other conditions" do - # Tests for shapes that have a subquery ANDed with other non-subquery conditions. - # The bug occurred when a change's sublink value was in a pending move-in, but - # the record didn't match other parts of the WHERE clause. The old code would - # incorrectly skip the change, assuming the move-in would cover it. - # - # Example: "parent_id IN (SELECT id FROM parents WHERE active) AND status = 'published'" - # If parent becomes active (triggers move-in), but record has status='draft', - # the change should NOT be skipped because the move-in won't return this row. 
- - @parents_inspector Support.StubInspector.new( - tables: ["parents", "children"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, - %{name: "status", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "active", type: "bool", pk_position: nil, type_id: {16, 1}} - ] - ) - - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - # Create a shape with a subquery AND a simple equality condition: - # parent must be active AND child must be published - shape = - Shape.new!( - "children", - where: - "parent_id IN (SELECT id FROM parents WHERE active = true) AND status = 'published'", - inspector: @parents_inspector - ) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "processes change when sublink is in move-in but record fails other WHERE conditions", %{ - state: state - } do - # This tests the fix: parent_id=3 enters a move-in (parent became active), - # but the child has status='draft', so the change should NOT be skipped. - # The move-in query uses the full WHERE clause, so it won't return this row. 
- - # Set up move-in state: parent_id=3 just became active (triggers move-in) - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-3", - {["$sublink", "0"], MapSet.new([3])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # A record moving FROM parent_id=1 (in shape) TO parent_id=3 (active but status=draft) - # Old record: parent_id=1 active, status=published -> in shape - # New record: parent_id=3 active, status=draft -> NOT in shape (fails status check) - # This should result in a DELETE, not be skipped - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "100", "parent_id" => "1", "status" => "published"}, - record: %{"id" => "100", "parent_id" => "3", "status" => "draft"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"children\"/\"100\"", - changed_columns: MapSet.new(["parent_id", "status"]) - } - - # extra_refs: old has parent 1 active, new has parent 3 active - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1, 3])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # The change should NOT be skipped - it should be processed as a delete - # because the new record doesn't match status = 'published' - assert count == 1 - assert length(filtered_changes) == 1 - - [processed_change] = filtered_changes - # Should be converted to a delete since old was in shape, new is not - assert %Electric.Replication.Changes.DeletedRecord{} = processed_change - assert processed_change.old_record["id"] == "100" - end - - test "skips change when value is in move-in AND matches full WHERE clause", %{state: state} do - # When parent_id=2 enters a move-in AND the record has status='published', - # the change should be skipped (covered by move-in query) - - # Set up move-in state: parent_id=2 just became active (triggers move-in) - 
move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-2", - {["$sublink", "0"], MapSet.new([2])} - ) - - state = %{state | move_handling_state: move_handling_state} - - # A record with parent_id=2 and status=published being updated - # Both subquery (parent active) and status condition are satisfied - # This change should be skipped because the move-in will handle it - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "100", "parent_id" => "2", "status" => "published"}, - record: %{"id" => "100", "parent_id" => "2", "status" => "published"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"children\"/\"100\"", - changed_columns: MapSet.new([]) - } - - # extra_refs: parent 2 is now active (in new refs) - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1, 2])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # The change should be skipped because: - # 1. parent_id=2 is in the pending move-in - # 2. status='published' satisfies the other WHERE condition - # 3. The move-in query will return this row - assert filtered_changes == [] - assert count == 0 - end - - test "processes delete when record fails non-subquery condition even with active move-in", %{ - state: state - } do - # When a record changes from status='published' to status='draft', - # even if the parent is in a pending move-in, we should delete - # because the status condition fails. 
- - move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-1", - {["$sublink", "0"], MapSet.new([1])} - ) - |> MoveIns.set_snapshot("move-in-for-parent-1", {963, 963, []}) - - state = %{state | move_handling_state: move_handling_state} - - # Record changes status from published (in shape) to draft (not in shape) - change = %UpdatedRecord{ - relation: {"public", "children"}, - old_record: %{"id" => "200", "parent_id" => "1", "status" => "published"}, - record: %{"id" => "200", "parent_id" => "1", "status" => "draft"}, - log_offset: LogOffset.new(12346, 0), - key: "\"public\".\"children\"/\"200\"", - changed_columns: MapSet.new(["status"]) - } - - # xid 962 is visible in snapshot {963, 963, []} - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new([1])}} - } - - result = ChangeHandling.process_changes([change], state, ctx) - {filtered_changes, _state, count, _offset} = result - - # Should produce a delete, not be skipped - assert count == 1 - assert [%Electric.Replication.Changes.DeletedRecord{} = delete] = filtered_changes - assert delete.old_record["id"] == "200" - end - end - - describe "process_changes/3 with sublink value changes during move-in" do - setup [:with_stack_id_from_test] - - setup %{stack_id: stack_id} do - shape = - Shape.new!("users", where: "parent_id IN (SELECT id FROM users)", inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - %{state: state, shape: shape} - end - - test "processes UpdatedRecord with changed sublink value even when new value is in a pending move-in", - %{state: state} do - # parent_id changes from 2 to 3, and there's a pending move-in for parent_id=3. - # The change must still be processed so removed_move_tags are emitted for the - # old sublink value — the move-in query only returns INSERTs with the new tag. 
- move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-3", - {["$sublink", "0"], MapSet.new([3])} - ) - - state = %{state | move_handling_state: move_handling_state} - - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "10", "parent_id" => "2", "value" => "hello"}, - record: %{"id" => "10", "parent_id" => "3", "value" => "hello"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"10\"", - changed_columns: MapSet.new(["parent_id"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([2])}, %{["$sublink", "0"] => MapSet.new([2, 3])}} - } - - {filtered_changes, _state, count, _offset} = - ChangeHandling.process_changes([change], state, ctx) - - assert count == 1 - assert [%UpdatedRecord{} = processed] = filtered_changes - assert processed.record["parent_id"] == "3" - assert processed.old_record["parent_id"] == "2" - assert processed.move_tags != [] - assert processed.removed_move_tags != [] - end - - test "skips UpdatedRecord when sublink value is unchanged and in a pending move-in", - %{state: state} do - # Only a non-sublink field changes — the move-in will return the row with - # identical tags, so the WAL change can safely be skipped. 
- move_handling_state = - MoveIns.new() - |> MoveIns.add_waiting( - "move-in-for-parent-2", - {["$sublink", "0"], MapSet.new([2])} - ) - - state = %{state | move_handling_state: move_handling_state} - - change = %UpdatedRecord{ - relation: {"public", "users"}, - old_record: %{"id" => "10", "parent_id" => "2", "value" => "old"}, - record: %{"id" => "10", "parent_id" => "2", "value" => "new"}, - log_offset: LogOffset.new(12345, 0), - key: "\"public\".\"users\"/\"10\"", - changed_columns: MapSet.new(["value"]) - } - - ctx = %{ - xid: 962, - extra_refs: - {%{["$sublink", "0"] => MapSet.new([])}, %{["$sublink", "0"] => MapSet.new([2])}} - } - - {filtered_changes, _state, count, _offset} = - ChangeHandling.process_changes([change], state, ctx) - - assert filtered_changes == [] - assert count == 0 - end - end -end diff --git a/packages/sync-service/test/electric/shapes/consumer/event_handler/default_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler/default_test.exs new file mode 100644 index 0000000000..e7a0d40af3 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler/default_test.exs @@ -0,0 +1,106 @@ +defmodule Electric.Shapes.Consumer.EventHandler.DefaultTest do + use ExUnit.Case, async: true + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.EventHandler + alias Electric.Shapes.Shape + + @inspector Support.StubInspector.new( + tables: ["child"], + columns: [ + %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, + %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, + %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} + ] + ) + + test "returns notify flushed effect for empty transaction" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + txn = 
%Transaction{ + xid: 1, + changes: [], + num_changes: 0, + lsn: lsn(1), + last_log_offset: Electric.Replication.LogOffset.new(lsn(1), 0) + } + + assert {:ok, %EventHandler.Default{}, [%Effects.NotifyFlushed{log_offset: offset}]} = + EventHandler.handle_event(handler, txn) + + assert offset != nil + end + + test "ignores global_last_seen_lsn" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:ok, %EventHandler.Default{}, []} = + EventHandler.handle_event(handler, {:global_last_seen_lsn, 42}) + end + + test "returns truncate error on TruncatedRelation" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:error, {:truncate, 1}} = + EventHandler.handle_event(handler, txn(1, [child_truncate()])) + end + + test "marks the final emitted change as last" do + handler = %EventHandler.Default{ + shape: simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + } + + assert {:ok, %EventHandler.Default{}, + [ + %Effects.AppendChanges{ + xid: 1, + changes: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] + }, + %Effects.NotifyFlushed{} + ]} = EventHandler.handle_event(handler, txn(1, [child_insert("1")])) + end + + defp simple_shape do + Shape.new!("child", inspector: @inspector) + end + + defp txn(xid, changes) do + %Transaction{ + xid: xid, + changes: changes, + num_changes: length(changes), + lsn: lsn(xid), + last_log_offset: Electric.Replication.LogOffset.new(lsn(xid), max(length(changes) - 1, 0)) + } + end + + defp lsn(value), do: Lsn.from_integer(value) + + defp child_truncate do + %Changes.TruncatedRelation{relation: {"public", "child"}} + end + + defp child_insert(id) do + %Changes.NewRecord{ + relation: {"public", "child"}, + record: %{"id" => id, "parent_id" => "1", "name" => "child-#{id}"} + } + end +end diff --git 
a/packages/sync-service/test/electric/shapes/consumer/event_handler/subqueries_test.exs b/packages/sync-service/test/electric/shapes/consumer/event_handler/subqueries_test.exs new file mode 100644 index 0000000000..f9e0863615 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/event_handler/subqueries_test.exs @@ -0,0 +1,957 @@ +defmodule Electric.Shapes.Consumer.EventHandler.SubqueriesTest do + use ExUnit.Case, async: true + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.EventHandler + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Buffering + alias Electric.Shapes.Consumer.EventHandler.Subqueries.Steady + alias Electric.Shapes.Consumer.Subqueries.ActiveMove + alias Electric.Shapes.Consumer.Subqueries.RefResolver + alias Electric.Shapes.Consumer.Subqueries.ShapeInfo + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @inspector Support.StubInspector.new( + tables: ["parent", "child"], + columns: [ + %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, + %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, + %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, + %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} + ] + ) + + describe "Subquery handler" do + test "converts transactions against the current subquery view" do + handler = new_handler(subquery_view: MapSet.new([1])) + + assert {:ok, %Steady{}, plan} = + EventHandler.handle_event( + handler, + txn(50, [child_insert("1", "1"), child_insert("2", "2")]) + ) + + assert [ + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] + }, + %Effects.NotifyFlushed{log_offset: _} + ] = plan + end + + test "still converts root transactions when dependency moves are configured to invalidate" do + handler = + new_handler( + subquery_view: 
MapSet.new([1]), + dependency_move_policy: :invalidate_on_dependency_move + ) + + assert {:ok, %Steady{}, plan} = + EventHandler.handle_event( + handler, + txn(50, [child_insert("1", "1"), child_insert("2", "2")]) + ) + + assert [ + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] + }, + %Effects.NotifyFlushed{log_offset: _} + ] = plan + end + + test "returns unsupported_subquery when dependency moves are configured to invalidate" do + handler = new_handler(dependency_move_policy: :invalidate_on_dependency_move) + dep_handle = dep_handle(handler) + + assert {:error, :unsupported_subquery} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + end + + test "negated subquery turns dependency move-in into an outer move-out" do + handler = new_handler(shape: negated_shape()) + dep_handle = dep_handle(handler) + + assert {:ok, %Steady{views: %{["$sublink", "0"] => view}} = _handler, plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert view == MapSet.new([1]) + + # Case D: negated move-in completes immediately — effects_for_complete + # adds the value to the index (deferred to completion for NOT IN broadening) + assert [ + %Effects.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + }, + %Effects.AddToSubqueryIndex{dep_index: 0, values: [{1, "1"}]} + ] = plan + end + + test "negated subquery turns dependency move-out into a buffered outer move-in" do + handler = new_handler(shape: negated_shape(), subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + # Case B: negated move-out → remove the value when buffering starts so the + # negated index reflects the post-move exclusion set while buffering. 
+ assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert [ + %Effects.SubscribeGlobalLsn{}, + %Effects.RemoveFromSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effects.StartMoveInQuery{} + ] = plan + + assert %Buffering{ + active_move: %ActiveMove{ + views_before_move: %{["$sublink", "0"] => before_view}, + views_after_move: %{["$sublink", "0"] => after_view} + } + } = handler + + assert before_view == MapSet.new([1]) + assert after_view == MapSet.new() + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + # Case B: negated move-out → no further index effect at complete because + # the buffering-start removal already matches the post-splice dependency view. 
+ assert {:ok, %Steady{views: %{["$sublink", "0"] => view}}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert view == MapSet.new() + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "splices buffered transactions around the snapshot visibility boundary" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, + [ + %Effects.SubscribeGlobalLsn{}, + %Effects.AddToSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effects.StartMoveInQuery{} + ]} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("10", "1")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, [child_insert("11", "1")])) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + }, + %Effects.NotifyFlushed{log_offset: _}, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "splices move-in query rows between emitted pre and post boundary changes" do + handler = new_handler(subquery_view: 
MapSet.new([1])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("10", "1")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, [child_insert("11", "2")])) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert view_for(views) == MapSet.new([1, 2]) + + assert [ + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "10"}}] + }, + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + }, + %Effects.NotifyFlushed{log_offset: _}, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "splices updates that become a delete before the boundary and an insert after it" do + handler = new_handler(subquery_view: MapSet.new([1])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_update("10", "1", "2")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(150, 
[child_update("11", "3", "2")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert view_for(views) == MapSet.new([1, 2]) + + assert [ + %Effects.AppendChanges{ + changes: [%Changes.DeletedRecord{old_record: %{"id" => "10"}}] + }, + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendChanges{ + changes: [%Changes.NewRecord{record: %{"id" => "11"}, last?: true}] + }, + %Effects.NotifyFlushed{log_offset: _}, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "uses lsn updates to splice at the current buffer tail" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(120, [child_insert("10", "1")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(20)) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.NotifyFlushed{log_offset: _}, + 
%Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "waits for an lsn update even when the move-in query completes with an empty buffer" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(20)) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "keeps an empty stored move-in snapshot as an effect so execution can clean it up" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(20), row_count: 0, row_bytes: 0) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: 
%{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 0, + row_bytes: 0 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "uses an lsn update that arrived before the move-in query completed" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(20)) + ) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "keeps the newest seen lsn when an older update arrives later" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 300, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(20)) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert {:ok, 
%Steady{views: views}, plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(20)) + ) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "defers queued move outs until after splice and starts the next move in" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert %Buffering{ + active_move: %ActiveMove{ + values: [{2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } + } = handler + + assert view_for(views_before) == MapSet.new() + assert view_for(views_after) == MapSet.new([2]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + }, + %Effects.RemoveFromSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + 
%Effects.AddToSubqueryIndex{dep_index: 0, values: [{2, "2"}]}, + %Effects.StartMoveInQuery{} + ] = plan + end + + test "queued second move-in emits buffering effects only after it is dequeued" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, + [ + %Effects.SubscribeGlobalLsn{}, + %Effects.AddToSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effects.StartMoveInQuery{} + ]} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}, queue: queue} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert queue.move_in == %{0 => [{2, "2"}]} + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{values: [{2, "2"}]}} = _handler, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AddToSubqueryIndex{dep_index: 0, values: [{2, "2"}]}, + %Effects.StartMoveInQuery{} + ] = plan + end + + test "chained move-in resolves without needing a new lsn broadcast" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + 
EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + # First splice completes, second move-in starts + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + # Second move-in resolves with no further lsn broadcasts + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {200, 300, []}}) + + assert {:ok, %Steady{views: views}, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert view_for(views) == MapSet.new([2]) + end + + test "applies a queued move out for the active move-in value after splice" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert view_for(views) == MapSet.new() + + assert [ + 
%Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendControl{ + message: %{headers: %{event: "move-out", patterns: [%{pos: 0}]}} + }, + %Effects.RemoveFromSubqueryIndex{dep_index: 0, values: [{1, "1"}]}, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "batches consecutive move ins into a single active move in" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, + %{move_in: [{1, "1"}, {2, "2"}], move_out: []}} + ) + + assert %Buffering{ + active_move: %ActiveMove{ + values: [{1, "1"}, {2, "2"}], + views_before_move: views_before, + views_after_move: views_after + } + } = handler + + assert view_for(views_before) == MapSet.new() + assert view_for(views_after) == MapSet.new([1, 2]) + end + + test "cancels pending inverse ops while buffering" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{2, "2"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + 
move_in_complete(lsn(10)) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert view_for(views) == MapSet.new([1]) + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.UnsubscribeGlobalLsn{} + ] = plan + end + + test "merges queued move outs into a single control message after splice" do + handler = new_handler(subquery_view: MapSet.new([2])) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{1, "1"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, []} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [], move_out: [{2, "2"}]}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert {:ok, %Steady{views: views}, plan} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + + assert view_for(views) == MapSet.new() + + assert [ + %Effects.AppendControl{message: %{headers: %{event: "move-in"}}}, + %Effects.AppendMoveInSnapshot{ + snapshot_name: "move-in-snapshot", + row_count: 1, + row_bytes: 100 + }, + %Effects.AppendControl{ + message: %{headers: %{event: "move-out", patterns: patterns}} + }, + %Effects.RemoveFromSubqueryIndex{values: values}, + 
%Effects.UnsubscribeGlobalLsn{} + ] = plan + + assert length(patterns) == 2 + assert length(values) == 2 + end + + test "returns {:error, :buffer_overflow} when buffered transactions exceed the limit" do + handler = new_handler(buffer_max_transactions: 3) + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_insert("1", "1")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(51, [child_insert("2", "1")])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(52, [child_insert("3", "1")])) + + assert {:error, :buffer_overflow} = + EventHandler.handle_event(handler, txn(53, [child_insert("4", "1")])) + end + + test "returns truncate error on TruncatedRelation while steady" do + handler = new_handler(subquery_view: MapSet.new([1])) + + assert {:error, {:truncate, 1}} = + EventHandler.handle_event(handler, txn(1, [child_truncate()])) + end + + test "returns truncate error while buffering once splice completes" do + handler = new_handler() + dep_handle = dep_handle(handler) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event( + handler, + {:materializer_changes, dep_handle, %{move_in: [{1, "1"}], move_out: []}} + ) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, txn(50, [child_truncate()])) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 150, []}}) + + assert {:ok, %Buffering{active_move: %ActiveMove{}} = handler, _plan} = + 
EventHandler.handle_event( + handler, + move_in_complete(lsn(10)) + ) + + assert {:error, {:truncate, 50}} = + EventHandler.handle_event(handler, global_last_seen_lsn(10)) + end + + test "raises on dependency handle mismatch" do + assert_raise ArgumentError, ~r/unexpected dependency handle/, fn -> + new_handler() + |> EventHandler.handle_event( + {:materializer_changes, "wrong", %{move_in: [], move_out: []}} + ) + end + end + + test "raises on query callbacks while steady" do + handler = new_handler() + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + EventHandler.handle_event(handler, {:pg_snapshot_known, {100, 200, []}}) + end + + assert_raise ArgumentError, ~r/no move-in is buffering/, fn -> + EventHandler.handle_event(handler, move_in_complete(lsn(1), row_count: 0, row_bytes: 0)) + end + end + end + + # -- Helpers -- + + defp new_handler(opts \\ []) do + shape = Keyword.get(opts, :shape, shape()) + {:ok, dnf_plan} = DnfPlan.compile(shape) + dep_handle = hd(shape.shape_dependencies_handles) + + %Steady{ + shape_info: %ShapeInfo{ + shape: shape, + stack_id: "stack-id", + shape_handle: "shape-handle", + dnf_plan: dnf_plan, + ref_resolver: + RefResolver.new(%{dep_handle => {0, ["$sublink", "0"]}}, %{0 => ["$sublink", "0"]}), + buffer_max_transactions: Keyword.get(opts, :buffer_max_transactions, 1000), + dependency_move_policy: + Keyword.get(opts, :dependency_move_policy, :stream_dependency_moves) + }, + views: %{["$sublink", "0"] => Keyword.get(opts, :subquery_view, MapSet.new())} + } + end + + defp dep_handle(handler) do + handler.shape_info.ref_resolver.handle_to_ref |> Map.keys() |> hd() + end + + defp view_for(views, ref \\ ["$sublink", "0"]) when is_map(views) do + views[ref] + end + + defp shape do + Shape.new!("child", + where: "parent_id IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp negated_shape do + Shape.new!("child", + 
where: "parent_id NOT IN (SELECT id FROM public.parent WHERE value = 'keep')", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + |> fill_handles() + end + + defp fill_handles(shape) do + filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) + handles = Enum.map(filled_deps, &Shape.generate_id/1) + %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} + end + + defp txn(xid, changes) do + %Transaction{ + xid: xid, + changes: changes, + num_changes: length(changes), + lsn: lsn(xid), + last_log_offset: Electric.Replication.LogOffset.new(lsn(xid), max(length(changes) - 1, 0)) + } + end + + defp lsn(value), do: Lsn.from_integer(value) + defp global_last_seen_lsn(value), do: {:global_last_seen_lsn, value} + + defp move_in_complete(lsn, opts \\ []) do + {:query_move_in_complete, Keyword.get(opts, :snapshot_name, "move-in-snapshot"), + Keyword.get(opts, :row_count, 1), Keyword.get(opts, :row_bytes, 100), lsn} + end + + defp child_insert(id, parent_id) do + %Changes.NewRecord{ + relation: {"public", "child"}, + record: %{"id" => id, "parent_id" => parent_id, "name" => "child-#{id}"} + } + |> Changes.fill_key(["id"]) + end + + defp child_truncate do + %Changes.TruncatedRelation{relation: {"public", "child"}} + end + + defp child_update(id, old_parent_id, new_parent_id) do + Changes.UpdatedRecord.new( + relation: {"public", "child"}, + old_record: %{"id" => id, "parent_id" => old_parent_id, "name" => "child-#{id}-old"}, + record: %{"id" => id, "parent_id" => new_parent_id, "name" => "child-#{id}-new"} + ) + |> Changes.fill_key(["id"]) + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs index ad2d2fc50a..f96182266a 100644 --- a/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/materializer_test.exs @@ -545,20 +545,6 @@ 
defmodule Electric.Shapes.Consumer.MaterializerTest do assert_receive {:materializer_changes, _, %{move_out: [{2, "2"}], move_in: [{3, "3"}]}} end - - @tag snapshot_data: [%Changes.NewRecord{record: %{"id" => "1", "value" => "10"}}] - test "get_link_values reads from ETS cache and does not require the GenServer to be alive", - ctx do - ctx = with_materializer(ctx) - - assert Materializer.get_link_values(ctx) == MapSet.new([10]) - - # Stop the materializer GenServer — a pure GenServer.call path would now raise - GenServer.stop(Materializer.whereis(ctx)) - - # ETS-cached values should be returned without touching the (now-dead) GenServer - assert Materializer.get_link_values(ctx) == MapSet.new([10]) - end end describe "same-batch move event cancellation" do @@ -1032,6 +1018,221 @@ defmodule Electric.Shapes.Consumer.MaterializerTest do end end + describe "DNF: multiple tags per row with active_conditions" do + test "insert with active_conditions where row is not initially included", ctx do + ctx = with_materializer(ctx) + + # Row has two disjunct tags but active_conditions says position 0 is false + # Tag "hash_a/" participates in position 0, tag "/hash_b" participates in position 1 + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [false, false] + } + ]) + + # Row is not included because no disjunct has all positions active + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + end + + test "insert with active_conditions where one disjunct is satisfied", ctx do + ctx = with_materializer(ctx) + + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + } + ]) + + # First disjunct "hash_a/" has position 0 active → included + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive 
{:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "move-in broadcast activates a previously excluded row", ctx do + ctx = with_materializer(ctx) + + # Insert with position 0 inactive + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [false, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + + # Move-in at position 0 with value "hash_a" + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + # Now position 0 is true, first disjunct "hash_a/" is satisfied + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "move-out does not remove row when another disjunct still holds", ctx do + ctx = with_materializer(ctx) + + # Insert with both positions active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-out at position 0 - but position 1 still holds via second disjunct + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + # Row should still be included because disjunct "/hash_b" at position 1 is still true + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + refute_received {:materializer_changes, _, _} + end + + test "move-out removes row when last active disjunct becomes false", ctx do + ctx = with_materializer(ctx) + + # Insert with only position 1 active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", 
"/hash_b"], + active_conditions: [false, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-out at position 1 - now no disjunct holds + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 1, value: "hash_b"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + assert_receive {:materializer_changes, _, %{move_out: [{10, "10"}]}} + end + + test "move-in on already-present row is a no-op for value counts", ctx do + ctx = with_materializer(ctx) + + # Insert with position 0 active + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/", "/hash_b"], + active_conditions: [true, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + + # Move-in at position 1 - row was already included via position 0 + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 1, value: "hash_b"}]}} + ]) + + # No value count change + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + refute_received {:materializer_changes, _, _} + end + + test "multi-position disjunct requires all positions active", ctx do + ctx = with_materializer(ctx) + + # Tag "hash_a/1" means positions 0 AND 1 must be active for this disjunct + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/1"], + active_conditions: [true, false] + } + ]) + + # Position 1 is false, so the disjunct is not satisfied + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + end + + test "multi-position disjunct becomes satisfied when all positions active", ctx do + ctx = with_materializer(ctx) + + # Tag "hash_a/1" needs both positions active + 
Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_a/1"], + active_conditions: [false, true] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new() + refute_received {:materializer_changes, _, _} + + # Move-in at position 0 makes both positions active + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-in", patterns: [%{pos: 0, value: "hash_a"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10]) + assert_receive {:materializer_changes, _, %{move_in: [{10, "10"}]}} + end + + test "composite-key tag indexing works for position lookup", ctx do + ctx = with_materializer(ctx) + + # Two rows with different position-0 hashes + Materializer.new_changes(ctx, [ + %Changes.NewRecord{ + key: "1", + record: %{"value" => "10"}, + move_tags: ["hash_x/"], + active_conditions: [true, false] + }, + %Changes.NewRecord{ + key: "2", + record: %{"value" => "20"}, + move_tags: ["hash_y/"], + active_conditions: [true, false] + } + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([10, 20]) + assert_receive {:materializer_changes, _, %{move_in: _}} + + # Move-out only for hash_x at position 0 + Materializer.new_changes(ctx, [ + %{headers: %{event: "move-out", patterns: [%{pos: 0, value: "hash_x"}]}} + ]) + + assert Materializer.get_link_values(ctx) == MapSet.new([20]) + assert_receive {:materializer_changes, _, %{move_out: [{10, "10"}]}} + end + end + defp respond_to_call(request, response) do receive do {:"$gen_call", {from, ref}, {^request, _arg}} -> diff --git a/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs b/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs deleted file mode 100644 index 424bc4455d..0000000000 --- a/packages/sync-service/test/electric/shapes/consumer/move_ins_test.exs +++ /dev/null @@ -1,629 +0,0 @@ -defmodule Electric.Shapes.Consumer.MoveInsTest do - use ExUnit.Case, async: true - - alias 
Electric.Shapes.Consumer.MoveIns - alias Electric.Replication.Changes.Transaction - alias Electric.Replication.Changes - - describe "new/0" do - test "creates empty state" do - state = MoveIns.new() - - assert state.waiting_move_ins == %{} - assert state.filtering_move_ins == [] - end - end - - describe "add_waiting/4" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "adds a single move-in with nil snapshot", %{state: state} do - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - - assert Map.has_key?(state.waiting_move_ins, "move1") - assert state.waiting_move_ins["move1"] == {nil, moved_values} - end - - @tag :move_in - test "adds multiple move-ins", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.add_waiting("move2", moved_values2) - - assert map_size(state.waiting_move_ins) == 2 - assert state.waiting_move_ins["move1"] == {nil, moved_values1} - assert state.waiting_move_ins["move2"] == {nil, moved_values2} - end - end - - describe "set_snapshot/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "sets snapshot for waiting move-in", %{state: state} do - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - snapshot = {100, 200, [150]} - state = MoveIns.set_snapshot(state, "move1", snapshot) - - assert state.waiting_move_ins["move1"] == {snapshot, moved_values} - end - - @tag :move_in - test "raises on non-existent move-in", %{state: state} do - snapshot = {100, 200, [150]} - - assert_raise KeyError, fn -> - MoveIns.set_snapshot(state, "nonexistent", snapshot) - end - end - end - - describe "change_to_filtering/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "moves from waiting to filtering and returns visibility boundary", %{state: 
state} do - snapshot = {100, 200, []} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - key_set = MapSet.new(["key1", "key2"]) - {visibility_boundary, state} = MoveIns.change_to_filtering(state, "move1", key_set) - - assert state.waiting_move_ins == %{} - assert [{^snapshot, ^key_set}] = state.filtering_move_ins - # Single move-in returns its snapshot as visibility boundary - assert visibility_boundary == snapshot - end - - @tag :move_in - test "keeps other waiting move-ins", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.set_snapshot("move1", {100, 200, []}) - |> MoveIns.add_waiting("move2", moved_values2) - |> MoveIns.set_snapshot("move2", {150, 250, []}) - - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - assert Map.has_key?(state.waiting_move_ins, "move2") - refute Map.has_key?(state.waiting_move_ins, "move1") - end - - @tag :move_in - test "raises on unknown move-in name", %{state: state} do - assert_raise KeyError, fn -> - MoveIns.change_to_filtering(state, "nonexistent", MapSet.new([])) - end - end - - @tag :move_in - test "returns snapshot when resolving minimum with no other waiting", %{state: state} do - snapshot = {100, 200, []} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot - end - - @tag :move_in - test "returns snapshot when resolving minimum among concurrent move-ins", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> 
MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move1 (minimum) - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot1 - end - - @tag :move_in - test "returns nil when resolving non-minimum", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) - should return nil and store snapshot2 - {boundary, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary == nil - assert state.maximum_resolved_snapshot == snapshot2 - end - - @tag :move_in - test "returns stored maximum when last move-in resolves", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) first - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - - # Resolve move1 (last one) - should return stored maximum (snapshot2) - {boundary2, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary2 == snapshot2 - assert state.maximum_resolved_snapshot == nil - end - end - - describe "remove_completed/2" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "removes move-ins where xid >= xmax", %{state: state} do - # Move-in with xmax=200 - moved_values = {[], MapSet.new()} - - state = MoveIns.add_waiting(state, "move1", 
moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - # Transaction with xid=200 (at xmax boundary - should complete) - txn = %Transaction{xid: 200, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert state.filtering_move_ins == [] - end - - @tag :move_in - test "keeps move-ins where xid < xmax", %{state: state} do - moved_values = {[], MapSet.new()} - - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - {_boundary, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - - txn = %Transaction{xid: 150, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert length(state.filtering_move_ins) == 1 - end - - @tag :move_in - test "removes only completed move-ins from multiple", %{state: state} do - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values1) - |> MoveIns.set_snapshot("move1", {100, 200, []}) - |> MoveIns.add_waiting("move2", moved_values2) - |> MoveIns.set_snapshot("move2", {100, 300, []}) - - {_boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new(["key1"])) - {_boundary2, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new(["key2"])) - - # xid=250 completes move1 (xmax=200) but not move2 (xmax=300) - txn = %Transaction{xid: 250, lsn: {0, 1}, changes: []} - state = MoveIns.remove_completed(state, txn) - - assert length(state.filtering_move_ins) == 1 - [{snapshot, key_set}] = state.filtering_move_ins - assert snapshot == {100, 300, []} - assert key_set == MapSet.new(["key2"]) - end - end - - describe "track_touch/3" do - @tag :move_in - test "tracks INSERT operations" do - state = MoveIns.new() - change = %Changes.NewRecord{key: "key1", record: %{}} - - state = 
MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{"key1" => 100} - end - - @tag :move_in - test "tracks UPDATE operations" do - state = MoveIns.new() - change = %Changes.UpdatedRecord{key: "key1", record: %{}, old_record: %{}} - - state = MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{"key1" => 100} - end - - @tag :move_in - test "does NOT track DELETE operations" do - state = MoveIns.new() - change = %Changes.DeletedRecord{key: "key1", old_record: %{}} - - state = MoveIns.track_touch(state, 100, change) - - assert state.touch_tracker == %{} - end - - @tag :move_in - test "updates existing key with newer xid" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100}} - change = %Changes.NewRecord{key: "key1", record: %{}} - - state = MoveIns.track_touch(state, 150, change) - - assert state.touch_tracker == %{"key1" => 150} - end - end - - describe "gc_touch_tracker/1" do - @tag :move_in - test "clears all when no pending queries" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100, "key2" => 150}} - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{} - end - - @tag :move_in - test "keeps all touches when no snapshots known yet" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 100, "key2" => 150}} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{"key1" => 100, "key2" => 150} - end - - @tag :move_in - test "removes touches < min_xmin" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - - state = MoveIns.gc_touch_tracker(state) - - assert state.touch_tracker == %{"key2" => 100, 
"key3" => 150} - end - - @tag :move_in - test "keeps touches >= min_xmin across multiple snapshots" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values1) - state = MoveIns.set_snapshot(state, "move1", {100, 200, []}) - state = MoveIns.add_waiting(state, "move2", moved_values2) - state = MoveIns.set_snapshot(state, "move2", {120, 250, []}) - - state = MoveIns.gc_touch_tracker(state) - - # min_xmin = 100, so keeps keys with xid >= 100 - assert state.touch_tracker == %{"key2" => 100, "key3" => 150} - end - - @tag :move_in - test "handles mix of nil and real snapshots" do - state = MoveIns.new() - state = %{state | touch_tracker: %{"key1" => 50, "key2" => 100, "key3" => 150}} - moved_values1 = {[], MapSet.new()} - moved_values2 = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values1) - state = MoveIns.add_waiting(state, "move2", moved_values2) - state = MoveIns.set_snapshot(state, "move2", {120, 250, []}) - - state = MoveIns.gc_touch_tracker(state) - - # min_xmin = 120, so only keeps key3 - assert state.touch_tracker == %{"key3" => 150} - end - end - - describe "should_skip_query_row?/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "returns false when key not in tracker", %{state: state} do - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - assert result == false - end - - @tag :move_in - test "returns false when touch is visible in snapshot", %{state: state} do - state = %{state | touch_tracker: %{"key1" => 50}} - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=50 < xmin=100, so visible - assert result == false - end - - @tag :move_in - test "returns true when touch xid >= xmax", 
%{state: state} do - state = %{state | touch_tracker: %{"key1" => 250}} - snapshot = {100, 200, []} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=250 >= xmax=200, so not visible (happened after snapshot) - assert result == true - end - - @tag :move_in - test "returns true when touch xid in xip_list", %{state: state} do - state = %{state | touch_tracker: %{"key1" => 150}} - snapshot = {100, 200, [150]} - - result = MoveIns.should_skip_query_row?(state.touch_tracker, snapshot, "key1") - - # xid=150 is in xip_list, so not visible (not committed at snapshot time) - assert result == true - end - end - - describe "visibility boundary scenarios (integration)" do - setup do - state = MoveIns.new() - %{state: state} - end - - @tag :move_in - test "single move-in: returns its own snapshot", %{state: state} do - snapshot = {100, 200, [150]} - moved_values = {[], MapSet.new()} - state = MoveIns.add_waiting(state, "move1", moved_values) - state = MoveIns.set_snapshot(state, "move1", snapshot) - - {boundary, _state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary == snapshot - end - - @tag :move_in - test "two move-ins resolving in order (both minimum): both return their snapshots", %{ - state: state - } do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move1 (minimum) first - returns snapshot1 - {boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary1 == snapshot1 - - # Resolve move2 (last one) - returns snapshot2 - {boundary2, _state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == snapshot2 - end - - @tag :move_in - test "two move-ins resolving out of 
order: stores max, returns it on last", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - - # Resolve move2 (non-minimum) first - returns nil, stores snapshot2 - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move1 (last one) - returns stored maximum (snapshot2) - {boundary2, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary2 == snapshot2 - assert state.maximum_resolved_snapshot == nil - end - - @tag :move_in - test "three move-ins resolving: 2nd, 3rd, then 1st", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - snapshot3 = {120, 250, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - |> MoveIns.add_waiting("move3", moved_values) - |> MoveIns.set_snapshot("move3", snapshot3) - - # Resolve move2 (largest, not minimum) - stores snapshot2 - {boundary1, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move3 (middle, not minimum) - keeps maximum as snapshot2 - {boundary2, state} = MoveIns.change_to_filtering(state, "move3", MapSet.new([])) - assert boundary2 == nil - assert state.maximum_resolved_snapshot == snapshot2 - - # Resolve move1 (last one) - returns stored maximum (snapshot2) - {boundary3, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary3 == snapshot2 - 
assert state.maximum_resolved_snapshot == nil - end - - @tag :move_in - test "equal snapshots: both treated as minimum, both return snapshot", %{state: state} do - snapshot = {100, 200, [150]} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot) - - # Resolve move1 - returns snapshot - {boundary1, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary1 == snapshot - - # Resolve move2 (last one) - also returns snapshot - {boundary2, _state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == snapshot - end - - @tag :move_in - test "complex: 4 move-ins resolving in order 4→2→3→1", %{state: state} do - snapshot1 = {100, 200, []} - snapshot2 = {150, 300, []} - snapshot3 = {120, 250, []} - snapshot4 = {200, 400, []} - moved_values = {[], MapSet.new()} - - state = - state - |> MoveIns.add_waiting("move1", moved_values) - |> MoveIns.set_snapshot("move1", snapshot1) - |> MoveIns.add_waiting("move2", moved_values) - |> MoveIns.set_snapshot("move2", snapshot2) - |> MoveIns.add_waiting("move3", moved_values) - |> MoveIns.set_snapshot("move3", snapshot3) - |> MoveIns.add_waiting("move4", moved_values) - |> MoveIns.set_snapshot("move4", snapshot4) - - # Resolve move4 (largest, not minimum) - stores snapshot4 - {boundary1, state} = MoveIns.change_to_filtering(state, "move4", MapSet.new([])) - assert boundary1 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move2 (second largest, not minimum) - keeps snapshot4 - {boundary2, state} = MoveIns.change_to_filtering(state, "move2", MapSet.new([])) - assert boundary2 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move3 (second smallest, not minimum) - keeps snapshot4 - {boundary3, state} = MoveIns.change_to_filtering(state, "move3", 
MapSet.new([])) - assert boundary3 == nil - assert state.maximum_resolved_snapshot == snapshot4 - - # Resolve move1 (last one) - returns stored maximum (snapshot4) - {boundary4, state} = MoveIns.change_to_filtering(state, "move1", MapSet.new([])) - assert boundary4 == snapshot4 - assert state.maximum_resolved_snapshot == nil - end - end - - describe "change_visible_in_unresolved_move_ins_for_values?/3" do - setup do - state = MoveIns.new() - %{state: state} - end - - test "returns true when value is in unresolved move-in with nil snapshot", %{state: state} do - state = MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - - assert MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 100 - ) - end - - test "returns true when value is in unresolved move-in with known snapshot and xid is visible", - %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - |> MoveIns.set_snapshot("move1", {150, 200, []}) - - assert MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 100 - ) - end - - test "returns false when value is in unresolved move-in with known snapshot and xid is not visible", - %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - |> MoveIns.set_snapshot("move1", {150, 200, []}) - - refute MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 1}, - 300 - ) - end - - test "returns false when value is not in unresolved move-in", %{state: state} do - state = - MoveIns.add_waiting(state, "move1", {["$sublink", "0"], MapSet.new([1])}) - - refute MoveIns.change_visible_in_unresolved_move_ins_for_values?( - state, - %{["$sublink", "0"] => 2}, - 100 - ) - end - end -end diff --git a/packages/sync-service/test/electric/shapes/consumer/state_test.exs b/packages/sync-service/test/electric/shapes/consumer/state_test.exs 
index 288c8fdaa0..c00bfdc199 100644 --- a/packages/sync-service/test/electric/shapes/consumer/state_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer/state_test.exs @@ -9,37 +9,6 @@ defmodule Electric.Shapes.Consumer.StateTest do @moduletag :tmp_dir - @inspector Support.StubInspector.new( - tables: [ - {1, {"public", "items"}}, - {2, {"public", "parent"}}, - {2, {"public", "grandparent"}} - ], - columns: [ - %{ - name: "id", - type: "int8", - pk_position: 0, - type_id: {20, 1}, - is_generated: false - }, - %{ - name: "parent_id", - type: "int8", - pk_position: nil, - type_id: {20, 1}, - is_generated: false - }, - %{ - name: "flag", - type: "bool", - pk_position: nil, - type_id: {16, 1}, - is_generated: false - } - ] - ) - describe "new/3" do setup [:with_stack_id_from_test] @@ -168,142 +137,4 @@ defmodule Electric.Shapes.Consumer.StateTest do assert log =~ "Falling back to full-transaction buffering" end end - - describe "or_with_subquery? field in new/3" do - setup [:with_stack_id_from_test] - - for {where, expected} <- [ - # No WHERE clause - {nil, false}, - - # WHERE clause without subquery - {"id = 1", false}, - {"id = 1 AND flag = true", false}, - {"id = 1 OR flag = true", false}, - - # Subquery without OR - {"id IN (SELECT id FROM parent)", false}, - {"id = 1 AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND id = 1", false}, - {"parent_id IN (SELECT id FROM parent) AND flag = true AND id = 1", false}, - - # OR directly with subquery - {"parent_id IN (SELECT id FROM parent) OR flag = true", true}, - {"flag = true OR parent_id IN (SELECT id FROM parent)", true}, - {"(parent_id IN (SELECT id FROM parent)) OR (flag = true)", true}, - - # OR that is ANDed with subquery (OR not directly containing subquery) - {"(id = 1 OR flag = true) AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND (id = 1 OR flag = true)", false}, - - # Nested cases - OR with subquery in 
one branch - {"id = 1 OR parent_id IN (SELECT id FROM parent)", true}, - {"id = 1 OR (flag = true AND parent_id IN (SELECT id FROM parent))", true}, - {"(id = 1 AND parent_id IN (SELECT id FROM parent)) OR flag = true", true}, - - # Subquery has OR inside - {"id IN (SELECT id FROM parent WHERE flag = true OR id = 2)", false}, - - # Subquery has OR with nested subquery - {"id IN (SELECT id FROM parent WHERE id = 2 OR id IN (SELECT id FROM grandparent))", - false}, - - # NOT should not change result - {"NOT (parent_id IN (SELECT id FROM parent) OR flag = true)", true}, - {"parent_id NOT IN (SELECT id FROM parent) OR flag = true", true}, - {"parent_id NOT IN (SELECT id FROM parent)", false}, - {"NOT(parent_id IN (SELECT id FROM parent))", false} - ] do - @tag where: where, expected: expected - test "#{inspect(where)} -> or_with_subquery?=#{expected}", %{ - stack_id: stack_id, - where: where, - expected: expected - } do - shape = Shape.new!("items", where: where, inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - - assert state.or_with_subquery? == expected - end - end - end - - describe "not_with_subquery? 
field in new/3" do - setup [:with_stack_id_from_test] - - for {where, expected} <- [ - # No WHERE clause - {nil, false}, - - # WHERE clause without subquery (NOT doesn't matter without subquery) - {"id = 1", false}, - {"NOT (id = 1)", false}, - {"NOT (id = 1 AND flag = true)", false}, - {"id = 1 AND NOT flag = true", false}, - - # Subquery without NOT - {"id IN (SELECT id FROM parent)", false}, - {"id = 1 AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND id = 1", false}, - {"parent_id IN (SELECT id FROM parent) OR flag = true", false}, - - # x NOT IN (subquery) - the most common case - {"parent_id NOT IN (SELECT id FROM parent)", true}, - {"parent_id NOT IN (SELECT id FROM parent) AND id = 1", true}, - {"id = 1 AND parent_id NOT IN (SELECT id FROM parent)", true}, - - # NOT(x IN subquery) - equivalent to NOT IN - {"NOT(parent_id IN (SELECT id FROM parent))", true}, - {"NOT (parent_id IN (SELECT id FROM parent))", true}, - - # NOT(condition AND x IN subquery) - NOT wrapping expression with subquery - {"NOT(flag = true AND parent_id IN (SELECT id FROM parent))", true}, - {"NOT(parent_id IN (SELECT id FROM parent) AND flag = true)", true}, - - # NOT(condition OR x IN subquery) - NOT wrapping OR with subquery - {"NOT(flag = true OR parent_id IN (SELECT id FROM parent))", true}, - {"NOT(parent_id IN (SELECT id FROM parent) OR flag = true)", true}, - - # Nested NOT with subquery - {"NOT(id = 1 AND (flag = true OR parent_id IN (SELECT id FROM parent)))", true}, - {"NOT((parent_id IN (SELECT id FROM parent)) AND id = 1)", true}, - - # NOT inside subquery (shouldn't affect outer query) - {"id IN (SELECT id FROM parent WHERE NOT flag = true)", false}, - {"id IN (SELECT id FROM parent WHERE id NOT IN (SELECT id FROM grandparent))", false}, - - # NOT combined with AND/OR at outer level - {"parent_id NOT IN (SELECT id FROM parent) OR flag = true", true}, - {"parent_id NOT IN (SELECT id FROM parent) AND flag = true", true}, - {"flag 
= true OR parent_id NOT IN (SELECT id FROM parent)", true}, - {"flag = true AND parent_id NOT IN (SELECT id FROM parent)", true}, - - # Multiple subqueries with NOT - {"parent_id NOT IN (SELECT id FROM parent) AND id IN (SELECT id FROM grandparent)", - true}, - {"parent_id IN (SELECT id FROM parent) AND id NOT IN (SELECT id FROM grandparent)", - true}, - - # Double NOT (cancels out, but still has NOT wrapping subquery in AST) - {"NOT(NOT(parent_id IN (SELECT id FROM parent)))", true}, - - # NOT on non-subquery part, subquery without NOT - {"NOT(flag = true) AND parent_id IN (SELECT id FROM parent)", false}, - {"parent_id IN (SELECT id FROM parent) AND NOT(flag = true)", false} - ] do - @tag where: where, expected: expected - test "#{inspect(where)} -> not_with_subquery?=#{expected}", %{ - stack_id: stack_id, - where: where, - expected: expected - } do - shape = Shape.new!("items", where: where, inspector: @inspector) - - state = State.new(stack_id, "test-handle", shape) - - assert state.not_with_subquery? 
== expected - end - end - end end diff --git a/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs b/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs new file mode 100644 index 0000000000..36642118d9 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/subqueries/move_queue_test.exs @@ -0,0 +1,110 @@ +defmodule Electric.Shapes.Consumer.Subqueries.MoveQueueTest do + use ExUnit.Case, async: true + + alias Electric.Shapes.Consumer.Subqueries.MoveQueue + + @dep 0 + + test "drops redundant move outs for values absent from the base view" do + queue = MoveQueue.enqueue(MoveQueue.new(), @dep, %{move_out: [{1, "1"}]}, MapSet.new()) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "drops redundant move ins for values already present in the base view" do + queue = MoveQueue.enqueue(MoveQueue.new(), @dep, %{move_in: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "cancels a pending move in with a later move out for the same value" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}]}, MapSet.new()) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new()) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "cancels a pending move out with a later move in for the same value" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "merges repeated move ins and keeps the terminal tuple" do + queue = + MoveQueue.new() + |> 
MoveQueue.enqueue(@dep, %{move_in: [{1, "01"}]}, MapSet.new()) + |> MoveQueue.enqueue(@dep, %{move_in: [{1, "1"}], move_out: []}, MapSet.new()) + + assert %MoveQueue{move_in: %{0 => [{1, "1"}]}, move_out: empty_out} = queue + assert empty_out == %{} + end + + test "merges repeated move outs and keeps the terminal tuple" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "01"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}], move_in: []}, MapSet.new([1])) + + assert %MoveQueue{move_out: %{0 => [{1, "1"}]}, move_in: empty_in} = queue + assert empty_in == %{} + end + + test "orders surviving move outs before move ins" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{1, "1"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: %{0 => [{1, "1"}]}, move_in: %{0 => [{2, "2"}]}} = queue + end + + test "uses the provided base view when reducing buffering follow-up moves" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_out: [{2, "2"}]}, MapSet.new([1])) + + assert %MoveQueue{move_out: empty_out, move_in: empty_in} = queue + assert empty_out == %{} + assert empty_in == %{} + end + + test "pop_next returns the whole move out batch before the move in batch" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{3, "3"}]}, MapSet.new([1])) + + assert {{:move_out, 0, [{1, "1"}]}, queue} = MoveQueue.pop_next(queue) + assert queue.move_out == %{} + assert queue.move_in == %{0 => [{2, "2"}, {3, "3"}]} + + assert {{:move_in, 0, [{2, "2"}, {3, "3"}]}, queue} = MoveQueue.pop_next(queue) + assert queue.move_out == %{} + assert queue.move_in == %{} + assert nil == MoveQueue.pop_next(queue) + end + + test "length counts queued values across both 
batches" do + queue = + MoveQueue.new() + |> MoveQueue.enqueue(@dep, %{move_in: [{2, "2"}], move_out: [{1, "1"}]}, MapSet.new([1])) + |> MoveQueue.enqueue(@dep, %{move_in: [{3, "3"}]}, MapSet.new([1])) + + assert 3 == MoveQueue.length(queue) + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer/transaction_converter_test.exs b/packages/sync-service/test/electric/shapes/consumer/transaction_converter_test.exs new file mode 100644 index 0000000000..b8e21b8689 --- /dev/null +++ b/packages/sync-service/test/electric/shapes/consumer/transaction_converter_test.exs @@ -0,0 +1,163 @@ +defmodule Electric.Shapes.Consumer.TransactionConverterTest do + use ExUnit.Case, async: true + + alias Electric.Postgres.Lsn + alias Electric.Replication.Changes + alias Electric.Replication.Changes.Transaction + alias Electric.Shapes.Consumer.Effects + alias Electric.Shapes.Consumer.TransactionConverter + alias Electric.Shapes.Shape + + @inspector Support.StubInspector.new( + tables: ["child"], + columns: [ + %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, + %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}}, + %{name: "name", type: "text", pk_position: nil, type_id: {28, 1}} + ] + ) + + test "marks only the final emitted change as last" do + txn = %Transaction{ + xid: 7, + lsn: lsn(7), + last_log_offset: Electric.Replication.LogOffset.new(lsn(7), 1), + changes: [ + child_insert("1"), + child_insert("2") + ] + } + + assert {:ok, + [ + %Effects.AppendChanges{ + xid: 7, + changes: [ + %Changes.NewRecord{record: %{"id" => "1"}, last?: false}, + %Changes.NewRecord{record: %{"id" => "2"}, last?: true} + ] + } + ]} = + TransactionConverter.transaction_to_effects( + txn, + simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + ) + end + + test "converts updates through Shape.convert_change and marks the final change" do + txn = %Transaction{ + xid: 8, + lsn: lsn(8), + last_log_offset: 
Electric.Replication.LogOffset.new(lsn(8), 1), + changes: [child_update("1", "2")] + } + + assert {:ok, + [ + %Effects.AppendChanges{ + xid: 8, + changes: [ + %Changes.NewRecord{record: %{"id" => "2"}, last?: true} + ] + } + ]} = + TransactionConverter.transaction_to_effects( + txn, + filtered_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + ) + end + + test "surfaces truncate before emitting changes" do + txn = %Transaction{ + xid: 9, + lsn: lsn(9), + last_log_offset: Electric.Replication.LogOffset.new(lsn(9), 1), + changes: [%Changes.TruncatedRelation{relation: {"public", "child"}}] + } + + assert {:error, {:truncate, 9}} = + TransactionConverter.transaction_to_effects(txn, simple_shape()) + end + + test "returns no append effects for an empty converted transaction" do + txn = %Transaction{ + xid: 10, + lsn: lsn(10), + last_log_offset: Electric.Replication.LogOffset.new(lsn(10), 1), + changes: [] + } + + assert {:ok, []} = TransactionConverter.transaction_to_effects(txn, simple_shape()) + end + + test "converts multiple transactions into ordered append effects" do + txns = [ + %Transaction{ + xid: 11, + lsn: lsn(11), + last_log_offset: Electric.Replication.LogOffset.new(lsn(11), 1), + changes: [child_insert("1")] + }, + %Transaction{ + xid: 12, + lsn: lsn(12), + last_log_offset: Electric.Replication.LogOffset.new(lsn(12), 1), + changes: [] + }, + %Transaction{ + xid: 13, + lsn: lsn(13), + last_log_offset: Electric.Replication.LogOffset.new(lsn(13), 1), + changes: [child_insert("2")] + } + ] + + assert {:ok, + [ + %Effects.AppendChanges{ + xid: 11, + changes: [%Changes.NewRecord{record: %{"id" => "1"}, last?: true}] + }, + %Effects.AppendChanges{ + xid: 13, + changes: [%Changes.NewRecord{record: %{"id" => "2"}, last?: true}] + } + ]} = + TransactionConverter.transactions_to_effects( + txns, + simple_shape(), + stack_id: "stack-id", + shape_handle: "shape-handle" + ) + end + + defp lsn(value), do: Lsn.from_integer(value) + + defp simple_shape do + 
Shape.new!("child", inspector: @inspector) + end + + defp filtered_shape do + Shape.new!("child", where: "id = 2", inspector: @inspector) + end + + defp child_update(old_id, new_id) do + %Changes.UpdatedRecord{ + relation: {"public", "child"}, + old_record: %{"id" => old_id, "parent_id" => "1", "name" => "child-#{old_id}"}, + record: %{"id" => new_id, "parent_id" => "1", "name" => "child-#{new_id}"}, + changed_columns: MapSet.new(["id"]) + } + end + + defp child_insert(id) do + %Changes.NewRecord{ + relation: {"public", "child"}, + record: %{"id" => id, "parent_id" => "1", "name" => "child-#{id}"} + } + end +end diff --git a/packages/sync-service/test/electric/shapes/consumer_test.exs b/packages/sync-service/test/electric/shapes/consumer_test.exs index 8e30800c5f..40b27576c5 100644 --- a/packages/sync-service/test/electric/shapes/consumer_test.exs +++ b/packages/sync-service/test/electric/shapes/consumer_test.exs @@ -2,6 +2,7 @@ defmodule Electric.Shapes.ConsumerTest do use ExUnit.Case, async: true use Repatch.ExUnit, assert_expectations: true + alias Electric.LsnTracker alias Electric.Postgres.Lsn alias Electric.Replication.Changes.Relation alias Electric.Replication.Changes @@ -1992,15 +1993,11 @@ defmodule Electric.Shapes.ConsumerTest do # Mock query_move_in_async to simulate a query without hitting the database Repatch.patch( - Electric.Shapes.PartialModes, + Electric.Shapes.Consumer.Effects, :query_move_in_async, [mode: :shared], - fn _task_sup, _shape_handle, _shape, _where_clause, opts -> - consumer_pid = opts[:consumer_pid] - name = opts[:move_in_name] - results_fn = opts[:results_fn] - - send(parent, {:query_requested, name, consumer_pid, results_fn}) + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) :ok end @@ -2030,10 +2027,10 @@ defmodule Electric.Shapes.ConsumerTest do ctx.stack_id ) - assert_receive {:query_requested, name, ^consumer_pid, results_fn} + assert_receive {:query_requested, 
^consumer_pid} # Snapshot here is intentionally before the update to make sure the update is considered shadowing - send(consumer_pid, {:pg_snapshot_known, name, {90, 95, []}}) + send(consumer_pid, {:pg_snapshot_known, {90, 95, []}}) # Now send an UPDATE (xid = 100) before move-in query completes # This should be converted to INSERT @@ -2053,43 +2050,360 @@ defmodule Electric.Shapes.ConsumerTest do assert :ok = ShapeLogCollector.handle_event(txn, ctx.stack_id) - # Should get new_changes notification for the UPDATE-as-INSERT + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + + send_stored_move_in_complete( + consumer_pid, + shape_storage, + [ + [ + ~s'"public"."test_table"/"1"', + [], + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + ] + ], + Lsn.from_integer(100) + ) + assert_receive {^ref, :new_changes, _offset}, @receive_timeout - # Now write data for the move-in query - results_fn.( + # Check storage for operations + assert [ + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert"}, + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} + }, + %{ + "headers" => %{ + "control" => "snapshot-end", + "xmin" => "90", + "xmax" => "95", + "xip_list" => [] + } + }, + %{ + "headers" => %{"operation" => "update", "txids" => [100]}, + "key" => ~s'"public"."test_table"/"1"' + } + ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) + end + + test "consumer splices a pending move-in on global_last_seen_lsn broadcast", ctx do + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Effects, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + 
Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + consumer_pid = Consumer.whereis(ctx.stack_id, shape_handle) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + + ShapeLogCollector.handle_event( + complete_txn_fragment(100, Lsn.from_integer(50), [ + %Changes.NewRecord{ + relation: {"public", "other_table"}, + record: %{"id" => "1"}, + log_offset: LogOffset.new(Lsn.from_integer(50), 0) + } + ]), + ctx.stack_id + ) + + assert_receive {:query_requested, ^consumer_pid} + + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + + send_stored_move_in_complete( + consumer_pid, + shape_storage, [ [ - "\"public\".\"test_table\"/\"1\"", - ["tag_does_not_matter"], - Jason.encode!(%{"value" => %{"id" => "1", "value" => "old"}}) + ~s'"public"."test_table"/"1"', + [], + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) ] ], - {90, 95, []} + Lsn.from_integer(100) ) - send(consumer_pid, {:query_move_in_complete, name, ["test_key"], {90, 95, []}}) + refute_receive {^ref, :new_changes, _}, 100 + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) assert_receive {^ref, :new_changes, _offset}, @receive_timeout - # Check storage for operations + assert [ + %{"headers" => %{"event" => "move-in"}}, + %{ + "headers" => %{"operation" => "insert"}, + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} + }, + %{ + "headers" => %{ + "control" => "snapshot-end", + "xmin" => "100", + "xmax" => "300", + "xip_list" => [] + } + } + ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) + end 
+ + test "consumer replays the latest broadcast when subscribing for a move-in", ctx do + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Effects, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + {:ok, shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + [dep_handle] = shape.shape_dependencies_handles + + consumer_pid = Consumer.whereis(ctx.stack_id, shape_handle) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) + + send( + consumer_pid, + {:materializer_changes, dep_handle, + %{ + move_in: [{1, "1"}], + move_out: [] + }} + ) + + assert_receive {:query_requested, ^consumer_pid} + + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + send_stored_move_in_complete( + consumer_pid, + shape_storage, + [ + [ + ~s'"public"."test_table"/"1"', + [], + Jason.encode!(%{ + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + ] + ], + Lsn.from_integer(100) + ) + + assert_receive {^ref, :new_changes, _offset}, @receive_timeout + assert [ + %{"headers" => %{"event" => "move-in"}}, %{ "headers" => %{"operation" => "insert"}, - "value" => %{"id" => "1", "value" => "updated"} + "key" => ~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "old"} }, %{ "headers" => %{ "control" => "snapshot-end", - "xmin" => "90", - "xmax" => "95", + "xmin" => "100", + "xmax" => "300", 
"xip_list" => [] } } ] = get_log_items_from_storage(LogOffset.last_before_real_offsets(), shape_storage) end + + test "consumer startup seeds the stack-scoped subquery index", ctx do + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + # The consumer should have seeded the SubqueryIndex during initialization + index = SubqueryIndex.for_stack(ctx.stack_id) + assert index != nil + + # The shape should be registered with positions (by Filter.add_shape) + assert SubqueryIndex.has_positions?(index, shape_handle) + + # The shape should be marked ready (no longer in fallback) once + # the consumer has seeded the index. After await_snapshot_start returns + # the consumer has completed initialization including subquery seeding. + {:ok, _shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + + # The consumer seeds the index via SubqueryIndex.for_stack, but the + # index is also modified by the Filter (which runs in the + # ShapeLogCollector process). Check that the shape has positions + # and that membership entries are correct (empty views for a fresh shape). + positions = SubqueryIndex.positions_for_shape(index, shape_handle) + assert length(positions) > 0 + + # Verify the index is accessible and has retained node registrations. 
+ assert positions == SubqueryIndex.positions_for_shape(index, shape_handle) + end + + test "consumer steady dependency move_in adds value to the subquery index", ctx do + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + parent = self() + + Repatch.patch( + Electric.Shapes.Consumer.Effects, + :query_move_in_async, + [mode: :shared], + fn _task_sup, _consumer_state, _buffering_state, consumer_pid -> + send(parent, {:query_requested, consumer_pid}) + :ok + end + ) + + Support.TestUtils.activate_mocks_for_descendant_procs(Consumer) + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index = SubqueryIndex.for_stack(ctx.stack_id) + {:ok, _shape} = Electric.Shapes.fetch_shape_by_handle(ctx.stack_id, shape_handle) + + # Before any dependency changes, the index has empty membership + refute SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + + # Send a new record for the dependency table to trigger a move_in + ShapeLogCollector.handle_event( + complete_txn_fragment(100, Lsn.from_integer(50), [ + %Changes.NewRecord{ + relation: {"public", "other_table"}, + record: %{"id" => "1"}, + log_offset: LogOffset.new(Lsn.from_integer(50), 0) + } + ]), + ctx.stack_id + ) + + # Wait for the consumer to process the event and request a move_in query + assert_receive {:query_requested, consumer_pid} + + # During buffering, the value should have been added to the index + # (union for positive dependency: before ∪ after) + assert SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + + # Complete the move_in query to transition back to steady state + send(consumer_pid, {:pg_snapshot_known, {100, 300, []}}) + + shape_storage = Storage.for_shape(shape_handle, ctx.storage) + + send_stored_move_in_complete( + consumer_pid, + shape_storage, + [ + [ + ~s'"public"."test_table"/"1"', + [], + Jason.encode!(%{ + "key" => 
~s'"public"."test_table"/"1"', + "value" => %{"id" => "1", "value" => "val"}, + "headers" => %{ + "operation" => "insert", + "relation" => ["public", "test_table"] + } + }) + ] + ], + Lsn.from_integer(100) + ) + + # Allow the consumer to process the completion + assert :ok = LsnTracker.broadcast_last_seen_lsn(ctx.stack_id, 100) + ref = Shapes.Consumer.register_for_changes(ctx.stack_id, shape_handle) + assert_receive {^ref, :new_changes, _offset}, @receive_timeout + + # After move_in completes, value should still be in the index (now steady state) + assert SubqueryIndex.member?(index, shape_handle, ["$sublink", "0"], 1) + end + + test "consumer cleanup removes shape rows from the subquery index", ctx do + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + + {shape_handle, _} = + ShapeCache.get_or_create_shape_handle(@shape_with_subquery, ctx.stack_id) + + :started = ShapeCache.await_snapshot_start(shape_handle, ctx.stack_id) + + index = SubqueryIndex.for_stack(ctx.stack_id) + assert SubqueryIndex.has_positions?(index, shape_handle) + + # Monitor the consumer so we know when cleanup finishes + consumer_name = Shapes.Consumer.name(ctx.stack_id, shape_handle) + consumer_pid = GenServer.whereis(consumer_name) + ref = Process.monitor(consumer_pid) + + expect_shape_status(remove_shape: fn _, ^shape_handle -> :ok end) + ShapeCache.clean_shape(shape_handle, ctx.stack_id) + + # Wait for consumer to shut down, flushing any other messages first + assert_receive {:DOWN, ^ref, :process, ^consumer_pid, _reason}, 5000 + + # The ShapeLogCollector removes the shape from the filter asynchronously. + # Wait briefly for it to process. 
+ Process.sleep(100) + + # After cleanup, the shape's rows should be removed from the index + refute SubqueryIndex.has_positions?(index, shape_handle) + end end defp refute_storage_calls_for_txn_fragment(shape_handle) do @@ -2128,4 +2442,16 @@ defmodule Electric.Shapes.ConsumerTest do defp get_log_items_from_storage(offset, shape_storage) do Storage.get_log_stream(offset, shape_storage) |> Enum.map(&Jason.decode!/1) end + + defp send_stored_move_in_complete(consumer_pid, shape_storage, rows, lsn) do + snapshot_name = Electric.Utils.uuid4() + row_bytes = Enum.reduce(rows, 0, fn [_, _, json], acc -> acc + IO.iodata_length(json) end) + + Storage.write_move_in_snapshot!(rows, snapshot_name, shape_storage) + + send( + consumer_pid, + {:query_move_in_complete, snapshot_name, length(rows), row_bytes, lsn} + ) + end end diff --git a/packages/sync-service/test/electric/shapes/dnf_plan_test.exs b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs new file mode 100644 index 0000000000..c424c1a1cf --- /dev/null +++ b/packages/sync-service/test/electric/shapes/dnf_plan_test.exs @@ -0,0 +1,373 @@ +defmodule Electric.Shapes.DnfPlanTest do + use ExUnit.Case, async: true + + alias Electric.Replication.Eval.Parser + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Shape + + @refs %{ + ["id"] => :int4, + ["x"] => :int4, + ["y"] => :int4, + ["z"] => :int4, + ["status"] => :text, + ["name"] => :text, + ["a"] => :int4, + ["b"] => :int4 + } + + describe "compile/1 - no subqueries" do + test "returns :no_subqueries for shape without where clause" do + shape = make_shape(nil, []) + assert :no_subqueries = DnfPlan.compile(shape) + end + + test "returns :no_subqueries for shape without dependencies" do + where = parse_where(~S"x = 1") + shape = make_shape(where, []) + assert :no_subqueries = DnfPlan.compile(shape) + end + end + + describe "compile/1 - single subquery" do + test "single subquery shape" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM 
dep)", 1) + shape = make_shape(where, deps) + + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 1 + assert length(plan.disjuncts) == 1 + + # Single position, which is a subquery + assert map_size(plan.positions) == 1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.negated == false + assert pos0.dependency_index == 0 + assert pos0.subquery_ref == ["$sublink", "0"] + assert pos0.tag_columns == ["x"] + + assert plan.dependency_positions == %{0 => [0]} + assert plan.dependency_disjuncts == %{0 => [0]} + assert plan.dependency_polarities == %{0 => :positive} + end + end + + describe "compile/1 - OR with subqueries" do + test "x IN sq1 OR y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 2 + assert length(plan.disjuncts) == 2 + + # Position 0: x IN sq1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.dependency_index == 0 + assert pos0.tag_columns == ["x"] + + # Position 1: y IN sq2 + pos1 = plan.positions[1] + assert pos1.is_subquery == true + assert pos1.dependency_index == 1 + assert pos1.tag_columns == ["y"] + + # Each dependency maps to its own position and disjunct + assert plan.dependency_positions == %{0 => [0], 1 => [1]} + assert plan.dependency_disjuncts == %{0 => [0], 1 => [1]} + assert plan.dependency_polarities == %{0 => :positive, 1 => :positive} + end + + test "(x IN sq1 AND status = 'open') OR y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 3 + assert length(plan.disjuncts) == 2 + + # Find the subquery positions + subquery_positions = + plan.positions + |> 
Enum.filter(fn {_pos, info} -> info.is_subquery end) + |> Enum.sort_by(fn {_pos, info} -> info.dependency_index end) + + assert length(subquery_positions) == 2 + + [{sq1_pos, sq1_info}, {sq2_pos, sq2_info}] = subquery_positions + assert sq1_info.dependency_index == 0 + assert sq1_info.tag_columns == ["x"] + assert sq2_info.dependency_index == 1 + assert sq2_info.tag_columns == ["y"] + + # Find the row predicate position + row_positions = + plan.positions + |> Enum.filter(fn {_pos, info} -> not info.is_subquery end) + + assert [{row_pos, row_info}] = row_positions + assert row_info.sql =~ "status" + assert row_info.is_subquery == false + assert row_info.dependency_index == nil + + # Disjunct 0 should contain sq1 + row predicate, disjunct 1 should contain sq2 + [d0, d1] = plan.disjuncts + d0_positions = Enum.map(d0, &elem(&1, 0)) |> MapSet.new() + d1_positions = Enum.map(d1, &elem(&1, 0)) |> MapSet.new() + + assert MapSet.member?(d0_positions, sq1_pos) + assert MapSet.member?(d0_positions, row_pos) + assert MapSet.member?(d1_positions, sq2_pos) + + # dependency_disjuncts: dep 0 in disjunct 0, dep 1 in disjunct 1 + assert plan.dependency_disjuncts[0] == [0] + assert plan.dependency_disjuncts[1] == [1] + end + end + + describe "compile/1 - AND with subqueries" do + test "x IN sq1 AND y IN sq2" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) AND y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + # AND produces a single disjunct + assert plan.position_count == 2 + assert length(plan.disjuncts) == 1 + + [d0] = plan.disjuncts + assert length(d0) == 2 + + # Both deps are in the same (only) disjunct + assert plan.dependency_disjuncts == %{0 => [0], 1 => [0]} + end + end + + describe "compile/1 - composite key subqueries" do + test "composite key subquery position" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x, y) IN (SELECT a, b FROM dep1)", + 1, + 
sublink_refs: %{["$sublink", "0"] => {:array, {:row, [:int4, :int4]}}}, + dep_columns: [["a", "b"]] + ) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.position_count == 1 + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.tag_columns == {:hash_together, ["x", "y"]} + end + end + + describe "compile/1 - negated subqueries" do + test "NOT with subquery has negated polarity" do + {where, deps} = + parse_where_with_sublinks(~S"NOT x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + pos0 = plan.positions[0] + assert pos0.is_subquery == true + assert pos0.negated == true + assert plan.dependency_polarities == %{0 => :negated} + end + + test "positive subquery has positive polarity" do + {where, deps} = + parse_where_with_sublinks(~S"x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert plan.dependency_polarities == %{0 => :positive} + end + end + + describe "compile/1 - mixed polarity" do + test "returns error when same subquery is used with both positive and negative polarity" do + # Parse with 2 separate sublinks, then remap $sublink/1 -> $sublink/0 + # to simulate what Shape.new's canonicalize_where_sublink_refs does + # when the same subquery appears with opposite polarity. 
+ {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR NOT x IN (SELECT id FROM dep2)", + 2 + ) + + # Remap $sublink/1 refs to $sublink/0 in the AST, simulating deduplication + remapped_eval = remap_sublink_ref(where.eval, "1", "0") + + remapped_used_refs = + where.used_refs + |> Map.delete(["$sublink", "1"]) + + where = %{where | eval: remapped_eval, used_refs: remapped_used_refs} + + # Only 1 dependency since both refs now point to the same sublink + shape = make_shape(where, [hd(deps)]) + assert {:error, reason} = DnfPlan.compile(shape) + assert reason =~ "positive and negative polarity" + end + end + + describe "compile/1 - nested subqueries compile per level" do + test "outer and inner shapes compile independently" do + # Outer shape: x IN sq1 (where sq1 itself has subqueries) + {outer_where, outer_deps} = + parse_where_with_sublinks(~S"x IN (SELECT id FROM dep1)", 1) + + outer_shape = make_shape(outer_where, outer_deps) + + # Inner shape: a IN sq2 (the inner subquery's own WHERE) + {inner_where, inner_deps} = + parse_where_with_sublinks(~S"a IN (SELECT id FROM dep2)", 1) + + inner_shape = make_shape(inner_where, inner_deps) + + # Each compiles independently + assert {:ok, outer_plan} = DnfPlan.compile(outer_shape) + assert {:ok, inner_plan} = DnfPlan.compile(inner_shape) + + # Each has its own positions + assert outer_plan.position_count == 1 + assert inner_plan.position_count == 1 + + # Each references its own dependency index 0 + assert outer_plan.dependency_positions == %{0 => [0]} + assert inner_plan.dependency_positions == %{0 => [0]} + end + end + + describe "compile/1 - distribution" do + test "AND distributes over OR with subqueries" do + # x IN sq1 AND (status = 'open' OR y IN sq2) + # Distributes to: (x IN sq1 AND status = 'open') OR (x IN sq1 AND y IN sq2) + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) AND (status = 'open' OR y IN (SELECT id FROM dep2))", + 2 + ) + + shape = 
make_shape(where, deps) + assert {:ok, plan} = DnfPlan.compile(shape) + + assert length(plan.disjuncts) == 2 + + # dep 0 (sq1) should be in both disjuncts since AND distributes + assert plan.dependency_disjuncts[0] == [0, 1] + # dep 1 (sq2) should be in only the second disjunct + assert plan.dependency_disjuncts[1] == [1] + end + end + + # -- Helpers -- + + defp parse_where(where_clause) do + {:ok, pgquery} = Parser.parse_query(where_clause) + {:ok, expr} = Parser.validate_where_ast(pgquery, refs: @refs) + expr + end + + defp parse_where_with_sublinks(where_clause, num_deps, opts \\ []) do + sublink_refs = + Keyword.get_lazy(opts, :sublink_refs, fn -> + Map.new(0..(num_deps - 1), fn i -> + {["$sublink", "#{i}"], {:array, :int4}} + end) + end) + + dep_columns = Keyword.get(opts, :dep_columns, nil) + + sublink_queries = + Map.new(0..(num_deps - 1), fn i -> + cols = + if dep_columns do + Enum.at(dep_columns, i) |> Enum.join(", ") + else + "id" + end + + {i, "SELECT #{cols} FROM dep#{i + 1}"} + end) + + all_refs = Map.merge(@refs, sublink_refs) + {:ok, pgquery} = Parser.parse_query(where_clause) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, + refs: all_refs, + sublink_queries: sublink_queries + ) + + deps = + Enum.map(0..(num_deps - 1), fn _i -> + %Shape{ + root_table: {"public", "dep"}, + root_table_id: 100, + root_pk: ["id"], + root_column_count: 1, + where: nil, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + end) + + {expr, deps} + end + + defp make_shape(where, deps) do + %Shape{ + root_table: {"public", "test"}, + root_table_id: 1, + root_pk: ["id"], + root_column_count: 5, + where: where, + selected_columns: ["id", "x", "y", "status"], + explicitly_selected_columns: ["id", "x", "y", "status"], + shape_dependencies: deps, + shape_dependencies_handles: Enum.with_index(deps, fn _, i -> "dep_handle_#{i}" end) + } + end + + # Recursively remap $sublink refs in an eval AST + defp remap_sublink_ref(%Parser.Ref{path: ["$sublink", from]} 
= ref, from, to) do + %{ref | path: ["$sublink", to]} + end + + defp remap_sublink_ref(%Parser.Func{args: args} = func, from, to) do + %{func | args: Enum.map(args, &remap_sublink_ref(&1, from, to))} + end + + defp remap_sublink_ref(%Parser.Array{elements: elements} = arr, from, to) do + %{arr | elements: Enum.map(elements, &remap_sublink_ref(&1, from, to))} + end + + defp remap_sublink_ref(%Parser.RowExpr{elements: elements} = row, from, to) do + %{row | elements: Enum.map(elements, &remap_sublink_ref(&1, from, to))} + end + + defp remap_sublink_ref(other, _from, _to), do: other +end diff --git a/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs new file mode 100644 index 0000000000..48df275c7a --- /dev/null +++ b/packages/sync-service/test/electric/shapes/filter/subquery_index_test.exs @@ -0,0 +1,228 @@ +defmodule Electric.Shapes.Filter.Indexes.SubqueryIndexTest do + use ExUnit.Case + + alias Electric.Replication.Eval.Parser.{Func, Ref} + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.Indexes.SubqueryIndex + alias Electric.Shapes.Filter.WhereCondition + + @subquery_ref ["$sublink", "0"] + @field "par_id" + + setup do + filter = Filter.new() + condition_id = make_ref() + WhereCondition.init(filter, condition_id) + + %{ + filter: filter, + table: Filter.subquery_index(filter), + condition_id: condition_id + } + end + + describe "shape-level metadata" do + test "register_shape stores polarity and fallback used by exact evaluation", %{table: table} do + SubqueryIndex.register_shape(table, "s1", make_plan()) + + assert SubqueryIndex.fallback?(table, "s1") + assert SubqueryIndex.membership_or_fallback?(table, "s1", @subquery_ref, 99) + + SubqueryIndex.register_shape(table, "s2", make_plan(polarity: :negated)) + + refute SubqueryIndex.membership_or_fallback?(table, "s2", @subquery_ref, 99) + end + + test "unregister_shape 
removes exact membership metadata", %{table: table} do + SubqueryIndex.register_shape(table, "s1", make_plan()) + SubqueryIndex.add_value(table, "s1", @subquery_ref, 0, 5) + + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 5) + + SubqueryIndex.unregister_shape(table, "s1") + + refute SubqueryIndex.member?(table, "s1", @subquery_ref, 5) + refute SubqueryIndex.fallback?(table, "s1") + end + end + + describe "node registration and updates" do + test "add_shape registers node mappings for a dependency", %{ + filter: filter, + table: table, + condition_id: condition_id + } do + register_node_shape(filter, table, condition_id, "s1") + + assert SubqueryIndex.has_positions?(table, "s1") + assert [{^condition_id, @field}] = SubqueryIndex.positions_for_shape(table, "s1") + end + + test "multiple shapes on the same node infer emptiness from node registrations", %{ + filter: filter, + table: table, + condition_id: condition_id + } do + register_node_shape(filter, table, condition_id, "s1") + register_node_shape(filter, table, condition_id, "s2") + register_node_shape(filter, table, condition_id, "s3") + + assert [ + {{:node_shape, {^condition_id, @field}}, {"s1", 0, :positive, _}}, + {{:node_shape, {^condition_id, @field}}, {"s2", 0, :positive, _}}, + {{:node_shape, {^condition_id, @field}}, {"s3", 0, :positive, _}} + ] = Enum.sort(:ets.lookup(table, {:node_shape, {condition_id, @field}})) + + assert :ok = + SubqueryIndex.remove_shape(filter, condition_id, "s1", subquery_optimisation()) + + assert MapSet.new(["s2", "s3"]) == SubqueryIndex.all_shape_ids(filter, condition_id, @field) + + assert :ok = + SubqueryIndex.remove_shape(filter, condition_id, "s2", subquery_optimisation()) + + assert :deleted = + SubqueryIndex.remove_shape(filter, condition_id, "s3", subquery_optimisation()) + + assert [] == :ets.lookup(table, {:node_shape, {condition_id, @field}}) + assert [] == :ets.lookup(table, {:node_meta, {condition_id, @field}}) + end + + test "seed_membership updates 
node-local routing and exact membership", %{ + filter: filter, + table: table, + condition_id: condition_id + } do + register_node_shape(filter, table, condition_id, "s1") + + SubqueryIndex.seed_membership(table, "s1", @subquery_ref, 0, MapSet.new([5])) + SubqueryIndex.mark_ready(table, "s1") + + assert SubqueryIndex.member?(table, "s1", @subquery_ref, 5) + + assert MapSet.new(["s1"]) == + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "5"} + ) + end + + test "negated nodes use local complement semantics", %{ + filter: filter, + table: table, + condition_id: condition_id + } do + register_node_shape(filter, table, condition_id, "s1", polarity: :negated) + + SubqueryIndex.seed_membership(table, "s1", @subquery_ref, 0, MapSet.new([5])) + SubqueryIndex.mark_ready(table, "s1") + + refute MapSet.member?( + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "5"} + ), + "s1" + ) + + assert MapSet.member?( + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "99"} + ), + "s1" + ) + end + + test "remove_shape clears node registrations", %{ + filter: filter, + table: table, + condition_id: condition_id + } do + register_node_shape(filter, table, condition_id, "s1") + SubqueryIndex.add_value(table, "s1", @subquery_ref, 0, 5) + + assert :deleted = + SubqueryIndex.remove_shape(filter, condition_id, "s1", subquery_optimisation()) + + refute SubqueryIndex.has_positions?(table, "s1") + + SubqueryIndex.unregister_shape(table, "s1") + + refute SubqueryIndex.fallback?(table, "s1") + end + end + + describe "stack lookup" do + test "stores and retrieves table ref by stack_id" do + table = SubqueryIndex.new(stack_id: "test-stack-123") + assert SubqueryIndex.for_stack("test-stack-123") == table + end + + test "returns nil for unknown stack" do + assert SubqueryIndex.for_stack("nonexistent-stack") == nil + end + end + + defp register_node_shape(filter, table, condition_id, shape_id, opts \\ 
[]) do + SubqueryIndex.register_shape(table, shape_id, make_plan(opts)) + :ok = SubqueryIndex.add_shape(filter, condition_id, shape_id, subquery_optimisation(opts)) + end + + defp subquery_optimisation(opts \\ []) do + field = Keyword.get(opts, :field, @field) + + %{ + operation: "subquery", + field: field, + testexpr: %Ref{path: [field], type: :int8}, + subquery_ref: Keyword.get(opts, :subquery_ref, @subquery_ref), + dep_index: Keyword.get(opts, :dep_index, 0), + polarity: Keyword.get(opts, :polarity, :positive), + and_where: Keyword.get(opts, :and_where) + } + end + + defp make_plan(opts \\ []) do + polarity = Keyword.get(opts, :polarity, :positive) + dep_index = Keyword.get(opts, :dep_index, 0) + subquery_ref = Keyword.get(opts, :subquery_ref, @subquery_ref) + field = Keyword.get(opts, :field, @field) + + testexpr = %Ref{path: [field], type: :int8} + ref = %Ref{path: subquery_ref, type: {:array, :int8}} + + ast = %Func{ + name: "sublink_membership_check", + args: [testexpr, ref], + type: :bool + } + + %DnfPlan{ + disjuncts: [], + disjuncts_positions: [], + position_count: 1, + positions: %{ + 0 => %{ + ast: ast, + sql: "fake", + is_subquery: true, + negated: polarity == :negated, + dependency_index: dep_index, + subquery_ref: subquery_ref, + tag_columns: [field] + } + }, + dependency_positions: %{dep_index => [0]}, + dependency_disjuncts: %{}, + dependency_polarities: %{dep_index => polarity} + } + end +end diff --git a/packages/sync-service/test/electric/shapes/filter/subquery_node_test.exs b/packages/sync-service/test/electric/shapes/filter/subquery_node_test.exs new file mode 100644 index 0000000000..1696e9eecb --- /dev/null +++ b/packages/sync-service/test/electric/shapes/filter/subquery_node_test.exs @@ -0,0 +1,232 @@ +defmodule Electric.Shapes.Filter.Indexes.SubqueryIndexNodeTest do + use ExUnit.Case + + alias Electric.Replication.Eval.Parser + alias Electric.Replication.Eval.Parser.{Func, Ref} + alias Electric.Shapes.DnfPlan + alias Electric.Shapes.Filter 
+ alias Electric.Shapes.Filter.Indexes.SubqueryIndex + alias Electric.Shapes.Filter.WhereCondition + + @subquery_ref ["$sublink", "0"] + @field "par_id" + @other_field "id" + + setup do + filter = Filter.new() + condition_id = make_ref() + WhereCondition.init(filter, condition_id) + + %{ + filter: filter, + condition_id: condition_id, + reverse_index: Filter.subquery_index(filter) + } + end + + describe "affected_shapes/4" do + test "returns only shapes registered under the current field key", %{ + filter: filter, + condition_id: condition_id, + reverse_index: reverse_index + } do + register_node_shape(filter, reverse_index, condition_id, "local_shape") + + register_node_shape(filter, reverse_index, condition_id, "other_field_shape", + field: @other_field + ) + + seed_shape(reverse_index, "local_shape", [5]) + seed_shape(reverse_index, "other_field_shape", [5]) + + assert MapSet.new(["local_shape"]) == + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "5", "id" => "5"} + ) + end + + test "delegates matching candidates to the child where condition", %{ + filter: filter, + condition_id: condition_id, + reverse_index: reverse_index + } do + register_node_shape( + filter, + reverse_index, + condition_id, + "shape_with_exact_tail", + and_where: where("name ILIKE 'keep%'", %{["name"] => :text}) + ) + + seed_shape(reverse_index, "shape_with_exact_tail", [5]) + + assert MapSet.new(["shape_with_exact_tail"]) == + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "5", "name" => "keep_me"} + ) + + assert MapSet.new() == + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "5", "name" => "discard"} + ) + end + + test "routes unseeded shapes once traversal reaches the node", %{ + filter: filter, + condition_id: condition_id, + reverse_index: reverse_index + } do + register_node_shape(filter, reverse_index, condition_id, "unseeded_shape") + + assert MapSet.new(["unseeded_shape"]) 
== + SubqueryIndex.affected_shapes( + filter, + condition_id, + @field, + %{"par_id" => "999"} + ) + end + end + + describe "all_shape_ids/3" do + test "returns only the shape ids for the requested field key", %{ + filter: filter, + condition_id: condition_id, + reverse_index: reverse_index + } do + register_node_shape(filter, reverse_index, condition_id, "shape1") + + register_node_shape( + filter, + reverse_index, + condition_id, + "shape2", + and_where: where("name ILIKE 'keep%'", %{["name"] => :text}) + ) + + register_node_shape(filter, reverse_index, condition_id, "other_field_shape", + field: @other_field + ) + + assert MapSet.new(["shape1", "shape2"]) == + SubqueryIndex.all_shape_ids(filter, condition_id, @field) + end + end + + describe "remove_shape/4" do + test "tracks emptiness per field key", %{ + filter: filter, + condition_id: condition_id, + reverse_index: reverse_index + } do + register_node_shape(filter, reverse_index, condition_id, "shape1") + register_node_shape(filter, reverse_index, condition_id, "shape2", field: @other_field) + + assert :deleted = + SubqueryIndex.remove_shape( + filter, + condition_id, + "shape1", + subquery_optimisation() + ) + + refute SubqueryIndex.has_positions?(reverse_index, "shape1") + + assert MapSet.new(["shape2"]) == + SubqueryIndex.all_shape_ids(filter, condition_id, @other_field) + + assert :deleted = + SubqueryIndex.remove_shape( + filter, + condition_id, + "shape2", + subquery_optimisation(field: @other_field) + ) + end + end + + defp register_node_shape(filter, reverse_index, condition_id, shape_id, opts \\ []) do + SubqueryIndex.register_shape(reverse_index, shape_id, make_plan(opts)) + + :ok = + SubqueryIndex.add_shape( + filter, + condition_id, + shape_id, + subquery_optimisation(opts) + ) + end + + defp seed_shape(reverse_index, shape_id, values) do + SubqueryIndex.seed_membership( + reverse_index, + shape_id, + @subquery_ref, + 0, + MapSet.new(values) + ) + + SubqueryIndex.mark_ready(reverse_index, 
shape_id) + end + + defp subquery_optimisation(opts \\ []) do + %{ + operation: "subquery", + field: Keyword.get(opts, :field, @field), + testexpr: %Ref{path: [Keyword.get(opts, :field, @field)], type: :int8}, + subquery_ref: Keyword.get(opts, :subquery_ref, @subquery_ref), + dep_index: Keyword.get(opts, :dep_index, 0), + polarity: Keyword.get(opts, :polarity, :positive), + and_where: Keyword.get(opts, :and_where) + } + end + + defp where(query, refs) do + Parser.parse_and_validate_expression!(query, refs: refs) + end + + defp make_plan(opts) do + polarity = Keyword.get(opts, :polarity, :positive) + dep_index = Keyword.get(opts, :dep_index, 0) + subquery_ref = Keyword.get(opts, :subquery_ref, @subquery_ref) + field = Keyword.get(opts, :field, @field) + + testexpr = %Ref{path: [field], type: :int8} + ref = %Ref{path: subquery_ref, type: {:array, :int8}} + + ast = %Func{ + name: "sublink_membership_check", + args: [testexpr, ref], + type: :bool + } + + %DnfPlan{ + disjuncts: [], + disjuncts_positions: [], + position_count: 1, + positions: %{ + 0 => %{ + ast: ast, + sql: "fake", + is_subquery: true, + negated: polarity == :negated, + dependency_index: dep_index, + subquery_ref: subquery_ref, + tag_columns: [field] + } + }, + dependency_positions: %{dep_index => [0]}, + dependency_disjuncts: %{}, + dependency_polarities: %{dep_index => polarity} + } + end +end diff --git a/packages/sync-service/test/electric/shapes/filter_test.exs b/packages/sync-service/test/electric/shapes/filter_test.exs index f7c68d3bcf..1cd9f48519 100644 --- a/packages/sync-service/test/electric/shapes/filter_test.exs +++ b/packages/sync-service/test/electric/shapes/filter_test.exs @@ -9,6 +9,7 @@ defmodule Electric.Shapes.FilterTest do alias Electric.Replication.Changes.TruncatedRelation alias Electric.Replication.Changes.UpdatedRecord alias Electric.Shapes.Filter + alias Electric.Shapes.Filter.Indexes.SubqueryIndex alias Electric.Shapes.Shape alias Support.StubInspector @@ -33,28 +34,6 @@ 
defmodule Electric.Shapes.FilterTest do refute Filter.indexed_shape?(shape) end - - test "returns true for non-optimisable subquery shapes with sublink fields" do - shape = - Shape.new!("t1", - where: "id = 1 OR id IN (SELECT id FROM t2)", - inspector: @inspector - ) - |> with_known_dependency_handles() - - assert Filter.indexed_shape?(shape) - end - - test "returns false for row-expression subquery shapes with no indexable fields" do - shape = - Shape.new!("t1", - where: "(id, number) IN (SELECT id, number FROM t2)", - inspector: @inspector - ) - |> with_known_dependency_handles() - - refute Filter.indexed_shape?(shape) - end end describe "affected_shapes/2" do @@ -521,7 +500,17 @@ defmodule Electric.Shapes.FilterTest do Shape.new!("table", where: "id = 1 AND 1 = ANY(an_array)", inspector: @inspector), Shape.new!("table", where: "id IN (1, 2, 3)", inspector: @inspector), Shape.new!("table", where: "id IN (4, 5)", inspector: @inspector), - Shape.new!("table", where: "id IN (1, 2) AND number > 5", inspector: @inspector) + Shape.new!("table", where: "id IN (1, 2) AND number > 5", inspector: @inspector), + Shape.new!("table", + where: "id IN (SELECT id FROM another_table)", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ), + Shape.new!("table", + where: "NOT id IN (SELECT id FROM another_table)", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) ] filter = Filter.new() @@ -549,6 +538,33 @@ defmodule Electric.Shapes.FilterTest do end) end + test "Filter.remove_shape/2 removes seeded subquery index state" do + filter = Filter.new() + state_before = snapshot_filter_ets(filter) + shape_id = "seeded-shape" + + shape = + Shape.new!("table", + where: "id IN (SELECT id FROM another_table)", + inspector: @inspector, + feature_flags: ["allow_subqueries"] + ) + + Filter.add_shape(filter, shape_id, shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.seed_membership(index, shape_id, subquery_ref, 
0, MapSet.new([5])) + SubqueryIndex.mark_ready(index, shape_id) + + assert snapshot_filter_ets(filter) != state_before + + Filter.remove_shape(filter, shape_id) + + assert snapshot_filter_ets(filter) == state_before + end + # Captures the full state of all ETS tables in a filter for comparison defp snapshot_filter_ets(filter) do %{ @@ -557,8 +573,7 @@ defmodule Electric.Shapes.FilterTest do where_cond: :ets.tab2list(filter.where_cond_table) |> Enum.sort(), eq_index: :ets.tab2list(filter.eq_index_table) |> Enum.sort(), incl_index: :ets.tab2list(filter.incl_index_table) |> Enum.sort(), - sublink_field: :ets.tab2list(filter.sublink_field_table) |> Enum.sort(), - sublink_dep: :ets.tab2list(filter.sublink_dep_table) |> Enum.sort() + subquery_index: :ets.tab2list(filter.subquery_index) |> Enum.sort() } end @@ -814,12 +829,7 @@ defmodule Electric.Shapes.FilterTest do } end - defp with_known_dependency_handles(%Shape{shape_dependencies: deps} = shape) do - handles = Enum.with_index(deps, fn _dep, index -> "dep-#{index}" end) - %{shape | shape_dependencies_handles: handles} - end - - describe "refs_fun threading through indexes" do + describe "subquery shapes routing in filter" do import Support.DbSetup import Support.DbStructureSetup import Support.ComponentSetup @@ -836,29 +846,18 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "refs_fun is threaded through equality index for compound WHERE clause with subquery", + test "unseeded subquery shape is still pruned by outer equality before fallback", %{inspector: inspector} do - # Create a shape with an equality-indexed condition AND a subquery - # The where clause "par_id = 7 AND id IN (SELECT id FROM parent)" will: - # 1. Use equality index for par_id = 7 - # 2. 
Use refs_fun for the subquery evaluation in the AND clause {:ok, shape} = Shape.new("child", inspector: inspector, where: "par_id = 7 AND id IN (SELECT id FROM parent)" ) - # Create refs_fun that returns sublink values based on the shape - # When id is in the sublink MapSet, the subquery condition passes - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) - # Record with par_id = 7 AND id in sublink results -> affected insert_matching = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "7"} @@ -866,49 +865,44 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record with par_id = 7 but id NOT in sublink results -> not affected insert_not_in_subquery = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "99", "par_id" => "7"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) - # Record with par_id != 7 -> not affected (equality index filters it out) insert_wrong_par_id = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "8"} } assert Filter.affected_shapes(filter, insert_wrong_par_id) == MapSet.new([]) + + insert_on_other_table = %NewRecord{ + relation: {"public", "parent"}, + record: %{"id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_on_other_table) == MapSet.new([]) end @tag with_sql: [ "CREATE TABLE IF NOT EXISTS incl_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS incl_child (id INT PRIMARY KEY, par_id INT REFERENCES incl_parent(id), tags int[] NOT NULL)" ] - test "refs_fun is threaded through inclusion index for compound WHERE clause with subquery", + test "unseeded subquery shape is still pruned by outer inclusion before fallback", %{inspector: 
inspector} do - # Create a shape with an inclusion-indexed condition AND a subquery - # The where clause "tags @> '{1,2}' AND id IN (SELECT id FROM parent)" will: - # 1. Use inclusion index for tags @> '{1,2}' - # 2. Use refs_fun for the subquery evaluation in the AND clause {:ok, shape} = Shape.new("incl_child", inspector: inspector, where: "tags @> '{1,2}' AND id IN (SELECT id FROM incl_parent)" ) - # Create refs_fun that returns sublink values based on the shape - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([10, 20, 30])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) - # Record with tags containing {1,2} AND id in sublink results -> affected insert_matching = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "10", "par_id" => "7", "tags" => "{1,2,3}"} @@ -916,15 +910,13 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record with tags containing {1,2} but id NOT in sublink results -> not affected insert_not_in_subquery = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "99", "par_id" => "7", "tags" => "{1,2,3}"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) - # Record with tags not containing {1,2} -> not affected (inclusion index filters it out) insert_wrong_tags = %NewRecord{ relation: {"public", "incl_child"}, record: %{"id" => "10", "par_id" => "7", "tags" => "{3,4}"} @@ -937,10 +929,9 @@ defmodule Electric.Shapes.FilterTest do "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" ] - test "refs_fun is called with the correct shape when multiple shapes exist", %{ + test "unseeded subquery shapes are still separated by outer indexed branches", %{ inspector: 
inspector } do - # Create two shapes that will have different sublink results {:ok, shape1} = Shape.new("child", inspector: inspector, @@ -953,21 +944,11 @@ defmodule Electric.Shapes.FilterTest do where: "par_id = 8 AND id IN (SELECT id FROM parent)" ) - # refs_fun returns different values based on which shape is being evaluated - refs_fun = fn shape -> - if shape.where.query =~ "par_id = 7" do - %{["$sublink", "0"] => MapSet.new([1, 2])} - else - %{["$sublink", "0"] => MapSet.new([3, 4])} - end - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape1) |> Filter.add_shape("shape2", shape2) - # Record matching shape1's equality AND subquery conditions insert1 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "1", "par_id" => "7"} @@ -975,7 +956,6 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert1) == MapSet.new(["shape1"]) - # Record matching shape2's equality AND subquery conditions insert2 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "3", "par_id" => "8"} @@ -983,37 +963,31 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert2) == MapSet.new(["shape2"]) - # Record matching shape1's equality but NOT its subquery (id=3 is in shape2's refs) insert3 = %NewRecord{ relation: {"public", "child"}, record: %{"id" => "3", "par_id" => "7"} } - assert Filter.affected_shapes(filter, insert3) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert3) == MapSet.new(["shape1"]) end @tag with_sql: [ "CREATE TABLE IF NOT EXISTS nested_parent (id INT PRIMARY KEY)", "CREATE TABLE IF NOT EXISTS nested_child (id INT PRIMARY KEY, field1 INT NOT NULL, field2 INT REFERENCES nested_parent(id))" ] - test "refs_fun is threaded through nested equality indexes", %{inspector: inspector} do - # Create a shape with two equality conditions and a subquery - # WHERE field1 = 10 AND field2 = 20 AND id IN (SELECT id FROM parent) + test "unseeded 
subquery shape with nested equality conditions is always routed (fallback)", %{ + inspector: inspector + } do {:ok, shape} = Shape.new("nested_child", inspector: inspector, where: "field1 = 10 AND field2 = 20 AND id IN (SELECT id FROM nested_parent)" ) - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} - end - filter = - Filter.new(refs_fun: refs_fun) + Filter.new() |> Filter.add_shape("shape1", shape) - # Record matching all conditions insert_matching = %NewRecord{ relation: {"public", "nested_child"}, record: %{"id" => "1", "field1" => "10", "field2" => "20"} @@ -1021,13 +995,12 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, insert_matching) == MapSet.new(["shape1"]) - # Record matching equality conditions but not subquery insert_not_in_subquery = %NewRecord{ relation: {"public", "nested_child"}, record: %{"id" => "99", "field1" => "10", "field2" => "20"} } - assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new([]) + assert Filter.affected_shapes(filter, insert_not_in_subquery) == MapSet.new(["shape1"]) end @tag with_sql: [ @@ -1040,20 +1013,25 @@ defmodule Electric.Shapes.FilterTest do # Shape with OR combining a subquery and a simple condition. # OR is not optimisable, so the shape lands in other_shapes AND # gets registered in the sublink inverted index. Root table changes - # must still be routed to this shape. + # must still be routed to this shape once seeded. 
{:ok, shape} = Shape.new("or_child", inspector: inspector, where: "par_id IN (SELECT id FROM or_parent) OR value = 'target'" ) - refs_fun = fn _shape -> - %{["$sublink", "0"] => MapSet.new([1, 2, 3])} + filter = Filter.new() + filter = Filter.add_shape(filter, "shape1", shape) + + # Seed the reverse index with subquery membership values + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + for value <- [1, 2, 3] do + SubqueryIndex.add_value(index, "shape1", subquery_ref, 0, value) end - filter = - Filter.new(refs_fun: refs_fun) - |> Filter.add_shape("shape1", shape) + SubqueryIndex.mark_ready(index, "shape1") # Record matching the OR's simple condition (value = 'target') insert_matching_value = %NewRecord{ @@ -1088,5 +1066,427 @@ defmodule Electric.Shapes.FilterTest do assert Filter.affected_shapes(filter, update_into_shape) == MapSet.new(["shape1"]) end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS like_parent_unseeded (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS like_child_unseeded (id INT PRIMARY KEY, name TEXT NOT NULL, parent_id INT REFERENCES like_parent_unseeded(id))" + ] + test "unseeded LIKE + subquery shape still lets non-subquery terms prune", %{ + inspector: inspector + } do + {:ok, shape} = + Shape.new("like_child_unseeded", + inspector: inspector, + where: "name LIKE 'keep%' AND parent_id IN (SELECT id FROM like_parent_unseeded)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + insert_match = %NewRecord{ + relation: {"public", "like_child_unseeded"}, + record: %{"id" => "10", "name" => "keep_me", "parent_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_match) == MapSet.new(["shape1"]) + + insert_like_miss = %NewRecord{ + relation: {"public", "like_child_unseeded"}, + record: %{"id" => "11", "name" => "discard", "parent_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_like_miss) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS 
parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "seeded subquery shape reached via non-subquery indexes is still verified against the full predicate", + %{inspector: inspector} do + {:ok, shape} = + Shape.new("child", + inspector: inspector, + where: "par_id = 7 AND id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.add_value(index, "shape1", subquery_ref, 0, 1) + SubqueryIndex.mark_ready(index, "shape1") + + wrong_subquery_value = %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "99", "par_id" => "7"} + } + + assert Filter.affected_shapes(filter, wrong_subquery_value) == MapSet.new([]) + + matching_record = %NewRecord{ + relation: {"public", "child"}, + record: %{"id" => "1", "par_id" => "7"} + } + + assert Filter.affected_shapes(filter, matching_record) == MapSet.new(["shape1"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "subquery positions are isolated per shape even when DNF positions overlap", %{ + inspector: inspector + } do + {:ok, shape1} = + Shape.new("child", + inspector: inspector, + where: "id IN (SELECT id FROM parent)" + ) + + {:ok, shape2} = + Shape.new("child", + inspector: inspector, + where: "par_id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape1) + |> Filter.add_shape("shape2", shape2) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.add_value(index, "shape1", subquery_ref, 0, 1) + SubqueryIndex.add_value(index, "shape2", subquery_ref, 0, 1) + SubqueryIndex.mark_ready(index, "shape1") + SubqueryIndex.mark_ready(index, "shape2") + + change = %NewRecord{ + relation: 
{"public", "child"}, + record: %{"id" => "50", "par_id" => "1"} + } + + assert Filter.affected_shapes(filter, change) == MapSet.new(["shape2"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS child (id INT PRIMARY KEY, par_id INT REFERENCES parent(id))" + ] + test "remove_shape cleans up subquery index metadata and values", %{inspector: inspector} do + {:ok, shape} = + Shape.new("child", + inspector: inspector, + where: "id IN (SELECT id FROM parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.add_value(index, "shape1", subquery_ref, 0, 1) + SubqueryIndex.mark_ready(index, "shape1") + + assert :ets.tab2list(index) != [] + + Filter.remove_shape(filter, "shape1") + + assert :ets.tab2list(index) == [] + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS neg_parent (id INT PRIMARY KEY, value TEXT NOT NULL)", + "CREATE TABLE IF NOT EXISTS neg_child (id INT PRIMARY KEY, parent_id INT REFERENCES neg_parent(id))" + ] + test "negated candidate derivation by complement", %{inspector: inspector} do + {:ok, shape} = + Shape.new("neg_child", + inspector: inspector, + where: "parent_id NOT IN (SELECT id FROM neg_parent WHERE value = 'keep')" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + # Seed membership with value 1 (parent id 1 matches the subquery "WHERE value = 'keep'") + SubqueryIndex.seed_membership( + index, + "shape1", + subquery_ref, + 0, + MapSet.new([1]) + ) + + SubqueryIndex.mark_ready(index, "shape1") + + # parent_id=1 is in the subquery view, so NOT IN means this should NOT route + insert_matching_member = %NewRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "10", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_matching_member) 
== MapSet.new([]) + + # parent_id=2 is NOT in the subquery view, so NOT IN means this SHOULD route + insert_not_member = %NewRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "11", "parent_id" => "2"} + } + + assert Filter.affected_shapes(filter, insert_not_member) == MapSet.new(["shape1"]) + + # Update crossing from non-matching to matching should route (union of old/new) + update_crossing = %UpdatedRecord{ + relation: {"public", "neg_child"}, + record: %{"id" => "10", "parent_id" => "1"}, + old_record: %{"id" => "10", "parent_id" => "2"} + } + + assert Filter.affected_shapes(filter, update_crossing) == MapSet.new(["shape1"]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS rep_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS rep_child (id INT PRIMARY KEY, par_id INT REFERENCES rep_parent(id))" + ] + test "repeated dependency positions in one shape", %{inspector: inspector} do + # Both positions reference the same dependency (rep_parent.id), but + # compare against different root-table columns. 
+ {:ok, shape} = + Shape.new("rep_child", + inspector: inspector, + where: "id IN (SELECT id FROM rep_parent) OR par_id IN (SELECT id FROM rep_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + # Seed the membership view with values {1, 2} + SubqueryIndex.seed_membership( + index, + "shape1", + subquery_ref, + 0, + MapSet.new([1, 2]) + ) + + SubqueryIndex.mark_ready(index, "shape1") + + # Only id matches (id=1, par_id=99) -> should route + insert_id_match = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "1", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_id_match) == MapSet.new(["shape1"]) + + # Only par_id matches (id=99, par_id=2) -> should route + insert_par_match = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "99", "par_id" => "2"} + } + + assert Filter.affected_shapes(filter, insert_par_match) == MapSet.new(["shape1"]) + + # Neither matches (id=99, par_id=99) -> should not route + insert_neither = %NewRecord{ + relation: {"public", "rep_child"}, + record: %{"id" => "99", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_neither) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS comp_parent (x INT NOT NULL, y INT NOT NULL, PRIMARY KEY (x, y))", + "CREATE TABLE IF NOT EXISTS comp_child (id INT PRIMARY KEY, a INT NOT NULL, b INT NOT NULL)" + ] + test "composite-key subquery routing", %{inspector: inspector} do + {:ok, shape} = + Shape.new("comp_child", + inspector: inspector, + where: "(a, b) IN (SELECT x, y FROM comp_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + # Seed membership with a tuple value {10, 20} + SubqueryIndex.seed_membership( + index, + "shape1", + subquery_ref, + 0, + MapSet.new([{10, 20}]) + ) + + 
SubqueryIndex.mark_ready(index, "shape1") + + # Matching tuple (a=10, b=20) should route + insert_match = %NewRecord{ + relation: {"public", "comp_child"}, + record: %{"id" => "1", "a" => "10", "b" => "20"} + } + + assert Filter.affected_shapes(filter, insert_match) == MapSet.new(["shape1"]) + + # Only one column matches (a=10, b=99) should not route + insert_partial = %NewRecord{ + relation: {"public", "comp_child"}, + record: %{"id" => "2", "a" => "10", "b" => "99"} + } + + assert Filter.affected_shapes(filter, insert_partial) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS like_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS like_child (id INT PRIMARY KEY, name TEXT NOT NULL, parent_id INT REFERENCES like_parent(id))" + ] + test "LIKE + subquery on the other_shapes path uses callback verification", %{ + inspector: inspector + } do + # LIKE is not optimisable, so this shape ends up in other_shapes. + # The subquery membership check should use the callback path. 
+ {:ok, shape} = + Shape.new("like_child", + inspector: inspector, + where: "name LIKE 'keep%' AND parent_id IN (SELECT id FROM like_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("shape1", shape) + + index = Filter.subquery_index(filter) + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.seed_membership( + index, + "shape1", + subquery_ref, + 0, + MapSet.new([1, 2]) + ) + + SubqueryIndex.mark_ready(index, "shape1") + + # Both conditions match -> route + insert_match = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "10", "name" => "keep_me", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_match) == MapSet.new(["shape1"]) + + # LIKE matches but subquery membership fails -> no route + insert_like_only = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "11", "name" => "keep_me", "parent_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_like_only) == MapSet.new([]) + + # Subquery matches but LIKE fails -> no route + insert_subquery_only = %NewRecord{ + relation: {"public", "like_child"}, + record: %{"id" => "12", "name" => "discard", "parent_id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_subquery_only) == MapSet.new([]) + end + + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS fb_parent (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS fb_child (id INT PRIMARY KEY, par_id INT REFERENCES fb_parent(id))" + ] + test "unseeded shape routes conservatively until marked ready", %{ + inspector: inspector + } do + # Shapes start unseeded in the general subquery index and route + # conservatively until mark_ready/2 is called. 
+ {:ok, fallback_shape} = + Shape.new("fb_child", + inspector: inspector, + where: "par_id IN (SELECT id FROM fb_parent)" + ) + + {:ok, indexed_shape} = + Shape.new("fb_child", + inspector: inspector, + where: "id IN (SELECT id FROM fb_parent)" + ) + + filter = + Filter.new() + |> Filter.add_shape("fallback_s", fallback_shape) + |> Filter.add_shape("indexed_s", indexed_shape) + + index = Filter.subquery_index(filter) + + # fallback_s stays unseeded by not calling mark_ready. indexed_s gets + # seeded and marked ready. + subquery_ref = ["$sublink", "0"] + + SubqueryIndex.seed_membership( + index, + "indexed_s", + subquery_ref, + 0, + MapSet.new([1]) + ) + + SubqueryIndex.mark_ready(index, "indexed_s") + + assert SubqueryIndex.fallback?(index, "fallback_s") + refute SubqueryIndex.fallback?(index, "indexed_s") + + # fallback_s routes for any root-table change, indexed_s only for matching + insert_match = %NewRecord{ + relation: {"public", "fb_child"}, + record: %{"id" => "1", "par_id" => "99"} + } + + assert Filter.affected_shapes(filter, insert_match) == + MapSet.new(["fallback_s", "indexed_s"]) + + insert_no_match = %NewRecord{ + relation: {"public", "fb_child"}, + record: %{"id" => "99", "par_id" => "99"} + } + + # fallback_s still routes, indexed_s does not + assert Filter.affected_shapes(filter, insert_no_match) == MapSet.new(["fallback_s"]) + + # Changes on unrelated table should not route either shape + insert_other = %NewRecord{ + relation: {"public", "fb_parent"}, + record: %{"id" => "1"} + } + + assert Filter.affected_shapes(filter, insert_other) == MapSet.new([]) + end end end diff --git a/packages/sync-service/test/electric/shapes/querying_test.exs b/packages/sync-service/test/electric/shapes/querying_test.exs index ffda11baca..4e3660ad83 100644 --- a/packages/sync-service/test/electric/shapes/querying_test.exs +++ b/packages/sync-service/test/electric/shapes/querying_test.exs @@ -1,10 +1,25 @@ defmodule Electric.Shapes.QueryingTest do use 
Support.TransactionCase, async: true - alias Electric.Shapes.Shape.SubqueryMoves + alias Electric.Replication.Eval.Parser + alias Electric.Shapes.DnfPlan alias Electric.Postgres.Inspector.DirectInspector - alias Electric.Shapes.Shape alias Electric.Shapes.Querying + alias Electric.Shapes.Shape + alias Electric.Shapes.SubqueryTags + + @refs %{ + ["id"] => :int4, + ["x"] => :int4, + ["y"] => :int4, + ["z"] => :int4, + ["status"] => :text, + ["name"] => :text, + ["a"] => :int4, + ["b"] => :int4 + } + @stack_id "test_stack" + @shape_handle "test_shape" describe "stream_initial_data/4" do test "should give information about the table and the result stream", %{db_conn: conn} do @@ -348,12 +363,65 @@ defmodule Electric.Shapes.QueryingTest do ) assert [ - %{value: %{value: "10", parent_id: "1"}, headers: %{tags: [^tag1]}}, - %{value: %{value: "20", parent_id: nil}, headers: %{tags: [^tag_null]}}, - %{value: %{value: "30", parent_id: "2"}, headers: %{tags: [^tag2]}} + %{ + value: %{value: "10", parent_id: "1"}, + headers: %{ + tags: [^tag1 <> "/", "/1"], + active_conditions: [true, false] + } + }, + %{ + value: %{value: "20", parent_id: nil}, + headers: %{ + tags: [^tag_null <> "/", "/1"], + active_conditions: [false, true] + } + }, + %{ + value: %{value: "30", parent_id: "2"}, + headers: %{ + tags: [^tag2 <> "/", "/1"], + active_conditions: [true, false] + } + } ] = result end + test "if shape has a negated subquery, computes DNF tags and active conditions", %{ + db_conn: conn + } do + for statement <- [ + "CREATE TABLE parent (id SERIAL PRIMARY KEY, excluded BOOLEAN NOT NULL DEFAULT FALSE)", + "CREATE TABLE child (id SERIAL PRIMARY KEY, value INTEGER, parent_id INTEGER REFERENCES parent(id))", + "INSERT INTO parent (excluded) VALUES (false), (true)", + "INSERT INTO child (value, parent_id) VALUES (10, 1), (20, 2)" + ], + do: Postgrex.query!(conn, statement) + + shape = + Shape.new!("child", + where: "parent_id NOT IN (SELECT id FROM parent WHERE excluded = true)", + 
inspector: {DirectInspector, conn} + ) + + tag1 = + :crypto.hash(:md5, "dummy-stack-id" <> "dummy-shape-handle" <> "v:1") + |> Base.encode16(case: :lower) + + assert [ + %{ + value: %{value: "10", parent_id: "1"}, + headers: %{ + tags: [^tag1], + active_conditions: [true] + } + } + ] = + decode_stream( + Querying.stream_initial_data(conn, "dummy-stack-id", "dummy-shape-handle", shape) + ) + end + test "if shape has a subquery, tags the results (with composite keys)", %{db_conn: conn} do tag1 = :crypto.hash( @@ -523,7 +591,7 @@ defmodule Electric.Shapes.QueryingTest do end end - describe "query_move_in/5 with SubqueryMoves.move_in_where_clause/3" do + describe "query_move_in/5 with Querying.move_in_where_clause/5" do test "preserves space padding for char(n) join columns", %{db_conn: conn} do for statement <- [ "CREATE TABLE parent (id CHAR(8) PRIMARY KEY, value INTEGER)", @@ -540,18 +608,35 @@ defmodule Electric.Shapes.QueryingTest do ) |> fill_handles() + tag1 = SubqueryTags.make_value_hash("dummy-stack-id", "dummy-shape-handle", "ab ") + tag2 = SubqueryTags.make_value_hash("dummy-stack-id", "dummy-shape-handle", "cd ") + + assert [ + %{value: %{parent_id: "ab "}, headers: %{tags: [^tag1]}}, + %{value: %{parent_id: "cd "}, headers: %{tags: [^tag2]}}, + %{value: %{parent_id: "ef "}, headers: %{tags: [_]}} + ] = + decode_stream( + Querying.stream_initial_data(conn, "dummy-stack-id", "dummy-shape-handle", shape) + ) + + {:ok, dnf_plan} = DnfPlan.compile(shape) move_in_values = ["ab ", "cd "] + views_before_move = %{["$sublink", "0"] => MapSet.new()} + views_after_move = %{["$sublink", "0"] => MapSet.new(move_in_values)} assert {where, params} = - SubqueryMoves.move_in_where_clause( - shape, - hd(shape.shape_dependencies_handles), - move_in_values + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs ) assert [ - %{value: %{parent_id: "ab "}}, - %{value: %{parent_id: "cd "}} + %{value: %{parent_id: "ab "}, 
headers: %{tags: [^tag1]}}, + %{value: %{parent_id: "cd "}, headers: %{tags: [^tag2]}} ] = Querying.query_move_in( conn, @@ -580,13 +665,18 @@ defmodule Electric.Shapes.QueryingTest do ) |> fill_handles() - move_in_values = ["1", "2"] + {:ok, dnf_plan} = DnfPlan.compile(shape) + move_in_values = [1, 2] + views_before_move = %{["$sublink", "0"] => MapSet.new()} + views_after_move = %{["$sublink", "0"] => MapSet.new(move_in_values)} assert {where, params} = - SubqueryMoves.move_in_where_clause( - shape, - hd(shape.shape_dependencies_handles), - move_in_values + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs ) tag1 = @@ -628,13 +718,18 @@ defmodule Electric.Shapes.QueryingTest do ) |> fill_handles() - move_in_values = [{"1", "1"}, {"2", "2"}] + {:ok, dnf_plan} = DnfPlan.compile(shape) + move_in_values = [{1, 1}, {2, 2}] + views_before_move = %{["$sublink", "0"] => MapSet.new()} + views_after_move = %{["$sublink", "0"] => MapSet.new(move_in_values)} assert {where, params} = - SubqueryMoves.move_in_where_clause( - shape, - hd(shape.shape_dependencies_handles), - move_in_values + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs ) tag1 = @@ -665,6 +760,350 @@ defmodule Electric.Shapes.QueryingTest do |> Enum.map(fn [_key, _tags, json] -> json end) |> decode_stream() end + + test "returns rows that become visible when the same dependency is used twice in a conjunction", + %{db_conn: conn} do + for statement <- [ + "CREATE TABLE dep (id INTEGER PRIMARY KEY, value INTEGER)", + "CREATE TABLE child (id INTEGER PRIMARY KEY, x INTEGER, y INTEGER)", + "INSERT INTO dep (id, value) VALUES (1, 10), (2, 20)", + "INSERT INTO child (id, x, y) VALUES (1, 1, 1), (2, 2, 1), (3, 1, 2), (4, 2, 2)" + ], + do: Postgrex.query!(conn, statement) + + shape = + Shape.new!("child", + where: "x IN (SELECT id FROM dep) AND y IN (SELECT id FROM dep)", + inspector: 
{DirectInspector, conn} + ) + |> fill_handles() + + assert length(shape.shape_dependencies) == 1 + + {:ok, dnf_plan} = DnfPlan.compile(shape) + views_before_move = %{["$sublink", "0"] => MapSet.new([1])} + views_after_move = %{["$sublink", "0"] => MapSet.new([1, 2])} + + {where, params} = + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs + ) + + rows = + Querying.query_move_in( + conn, + "dummy-stack-id", + "dummy-shape-handle", + shape, + {where, params} + ) + |> Enum.map(fn [_key, _tags, json] -> json end) + |> decode_stream() + + assert rows |> Enum.map(& &1.value.id) |> Enum.sort() == ["2", "3", "4"] + end + + test "does not re-select rows that were already visible through another branch of the same dependency", + %{db_conn: conn} do + for statement <- [ + "CREATE TABLE dep (id INTEGER PRIMARY KEY, value INTEGER)", + "CREATE TABLE child (id INTEGER PRIMARY KEY, x INTEGER, y INTEGER)", + "INSERT INTO dep (id, value) VALUES (1, 10), (2, 20)", + "INSERT INTO child (id, x, y) VALUES (1, 1, 1), (2, 2, 1), (3, 1, 2), (4, 2, 2)" + ], + do: Postgrex.query!(conn, statement) + + shape = + Shape.new!("child", + where: "x IN (SELECT id FROM dep) OR y IN (SELECT id FROM dep)", + inspector: {DirectInspector, conn} + ) + |> fill_handles() + + assert length(shape.shape_dependencies) == 1 + + {:ok, dnf_plan} = DnfPlan.compile(shape) + views_before_move = %{["$sublink", "0"] => MapSet.new([1])} + views_after_move = %{["$sublink", "0"] => MapSet.new([1, 2])} + + {where, params} = + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs + ) + + rows = + Querying.query_move_in( + conn, + "dummy-stack-id", + "dummy-shape-handle", + shape, + {where, params} + ) + |> Enum.map(fn [_key, _tags, json] -> json end) + |> decode_stream() + + assert rows |> Enum.map(& &1.value.id) == ["4"] + end + + test "handles repeated negated dependency positions by comparing before 
and after views", + %{db_conn: conn} do + for statement <- [ + "CREATE TABLE dep (id INTEGER PRIMARY KEY, value INTEGER)", + "CREATE TABLE child (id INTEGER PRIMARY KEY, x INTEGER, y INTEGER)", + "INSERT INTO dep (id, value) VALUES (1, 10), (2, 20)", + "INSERT INTO child (id, x, y) VALUES (1, 1, 1), (2, 1, 3), (3, 3, 1), (4, 3, 3)" + ], + do: Postgrex.query!(conn, statement) + + shape = + Shape.new!("child", + where: "NOT x IN (SELECT id FROM dep) AND NOT y IN (SELECT id FROM dep)", + inspector: {DirectInspector, conn} + ) + |> fill_handles() + + assert length(shape.shape_dependencies) == 1 + + {:ok, dnf_plan} = DnfPlan.compile(shape) + views_before_move = %{["$sublink", "0"] => MapSet.new([1, 2])} + views_after_move = %{["$sublink", "0"] => MapSet.new([2])} + + {where, params} = + Querying.move_in_where_clause( + dnf_plan, + 0, + views_before_move, + views_after_move, + shape.where.used_refs + ) + + rows = + Querying.query_move_in( + conn, + "dummy-stack-id", + "dummy-shape-handle", + shape, + {where, params} + ) + |> Enum.map(fn [_key, _tags, json] -> json end) + |> decode_stream() + + assert rows |> Enum.map(& &1.value.id) |> Enum.sort() == ["1", "2", "3"] + end + end + + describe "move_in_where_clause/5 - x IN sq1 OR y IN sq2" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "move on dep 0 generates candidate for sq1 and exclusion for sq2", + %{plan: plan, where: where} do + views_before_move = %{ + ["$sublink", "0"] => MapSet.new([10]), + ["$sublink", "1"] => MapSet.new([20, 30]) + } + + views_after_move = %{ + ["$sublink", "0"] => MapSet.new([1, 2, 3, 10]), + ["$sublink", "1"] => MapSet.new([20, 30]) + } + + {sql, params} = + Querying.move_in_where_clause( + plan, + 0, + views_before_move, + views_after_move, + where.used_refs + ) + + assert sql =~ "= ANY 
($1::" + assert sql =~ "AND NOT" + assert sql =~ "= ANY ($2::" + assert sql =~ "= ANY ($3::" + assert length(params) == 3 + assert Enum.sort(Enum.at(params, 0)) == [1, 2, 3, 10] + assert Enum.at(params, 1) == [10] + assert Enum.sort(Enum.at(params, 2)) == [20, 30] + end + + test "move on dep 1 generates candidate for sq2 and exclusion for sq1", + %{plan: plan, where: where} do + views_before_move = %{ + ["$sublink", "0"] => MapSet.new([5]), + ["$sublink", "1"] => MapSet.new([10]) + } + + views_after_move = %{ + ["$sublink", "0"] => MapSet.new([5]), + ["$sublink", "1"] => MapSet.new([10, 100]) + } + + {sql, params} = + Querying.move_in_where_clause( + plan, + 1, + views_before_move, + views_after_move, + where.used_refs + ) + + assert sql =~ "AND NOT" + assert length(params) == 3 + assert Enum.sort(Enum.at(params, 0)) == [10, 100] + assert Enum.at(params, 1) == [10] + assert Enum.at(params, 2) == [5] + end + end + + describe "move_in_where_clause/5 - (x IN sq1 AND status = 'open') OR y IN sq2" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "move on dep 0 includes row predicate in candidate", + %{plan: plan, where: where} do + views_before_move = %{ + ["$sublink", "0"] => MapSet.new([10]), + ["$sublink", "1"] => MapSet.new([20]) + } + + views_after_move = %{ + ["$sublink", "0"] => MapSet.new([1, 2, 10]), + ["$sublink", "1"] => MapSet.new([20]) + } + + {sql, params} = + Querying.move_in_where_clause( + plan, + 0, + views_before_move, + views_after_move, + where.used_refs + ) + + assert sql =~ "= ANY ($1::" + assert sql =~ "= ANY ($2::" + assert sql =~ ~s|"status" = 'open'| + assert sql =~ "AND NOT" + assert length(params) == 3 + end + end + + describe "move_in_where_clause/5 - negated subqueries" do + test "compares before and after views for x 
NOT IN sq1" do + {where, deps} = + parse_where_with_sublinks(~S"NOT x IN (SELECT id FROM dep1)", 1) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + {sql, params} = + Querying.move_in_where_clause( + plan, + 0, + %{["$sublink", "0"] => MapSet.new([1, 2, 3])}, + %{["$sublink", "0"] => MapSet.new([3])}, + where.used_refs + ) + + assert sql =~ ~s|NOT ("x" = ANY ($1::| + assert sql =~ ~s|AND NOT (NOT ("x" = ANY ($2::| + assert params == [[3], [1, 2, 3]] + end + + test "compares before and after views for nested negated subqueries" do + {where, deps} = + parse_where_with_sublinks(~S"NOT (x = 7 OR y IN (SELECT id FROM dep1))", 1) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + {sql, params} = + Querying.move_in_where_clause( + plan, + 0, + %{["$sublink", "0"] => MapSet.new([5, 6])}, + %{["$sublink", "0"] => MapSet.new([6])}, + where.used_refs + ) + + assert sql =~ ~s|NOT ("x" = 7)| + assert sql =~ ~s|NOT ("y" = ANY ($1::| + assert sql =~ ~s|NOT ("y" = ANY ($2::| + assert params == [[6], [5, 6]] + end + end + + describe "active_conditions_sql/1" do + test "generates per-position boolean SQL expressions" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + sqls = Querying.active_conditions_sql(plan) + + assert length(sqls) == plan.position_count + + Enum.each(sqls, fn sql -> + assert sql =~ "::boolean" + end) + end + end + + describe "tags_sql/3" do + test "generates per-disjunct tag SQL with position slots" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + sqls = Querying.tags_sql(plan, @stack_id, @shape_handle) + + assert length(sqls) == length(plan.disjuncts) + + 
Enum.each(sqls, fn sql -> + assert sql =~ "'/' ||" + end) + + [tag0_sql, _tag1_sql] = sqls + assert tag0_sql =~ "md5(" + assert tag0_sql =~ "'1'" + end end defp decode_stream(stream), @@ -681,4 +1120,65 @@ defmodule Electric.Shapes.QueryingTest do handles = Enum.map(filled_deps, &Shape.generate_id/1) %{shape | shape_dependencies: filled_deps, shape_dependencies_handles: handles} end + + defp parse_where_with_sublinks(where_clause, num_deps, opts \\ []) do + sublink_refs = + Keyword.get_lazy(opts, :sublink_refs, fn -> + Map.new(0..(num_deps - 1), fn i -> + {["$sublink", "#{i}"], {:array, :int4}} + end) + end) + + dep_columns = Keyword.get(opts, :dep_columns, nil) + + sublink_queries = + Map.new(0..(num_deps - 1), fn i -> + cols = + if dep_columns do + Enum.at(dep_columns, i) |> Enum.join(", ") + else + "id" + end + + {i, "SELECT #{cols} FROM dep#{i + 1}"} + end) + + all_refs = Map.merge(@refs, sublink_refs) + {:ok, pgquery} = Parser.parse_query(where_clause) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, + refs: all_refs, + sublink_queries: sublink_queries + ) + + deps = + Enum.map(0..(num_deps - 1), fn _i -> + %Shape{ + root_table: {"public", "dep"}, + root_table_id: 100, + root_pk: ["id"], + root_column_count: 1, + where: nil, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + end) + + {expr, deps} + end + + defp make_shape(where, deps) do + %Shape{ + root_table: {"public", "test"}, + root_table_id: 1, + root_pk: ["id"], + root_column_count: 5, + where: where, + selected_columns: ["id", "x", "y", "status"], + explicitly_selected_columns: ["id", "x", "y", "status"], + shape_dependencies: deps, + shape_dependencies_handles: Enum.with_index(deps, fn _, i -> "dep_handle_#{i}" end) + } + end end diff --git a/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs b/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs deleted file mode 100644 index 46063cb284..0000000000 --- 
a/packages/sync-service/test/electric/shapes/shape/subquery_moves_test.exs +++ /dev/null @@ -1,218 +0,0 @@ -defmodule Electric.Shapes.Shape.SubqueryMovesTest do - use ExUnit.Case, async: true - - alias Electric.Replication.Eval - alias Electric.Shapes.Shape - alias Electric.Shapes.Shape.SubqueryMoves - - @inspector Support.StubInspector.new( - tables: ["parent", "child"], - columns: [ - %{name: "id", type: "int8", pk_position: 0, type_id: {20, 1}}, - %{name: "value", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "parent_id", type: "int8", pk_position: nil, type_id: {20, 1}} - ] - ) - - @composite_inspector Support.StubInspector.new( - tables: ["parent", "child"], - columns: [ - %{name: "id1", type: "int4", pk_position: 0, type_id: {23, 1}}, - %{name: "id2", type: "text", pk_position: 1, type_id: {28, 1}}, - %{name: "col1", type: "int4", pk_position: nil, type_id: {23, 1}}, - %{name: "col2", type: "text", pk_position: nil, type_id: {28, 1}}, - %{name: "value", type: "int4", pk_position: nil, type_id: {23, 1}} - ] - ) - - describe "move_in_where_clause/3" do - test "generates ANY clause for single column subquery" do - # Query in the shape is normalized on casing, and we're matching that casing for this test - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM public.parent WHERE value = '1')", - inspector: @inspector - ) - |> fill_handles() - - move_ins = ["1", "2", "3"] - - {query, params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == "parent_id = ANY ($1::text[]::int8[])" - assert params == [["1", "2", "3"]] - end - - test "generates unnest clause for composite key subquery" do - shape = - Shape.new!("child", - where: "(col1, col2) IN (SELECT id1, id2 FROM public.parent WHERE value = 1)", - inspector: @composite_inspector - ) - |> fill_handles() - - # Move-ins for composite keys come as tuples - move_ins = [{"1", "a"}, {"2", "b"}] - - {query, 
params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == - "(col1, col2) IN (SELECT * FROM unnest($1::text[]::int4[], $2::text[]::text[]))" - - assert params == [["1", "2"], ["a", "b"]] - end - - test "handles shape without where clause in dependency" do - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM public.parent)", - inspector: @inspector - ) - |> fill_handles() - - move_ins = ["1"] - - {query, params} = - SubqueryMoves.move_in_where_clause( - shape, - Enum.at(shape.shape_dependencies_handles, 0), - move_ins - ) - - assert query == "parent_id = ANY ($1::text[]::int8[])" - assert params == [["1"]] - end - end - - describe "make_move_out_control_message/2" do - test "creates control message with patterns for single values" do - shape = %Shape{ - root_table: {"public", "child"}, - root_table_id: 1, - shape_dependencies_handles: ["dep-handle-1"], - tag_structure: [["parent_id"]] - } - - move_outs = [{"dep-handle-1", [{1, "1"}, {2, "2"}, {3, "3"}]}] - - message = - SubqueryMoves.make_move_out_control_message(shape, "stack-id", "shape-handle", move_outs) - - tag1 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:1") - |> Base.encode16(case: :lower) - - tag2 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:2") - |> Base.encode16(case: :lower) - - tag3 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "v:3") - |> Base.encode16(case: :lower) - - assert message == %{ - headers: %{ - event: "move-out", - patterns: [ - %{pos: 0, value: tag1}, - %{pos: 0, value: tag2}, - %{pos: 0, value: tag3} - ] - } - } - end - - test "creates patterns for composite values" do - shape = %Shape{ - root_table: {"public", "child"}, - root_table_id: 1, - shape_dependencies_handles: ["dep-handle-1"], - tag_structure: [[{:hash_together, ["col1", "col2"]}]] - } - - # Composite keys are represented as lists - move_outs = [{"dep-handle-1", [{{1, "a"}, {"1", "a"}}, {{2, 
"b"}, {"2", "b"}}]}] - - message = - SubqueryMoves.make_move_out_control_message(shape, "stack-id", "shape-handle", move_outs) - - tag1 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "col1:v:1col2:v:a") - |> Base.encode16(case: :lower) - - tag2 = - :crypto.hash(:md5, "stack-id" <> "shape-handle" <> "col1:v:2col2:v:b") - |> Base.encode16(case: :lower) - - assert message == %{ - headers: %{ - event: "move-out", - patterns: [%{pos: 0, value: tag1}, %{pos: 0, value: tag2}] - } - } - end - end - - describe "move_in_tag_structure/1" do - test "returns empty list for shape without where clause" do - shape = Shape.new!("child", inspector: @inspector) - - assert SubqueryMoves.move_in_tag_structure(shape) == {[], %{}} - end - - test "returns empty list for shape without dependencies" do - shape = Shape.new!("child", where: "parent_id > 5", inspector: @inspector) - - assert SubqueryMoves.move_in_tag_structure(shape) == {[], %{}} - end - - test "extracts single column reference from sublink" do - shape = - Shape.new!("child", - where: "parent_id IN (SELECT id FROM parent)", - inspector: @inspector - ) - - result = SubqueryMoves.move_in_tag_structure(shape) - - assert {[["parent_id"]], - %{["$sublink", "0"] => %Eval.Expr{eval: %Eval.Parser.Ref{path: ["parent_id"]}}}} = - result - end - - test "extracts composite key references from row expression" do - shape = - Shape.new!("child", - where: "(col1, col2) IN (SELECT id1, id2 FROM parent)", - inspector: @composite_inspector - ) - - result = SubqueryMoves.move_in_tag_structure(shape) - - assert {[[{:hash_together, ["col1", "col2"]}]], - %{ - ["$sublink", "0"] => %Eval.Expr{ - eval: %Eval.Parser.RowExpr{ - elements: [%Eval.Parser.Ref{path: ["col1"]}, %Eval.Parser.Ref{path: ["col2"]}] - } - } - }} = result - end - end - - defp fill_handles(shape) do - filled_deps = Enum.map(shape.shape_dependencies, &fill_handles/1) - handles = Enum.map(filled_deps, &Shape.generate_id/1) - %{shape | shape_dependencies: filled_deps, 
shape_dependencies_handles: handles} - end -end diff --git a/packages/sync-service/test/electric/shapes/shape_test.exs b/packages/sync-service/test/electric/shapes/shape_test.exs index ee772356ee..d2bc900e69 100644 --- a/packages/sync-service/test/electric/shapes/shape_test.exs +++ b/packages/sync-service/test/electric/shapes/shape_test.exs @@ -4,6 +4,7 @@ defmodule Electric.Shapes.ShapeTest do alias Electric.Replication.Changes.{NewRecord, DeletedRecord, UpdatedRecord} alias Electric.Replication.Eval.Parser alias Electric.Replication.Changes + alias Electric.Shapes.DnfPlan alias Electric.Shapes.Shape @where Parser.parse_and_validate_expression!("value ILIKE '%matches%'", @@ -13,6 +14,18 @@ defmodule Electric.Shapes.ShapeTest do refs: %{["data"] => {:array, :int4}} ) @relation_id 1 + @refs %{ + ["id"] => :int4, + ["x"] => :int4, + ["y"] => :int4, + ["z"] => :int4, + ["status"] => :text, + ["name"] => :text, + ["a"] => :int4, + ["b"] => :int4 + } + @stack_id "test_stack" + @shape_handle "test_shape" describe "convert_change/2" do test "skips changes for other tables" do @@ -296,6 +309,109 @@ defmodule Electric.Shapes.ShapeTest do ] end + test "uses DNF metadata for streamed changes when a subquery is combined with OR" do + where = + Parser.parse_and_validate_expression!( + "inner_id IN (SELECT id FROM inner_table WHERE include_inner = true) OR include_outer = true", + refs: %{ + ["inner_id"] => :int4, + ["include_outer"] => :bool, + ["$sublink", "0"] => {:array, :int4} + }, + sublink_queries: %{0 => "SELECT id FROM inner_table WHERE include_inner = true"} + ) + + shape = %Shape{ + root_table: {"public", "outer_table"}, + root_table_id: @relation_id, + where: where, + selected_columns: ["id", "inner_id", "include_outer"], + explicitly_selected_columns: ["id", "inner_id", "include_outer"], + shape_dependencies: [ + %Shape{ + root_table: {"public", "inner_table"}, + root_table_id: 2, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + ] + } + + 
{:ok, dnf_plan} = DnfPlan.compile(shape) + + [converted] = + Shape.convert_change( + shape, + %NewRecord{ + relation: {"public", "outer_table"}, + record: %{"id" => "1", "inner_id" => "1", "include_outer" => "true"} + }, + stack_id: "test_stack", + shape_handle: "test_handle", + extra_refs: + {%{["$sublink", "0"] => MapSet.new()}, %{["$sublink", "0"] => MapSet.new()}}, + dnf_plan: dnf_plan + ) + + subquery_tag = + :crypto.hash(:md5, "test_stack" <> "test_handle" <> "v:1") + |> Base.encode16(case: :lower) + + assert converted.move_tags == [subquery_tag <> "/", "/1"] + assert converted.active_conditions == [false, true] + end + + test "keeps updates when only active_conditions change after column filtering" do + where = + Parser.parse_and_validate_expression!( + "inner_id IN (SELECT id FROM inner_table WHERE include_inner = true) OR include_outer = true", + refs: %{ + ["inner_id"] => :int4, + ["include_outer"] => :bool, + ["$sublink", "0"] => {:array, :int4} + }, + sublink_queries: %{0 => "SELECT id FROM inner_table WHERE include_inner = true"} + ) + + shape = %Shape{ + root_table: {"public", "outer_table"}, + root_table_id: @relation_id, + where: where, + selected_columns: ["id"], + explicitly_selected_columns: ["id"], + shape_dependencies: [ + %Shape{ + root_table: {"public", "inner_table"}, + root_table_id: 2, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + ] + } + + {:ok, dnf_plan} = DnfPlan.compile(shape) + + [converted] = + Shape.convert_change( + shape, + %UpdatedRecord{ + relation: {"public", "outer_table"}, + old_record: %{"id" => "1", "inner_id" => "1", "include_outer" => "true"}, + record: %{"id" => "1", "inner_id" => "1", "include_outer" => "true"} + }, + stack_id: "test_stack", + shape_handle: "test_handle", + extra_refs: + {%{["$sublink", "0"] => MapSet.new([1])}, %{["$sublink", "0"] => MapSet.new()}}, + dnf_plan: dnf_plan + ) + + assert converted.old_record == %{"id" => "1"} + assert converted.record == %{"id" => "1"} + assert 
converted.active_conditions == [false, true] + assert converted.removed_move_tags == [] + end + test "correctly converts updates to deleted records with subqueries if the referenced set has changed" do shape = %Shape{ root_table: {"public", "table"}, @@ -610,6 +726,37 @@ defmodule Electric.Shapes.ShapeTest do ) end + @tag with_sql: [ + "CREATE TABLE IF NOT EXISTS accounts (id INT PRIMARY KEY)", + "CREATE TABLE IF NOT EXISTS users (id INT PRIMARY KEY, account_id INT REFERENCES accounts(id), active BOOLEAN NOT NULL DEFAULT false)" + ] + test "deduplicates identical subqueries onto one dependency", %{inspector: inspector} do + assert {:ok, + %Shape{ + where: where, + shape_dependencies: [ + %Shape{ + root_table: {"public", "accounts"}, + where: %{query: "id > 5"} + } + ], + subquery_comparison_expressions: comparison_expressions + }} = + Shape.new("users", + inspector: inspector, + where: + "(active = true OR account_id IN (SELECT id FROM accounts WHERE id > 5)) AND account_id IN (SELECT id FROM accounts WHERE id > 5)" + ) + + assert where.used_refs == %{ + ["active"] => :bool, + ["account_id"] => :int4, + ["$sublink", "0"] => {:array, :int4} + } + + assert Map.keys(comparison_expressions) == [["$sublink", "0"]] + end + @tag with_sql: [ "CREATE TABLE IF NOT EXISTS project (id INT PRIMARY KEY, value INT NOT NULL)", "CREATE TABLE IF NOT EXISTS item (id INT PRIMARY KEY, value INT NOT NULL)" @@ -1015,4 +1162,325 @@ defmodule Electric.Shapes.ShapeTest do refute Shape.comparable(shape1) === Shape.comparable(shape2) end end + + describe "get_row_metadata/6 - single subquery" do + test "row included when value is in subquery view" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep)", 1) + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5])} + + assert {:ok, true, tags, active_conditions} = + 
Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true] + assert length(tags) == 1 + end + + test "row excluded when value is not in subquery view" do + {where, deps} = parse_where_with_sublinks(~S"x IN (SELECT id FROM dep)", 1) + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([99])} + + assert {:ok, false, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false] + end + end + + describe "get_row_metadata/6 - OR with subqueries" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "included via first disjunct only", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true, false] + assert length(tags) == 2 + end + + test "included via second disjunct only", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false, true] + end + + test "included via both disjuncts", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] 
=> MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [true, true] + end + + test "excluded when neither disjunct satisfied", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([99]), ["$sublink", "1"] => MapSet.new([99])} + + assert {:ok, false, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert active_conditions == [false, false] + end + end + + describe "get_row_metadata/6 - mixed row predicate and subquery" do + setup do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + %{plan: plan, where: where} + end + + test "included via first disjunct when subquery matches and row predicate true", + %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert Enum.count(active_conditions, & &1) == 2 + end + + test "excluded from first disjunct when row predicate false", %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, false, _tags, active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, info} -> not info.is_subquery end) + |> elem(0) + + refute 
Enum.at(active_conditions, row_pred_pos) + end + + test "included via second disjunct even when first disjunct row predicate false", + %{plan: plan, where: where} do + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, _tags, _active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + end + end + + describe "get_row_metadata/6 - tags" do + test "tags have correct structure with slots per position" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + assert {:ok, true, tags, _active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + assert length(tags) == 2 + + [tag0, tag1] = tags + [slot0_0, slot0_1] = String.split(tag0, "/") + assert slot0_0 != "" + assert slot0_1 == "" + + [slot1_0, slot1_1] = String.split(tag1, "/") + assert slot1_0 == "" + assert slot1_1 != "" + end + + test "row predicate positions get sentinel value in tags" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + + record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([])} + + assert {:ok, true, tags, _active_conditions} = + Shape.get_row_metadata(plan, record, views, where, @stack_id, @shape_handle) + + [tag0 | _] = tags + slots = String.split(tag0, "/") + + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, 
info} -> not info.is_subquery end) + |> elem(0) + + assert Enum.at(slots, row_pred_pos) == "1" + end + end + + describe "get_row_metadata/6 - update scenarios" do + test "update that changes which disjuncts are satisfied" do + {where, deps} = + parse_where_with_sublinks( + ~S"(x IN (SELECT id FROM dep1) AND status = 'open') OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + views = %{["$sublink", "0"] => MapSet.new([5]), ["$sublink", "1"] => MapSet.new([10])} + + old_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + + assert {:ok, true, old_tags, old_ac} = + Shape.get_row_metadata(plan, old_record, views, where, @stack_id, @shape_handle) + + new_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "closed"} + + assert {:ok, true, new_tags, new_ac} = + Shape.get_row_metadata(plan, new_record, views, where, @stack_id, @shape_handle) + + row_pred_pos = + plan.positions + |> Enum.find(fn {_pos, info} -> not info.is_subquery end) + |> elem(0) + + assert Enum.at(old_ac, row_pred_pos) == true + assert Enum.at(new_ac, row_pred_pos) == false + + removed_tags = old_tags -- new_tags + assert removed_tags == [] or length(removed_tags) >= 0 + end + + test "correct removed_tags when column values change" do + {where, deps} = + parse_where_with_sublinks( + ~S"x IN (SELECT id FROM dep1) OR y IN (SELECT id FROM dep2)", + 2 + ) + + shape = make_shape(where, deps) + {:ok, plan} = DnfPlan.compile(shape) + views = %{["$sublink", "0"] => MapSet.new([5, 99]), ["$sublink", "1"] => MapSet.new([10])} + + old_record = %{"id" => "1", "x" => "5", "y" => "10", "status" => "open"} + + {:ok, _old_incl, old_tags, _old_ac} = + Shape.get_row_metadata(plan, old_record, views, where, @stack_id, @shape_handle) + + new_record = %{"id" => "1", "x" => "99", "y" => "10", "status" => "open"} + + {:ok, _new_incl, new_tags, _new_ac} = + Shape.get_row_metadata(plan, new_record, views, where, @stack_id, @shape_handle) 
+ + [old_tag0, _] = old_tags + [new_tag0, _] = new_tags + assert old_tag0 != new_tag0 + + [_, old_tag1] = old_tags + [_, new_tag1] = new_tags + assert old_tag1 == new_tag1 + + removed_tags = old_tags -- new_tags + assert length(removed_tags) == 1 + end + end + + defp parse_where_with_sublinks(where_clause, num_deps, opts \\ []) do + sublink_refs = + Keyword.get_lazy(opts, :sublink_refs, fn -> + Map.new(0..(num_deps - 1), fn i -> + {["$sublink", "#{i}"], {:array, :int4}} + end) + end) + + dep_columns = Keyword.get(opts, :dep_columns, nil) + + sublink_queries = + Map.new(0..(num_deps - 1), fn i -> + cols = + if dep_columns do + Enum.at(dep_columns, i) |> Enum.join(", ") + else + "id" + end + + {i, "SELECT #{cols} FROM dep#{i + 1}"} + end) + + all_refs = Map.merge(@refs, sublink_refs) + {:ok, pgquery} = Parser.parse_query(where_clause) + + {:ok, expr} = + Parser.validate_where_ast(pgquery, + refs: all_refs, + sublink_queries: sublink_queries + ) + + deps = + Enum.map(0..(num_deps - 1), fn _i -> + %Shape{ + root_table: {"public", "dep"}, + root_table_id: 100, + root_pk: ["id"], + root_column_count: 1, + where: nil, + selected_columns: ["id"], + explicitly_selected_columns: ["id"] + } + end) + + {expr, deps} + end + + defp make_shape(where, deps) do + %Shape{ + root_table: {"public", "test"}, + root_table_id: 1, + root_pk: ["id"], + root_column_count: 5, + where: where, + selected_columns: ["id", "x", "y", "status"], + explicitly_selected_columns: ["id", "x", "y", "status"], + shape_dependencies: deps, + shape_dependencies_handles: Enum.with_index(deps, fn _, i -> "dep_handle_#{i}" end) + } + end end diff --git a/packages/sync-service/test/integration/subquery_move_out_test.exs b/packages/sync-service/test/integration/subquery_move_out_test.exs index ffacbbe1ea..ce6e1e86a0 100644 --- a/packages/sync-service/test/integration/subquery_move_out_test.exs +++ b/packages/sync-service/test/integration/subquery_move_out_test.exs @@ -357,6 +357,90 @@ defmodule 
Electric.Integration.SubqueryMoveOutTest do end end + describe "negated subquery move-in and move-out" do + setup [:with_unique_db, :with_inner_outer_tables, :with_sql_execute] + setup :with_complete_stack + setup :with_electric_client + + # Shape: outer rows whose inner row is NOT active + @negated_where "inner_id NOT IN (SELECT id FROM inner_table WHERE active = true)" + + setup _ctx do + shape = ShapeDefinition.new!("outer_table", where: @negated_where) + %{shape: shape} + end + + @tag with_sql: [ + "INSERT INTO inner_table (id, active) VALUES ('inner-1', true)", + "INSERT INTO outer_table (id, inner_id, value) VALUES ('outer-1', 'inner-1', 'test value')" + ] + test "outer row enters shape when inner row becomes inactive (negated move-in)", %{ + client: client, + shape: shape, + db_conn: db_conn + } do + stream = Client.stream(client, shape, live: true) + + with_consumer stream do + # Initially outer-1 is not in shape because inner-1 is active. + # NOT IN active inner rows means only outer rows linked to inactive + # inner rows are included. + assert_up_to_date(consumer) + + # Deactivate inner-1 and outer-1 should enter the shape. + Postgrex.query!(db_conn, "UPDATE inner_table SET active = false WHERE id = 'inner-1'", []) + + assert_insert(consumer, %{"id" => "outer-1"}) + end + end + + @tag with_sql: [ + "INSERT INTO inner_table (id, active) VALUES ('inner-1', false)", + "INSERT INTO outer_table (id, inner_id, value) VALUES ('outer-1', 'inner-1', 'test value')" + ] + test "outer row leaves shape when inner row becomes active (negated move-out)", %{ + client: client, + shape: shape, + db_conn: db_conn + } do + stream = Client.stream(client, shape, live: true) + + with_consumer stream do + # Initially outer-1 is in shape because inner-1 is inactive. + assert_insert(consumer, %{"id" => "outer-1"}) + assert_up_to_date(consumer) + + # Activate inner-1 and outer-1 should leave the shape. 
+ Postgrex.query!(db_conn, "UPDATE inner_table SET active = true WHERE id = 'inner-1'", []) + + assert_delete(consumer, %{"id" => "outer-1"}) + end + end + end + + # Helper to set up inner/outer tables for subquery tests + def with_inner_outer_tables(%{db_conn: conn} = _context) do + statements = [ + """ + CREATE TABLE inner_table ( + id TEXT PRIMARY KEY, + active BOOLEAN NOT NULL DEFAULT true + ) + """, + """ + CREATE TABLE outer_table ( + id TEXT PRIMARY KEY, + inner_id TEXT NOT NULL REFERENCES inner_table(id) ON DELETE CASCADE, + value TEXT NOT NULL + ) + """ + ] + + Enum.each(statements, &Postgrex.query!(conn, &1, [])) + + %{tables: [{"public", "inner_table"}, {"public", "outer_table"}]} + end + # Helper to set up parent/child tables for subquery tests def with_parent_child_tables(%{db_conn: conn} = _context) do statements = [ diff --git a/packages/sync-service/test/support/oracle_harness.ex b/packages/sync-service/test/support/oracle_harness.ex index 7e3d3660fe..fb382577c0 100644 --- a/packages/sync-service/test/support/oracle_harness.ex +++ b/packages/sync-service/test/support/oracle_harness.ex @@ -77,7 +77,7 @@ defmodule Support.OracleHarness do @spec test_against_oracle(map(), [shape()], [batch()], map()) :: :ok def test_against_oracle(ctx, shapes, batches, opts \\ %{}) do opts = Map.merge(default_opts_from_env(), opts) - timeout_ms = opts[:timeout_ms] || @default_timeout_ms + timeout_ms = opts[:timeout_ms] || env_int("CHECK_TIMEOUT") || @default_timeout_ms log_test_config(shapes, batches) diff --git a/packages/sync-service/test/support/oracle_harness/shape_checker.ex b/packages/sync-service/test/support/oracle_harness/shape_checker.ex index 425c3b123f..82161fc2cb 100644 --- a/packages/sync-service/test/support/oracle_harness/shape_checker.ex +++ b/packages/sync-service/test/support/oracle_harness/shape_checker.ex @@ -29,8 +29,6 @@ defmodule Support.OracleHarness.ShapeChecker do alias Electric.Client.ShapeDefinition alias Electric.Client.ShapeState - 
@default_timeout_ms 10_000 - defstruct [ :name, :table, @@ -90,7 +88,7 @@ defmodule Support.OracleHarness.ShapeChecker do Enum.each(shape.pk, &validate_identifier!(&1, "pk column")) shape_def = ShapeDefinition.new!(shape.table, where: shape.where) - timeout_ms = opts[:timeout_ms] || @default_timeout_ms + timeout_ms = Keyword.fetch!(opts, :timeout_ms) state = %__MODULE__{ name: shape.name, diff --git a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex index 0b8fd06025..54527bfb81 100644 --- a/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex +++ b/packages/sync-service/test/support/oracle_harness/where_clause_generator.ex @@ -105,24 +105,24 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp equality_gen do one_of([ # level_3_id = 'l3-X' - level_3_id_gen() |> map(&{"level_3_id = '#{&1}'", false}), + level_3_id_gen() |> map(&{"level_3_id = '#{&1}'", true}), # id = 'l4-X' - level_4_id_gen() |> map(&{"id = '#{&1}'", false}), + level_4_id_gen() |> map(&{"id = '#{&1}'", true}), # value = 'vX' - value_literal_gen() |> map(&{"value = '#{&1}'", false}) + value_literal_gen() |> map(&{"value = '#{&1}'", true}) ]) end # col > 'val', col <> 'val', etc. 
defp comparison_gen do bind({member_of(["<", ">", "<=", ">=", "<>"]), value_literal_gen()}, fn {op, val} -> - constant({"value #{op} '#{val}'", false}) + constant({"value #{op} '#{val}'", true}) end) end # col LIKE 'pattern' / col NOT LIKE 'pattern' defp like_gen do - bind({member_of([{"LIKE", false}, {"NOT LIKE", false}]), like_pattern_gen()}, fn + bind({member_of([{"LIKE", true}, {"NOT LIKE", true}]), like_pattern_gen()}, fn {{op, optimized}, pattern} -> constant({"value #{op} '#{pattern}'", optimized}) end) @@ -131,7 +131,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # col BETWEEN 'a' AND 'b' / col NOT BETWEEN 'a' AND 'b' defp between_gen do bind( - {member_of([{"BETWEEN", false}, {"NOT BETWEEN", false}]), value_literal_gen(), + {member_of([{"BETWEEN", true}, {"NOT BETWEEN", true}]), value_literal_gen(), value_literal_gen()}, fn {{op, optimized}, v1, v2} -> # Ensure v1 <= v2 for valid BETWEEN @@ -147,12 +147,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # level_3_id IN ('l3-1', 'l3-2', ...) bind(list_of(level_3_id_gen(), min_length: 2, max_length: 4), fn ids -> values = ids |> Enum.uniq() |> Enum.map(&"'#{&1}'") |> Enum.join(", ") - constant({"level_3_id IN (#{values})", false}) + constant({"level_3_id IN (#{values})", true}) end), # id IN ('l4-1', 'l4-2', ...) 
bind(list_of(level_4_id_gen(), min_length: 2, max_length: 4), fn ids -> values = ids |> Enum.uniq() |> Enum.map(&"'#{&1}'") |> Enum.join(", ") - constant({"id IN (#{values})", false}) + constant({"id IN (#{values})", true}) end) ]) end @@ -176,12 +176,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # Filter by active flag bool_gen() |> map(fn active -> - {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active})", false} + {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active})", true} end), # Filter by level_2_id level_2_id_gen() |> map(fn l2_id -> - {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", false} + {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", true} end) ]) end @@ -193,14 +193,14 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bind({bool_gen(), bool_gen()}, fn {active_l3, active_l2} -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE active = #{active_l3} AND level_2_id IN (SELECT id FROM level_2 WHERE active = #{active_l2}))", - false} + true} ) end), # Through specific level_1_id level_1_id_gen() |> map(fn l1_id -> {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id = '#{l1_id}'))", - false} + true} end) ]) end @@ -210,7 +210,7 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bool_gen() |> map(fn active_l1 -> {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id IN (SELECT id FROM level_1 WHERE active = #{active_l1})))", - false} + true} end) end @@ -220,19 +220,19 @@ defmodule Support.OracleHarness.WhereClauseGenerator do case level do 1 -> constant( - {"level_3_id IN (SELECT level_3_id FROM level_3_tags WHERE tag = '#{tag}')", false} + {"level_3_id IN (SELECT level_3_id FROM level_3_tags WHERE tag = '#{tag}')", true} ) 2 -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT level_2_id FROM level_2_tags WHERE tag = 
'#{tag}'))", - false} + true} ) 3 -> constant( {"level_3_id IN (SELECT id FROM level_3 WHERE level_2_id IN (SELECT id FROM level_2 WHERE level_1_id IN (SELECT level_1_id FROM level_1_tags WHERE tag = '#{tag}')))", - false} + true} ) end end) @@ -247,12 +247,12 @@ defmodule Support.OracleHarness.WhereClauseGenerator do # NOT IN with active flag bool_gen() |> map(fn active -> - {"level_3_id NOT IN (SELECT id FROM level_3 WHERE active = #{active})", false} + {"level_3_id NOT IN (SELECT id FROM level_3 WHERE active = #{active})", true} end), # NOT IN with level_2_id level_2_id_gen() |> map(fn l2_id -> - {"level_3_id NOT IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", false} + {"level_3_id NOT IN (SELECT id FROM level_3 WHERE level_2_id = '#{l2_id}')", true} end) ]) end @@ -266,13 +266,8 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp and_composition(depth) do bind({base_expr_gen(depth - 1), base_expr_gen(depth - 1)}, fn - {{left, left_opt}, {right, right_opt}} -> - has_left_subquery = contains_subquery?(left) - has_right_subquery = contains_subquery?(right) - - # AND with multiple subqueries at same level is NOT optimized - optimized = left_opt and right_opt and not (has_left_subquery and has_right_subquery) - constant({"(#{left}) AND (#{right})", optimized}) + {{left, _left_opt}, {right, _right_opt}} -> + constant({"(#{left}) AND (#{right})", true}) end) end @@ -281,13 +276,8 @@ defmodule Support.OracleHarness.WhereClauseGenerator do defp or_composition(depth) do bind({base_expr_gen(depth - 1), base_expr_gen(depth - 1)}, fn - {{left, left_opt}, {right, right_opt}} -> - # OR with subqueries is typically not optimized - optimized = - left_opt and right_opt and not contains_subquery?(left) and - not contains_subquery?(right) - - constant({"(#{left}) OR (#{right})", optimized}) + {{left, _left_opt}, {right, _right_opt}} -> + constant({"(#{left}) OR (#{right})", true}) end) end @@ -303,22 +293,23 @@ defmodule 
Support.OracleHarness.WhereClauseGenerator do # a OR b OR c (multiple ORs) {1, multi_or_composition(depth)}, # Subquery OR simple condition - {2, subquery_or_simple(depth)} + {2, subquery_or_simple(depth)}, + # (expr OR expr) AND (expr OR expr) + {2, or_branches_and_composition()} ]) end defp and_or_composition(depth) do bind({or_composition(depth - 1), base_expr_gen(depth - 1)}, fn - {{or_expr, or_opt}, {simple, simple_opt}} -> - optimized = or_opt and simple_opt - constant({"(#{or_expr}) AND (#{simple})", optimized}) + {{or_expr, _or_opt}, {simple, _simple_opt}} -> + constant({"(#{or_expr}) AND (#{simple})", true}) end) end defp not_composition(_depth) do one_of([ # NOT (simple condition) - atomic_with_meta() |> map(fn {expr, _} -> {"NOT (#{expr})", false} end), + atomic_with_meta() |> map(fn {expr, _} -> {"NOT (#{expr})", true} end), # NOT IN subquery not_in_subquery_gen() ]) @@ -328,16 +319,32 @@ defmodule Support.OracleHarness.WhereClauseGenerator do bind(list_of(base_expr_gen(depth - 1), min_length: 2, max_length: 3), fn exprs -> clauses = Enum.map(exprs, fn {expr, _} -> "(#{expr})" end) combined = Enum.join(clauses, " OR ") - # Multi-OR is generally not optimized - constant({combined, false}) + constant({combined, true}) end) end + # (expr OR expr) AND (expr OR expr) — each expr is a subquery or atomic + defp or_branches_and_composition do + bind( + {subquery_or_atomic(), subquery_or_atomic(), subquery_or_atomic(), subquery_or_atomic()}, + fn {{s1, _}, {s2, _}, {s3, _}, {s4, _}} -> + constant({"(#{s1} OR #{s2}) AND (#{s3} OR #{s4})", true}) + end + ) + end + + defp subquery_or_atomic do + frequency([ + {2, subquery_1_level_gen()}, + {1, tag_subquery_gen()}, + {2, atomic_with_meta()} + ]) + end + defp subquery_or_simple(_depth) do bind({subquery_1_level_gen(), atomic_with_meta()}, fn {{subq, _}, {simple, _}} -> - # OR with subquery is not optimized - constant({"(#{subq}) OR (#{simple})", false}) + constant({"(#{subq}) OR (#{simple})", true}) end) end @@ 
-357,7 +364,4 @@ defmodule Support.OracleHarness.WhereClauseGenerator do {1, or_composition(depth - 1)} ]) end - - # Helper to detect if expression contains a subquery - defp contains_subquery?(expr), do: String.contains?(expr, "SELECT") end diff --git a/packages/sync-service/test/support/pg_expression_generator.ex b/packages/sync-service/test/support/pg_expression_generator.ex index 65ba8a00e5..0c582295cd 100644 --- a/packages/sync-service/test/support/pg_expression_generator.ex +++ b/packages/sync-service/test/support/pg_expression_generator.ex @@ -259,4 +259,112 @@ defmodule Support.PgExpressionGenerator do ] |> one_of() end + + @doc """ + Generates arbitrary WHERE clause expressions by nesting datatype expressions + (including column references) with AND, OR, NOT, and boolean predicates at + multiple depth levels. + + Returns `{sql, refs}` tuples where `refs` is the map of column references + needed to parse the expression. + + This exercises the full combinatorial space — numeric, string, bool, and array + expressions mixed together with column refs and logical connectives — so that + any parseable WHERE clause can be verified for round-trip fidelity. 
+ """ + def where_clause_generator do + # Column-ref-aware leaf expressions that mix literals with column references + refs = %{ + ["int_col"] => :int4, + ["float_col"] => :float8, + ["text_col"] => :text, + ["bool_col"] => :bool, + ["int_arr"] => {:array, :int4}, + ["text_arr"] => {:array, :text} + } + + int_ref = constant(~s|"int_col"|) + float_ref = constant(~s|"float_col"|) + text_ref = constant(~s|"text_col"|) + bool_ref = constant(~s|"bool_col"|) + int_arr_ref = constant(~s|"int_arr"|) + text_arr_ref = constant(~s|"text_arr"|) + + # Mix column refs with literals for each type + int_or_ref = one_of([int_gen(), int_ref]) + numeric_or_ref = one_of([numeric_gen(), int_ref, float_ref]) + str_or_ref = one_of([str_gen(), text_ref]) + bool_or_ref = one_of([bool_gen(), bool_ref]) + + numeric_with_refs = + one_of([ + expression_gen(nullable_type_gen(numeric_or_ref), [ + {:combine_op, numeric_op_gen()}, + {:unary_op, numeric_unary_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]), + expression_gen(nullable_type_gen(int_or_ref), [ + {:combine_op, int_op_gen()}, + {:unary_op, int_unary_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + ]) + + string_with_refs = + expression_gen(nullable_type_gen(str_or_ref), [ + {:combine_op, string_op_gen()}, + {:function_op, string_function_op_gen()}, + {:comparison_op, string_comparison_op_gen()}, + {:comparison_op, comparison_op_gen()}, + {:range_op, range_comparison_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + + bool_with_refs = + expression_gen(nullable_type_gen(bool_or_ref), [ + {:comparison_op, bool_comparison_op_gen()}, + {:unary_op, bool_unary_op_gen()}, + {:predicate_op, predicate_op_gen()} + ]) + + array_with_refs = + one_of([ + expression_gen(one_of([int_arr_ref, array_gen(int_gen(), dimension: 1)]), [ + {:comparison_op, 
array_comparison_op_gen()}, + {:function_op, array_function_op_gen()}, + {:membership_op, membership_op_gen()} + ]), + expression_gen(one_of([text_arr_ref, array_gen(str_gen(), dimension: 1)]), [ + {:comparison_op, array_comparison_op_gen()}, + {:function_op, array_function_op_gen()}, + {:membership_op, membership_op_gen()} + ]) + ]) + + leaf = + one_of([ + numeric_with_refs, + string_with_refs, + bool_with_refs, + array_with_refs + ]) + + sql_gen = + nested_expression_gen( + leaf, + [ + {:combine_op, bool_comparison_op_gen()}, + {:unary_op, bool_unary_op_gen()}, + {:predicate_op, predicate_op_gen()} + ], + max_nesting: 3 + ) + + # Return {sql, refs} tuples + map(sql_gen, &{&1, refs}) + end end diff --git a/packages/sync-service/test/test_helper.exs b/packages/sync-service/test/test_helper.exs index 58be1f25b5..3055b0198c 100644 --- a/packages/sync-service/test/test_helper.exs +++ b/packages/sync-service/test/test_helper.exs @@ -13,35 +13,44 @@ ExUnit.start(assert_receive_timeout: 400, exclude: [:slow, :oracle], capture_log # causing a deadlock when OTP tries to start both applications. {:ok, _} = Electric.Client.Application.start(:normal, []) +skip_repatch_prewarm? = System.get_env("SKIP_REPATCH_PREWARM") == "true" + # Repatch in async tests has lazy recompilation issues, so as a temporary fix # we force recompilation in the setup. The issue is tracked here: # https://github.com/hissssst/repatch/issues/2 -Repatch.setup( - recompile: [ - # IMPORTANT: When adding a new Repatch.patch(..., mode: :shared, ...) in any - # test file, add the target module to this list. Omitting it causes rare async - # test failures because Repatch recompiles modules on first patch, which - # destroys Erlang trace patterns, invalidates ETS table references, and breaks - # anonymous function closures in concurrent tests. Pre-warming here triggers - # the recompilation once at startup so subsequent patches only update ETS hooks. 
- Postgrex, - Plug.Conn, - Electric.StatusMonitor, - Electric.Telemetry.Sampler, - Electric.Connection.Manager, - Electric.Connection.Restarter, - Electric.Postgres.Configuration, - Electric.Postgres.Inspector, - Electric.Replication.PublicationManager, - Electric.Replication.ShapeLogCollector, - Electric.ShapeCache, - Electric.ShapeCache.PureFileStorage, - Electric.ShapeCache.ShapeCleaner, - Electric.ShapeCache.ShapeStatus, - Electric.ShapeCache.Storage, - Electric.Shapes.Consumer.Snapshotter, - Electric.Shapes.DynamicConsumerSupervisor, - Electric.Shapes.Shape, - :otel_tracer - ] -) +# +# Repatch does slow down execution so if needed this prewarm can be skipped +# by setting the SKIP_REPATCH_PREWARM environment variable to "true". +# Skipping the prewarm is useful for the postgres oracle tests where performance +# is critical for high load. +if not skip_repatch_prewarm? do + Repatch.setup( + recompile: [ + # IMPORTANT: When adding a new Repatch.patch(..., mode: :shared, ...) in any + # test file, add the target module to this list. Omitting it causes rare async + # test failures because Repatch recompiles modules on first patch, which + # destroys Erlang trace patterns, invalidates ETS table references, and breaks + # anonymous function closures in concurrent tests. Pre-warming here triggers + # the recompilation once at startup so subsequent patches only update ETS hooks. + Postgrex, + Plug.Conn, + Electric.StatusMonitor, + Electric.Telemetry.Sampler, + Electric.Connection.Manager, + Electric.Connection.Restarter, + Electric.Postgres.Configuration, + Electric.Postgres.Inspector, + Electric.Replication.PublicationManager, + Electric.Replication.ShapeLogCollector, + Electric.ShapeCache, + Electric.ShapeCache.PureFileStorage, + Electric.ShapeCache.ShapeCleaner, + Electric.ShapeCache.ShapeStatus, + Electric.ShapeCache.Storage, + Electric.Shapes.Consumer.Snapshotter, + Electric.Shapes.DynamicConsumerSupervisor, + Electric.Shapes.Shape, + :otel_tracer + ] + ) +end