From e834673abf180a792dcb45fd11f0fcf9ea2ebe6a Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Sun, 10 May 2026 14:38:36 +0200 Subject: [PATCH 1/4] Implement test for trie-join dead branch pruning Add a test for trie-join to handle dead branches correctly. --- test/erdos/algo/leapfrog/triejoin_test.clj | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index 2687545..eefd941 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -239,6 +239,21 @@ (is (= 8 (-> seeked ->next (->seek 8) get-key))))))) +(deftest trie-join-prunes-dead-branches + ;; Regression: when one relation matches a key at a level but its nested + ;; structure can't descend (e.g., another constraint inside an inner + ;; trie-join rejects), the outer join must propagate that failure rather + ;; than fall back to the stale iter (which would leak its current-depth + ;; key into later levels' leapfrog and produce phantom truncated routes). + (let [r1 (test-trie-iter [:e :a1 :v1] [[1 :name "Janos"] [2 :name "Bela"]]) + r2 (test-trie-iter [:e :a2 :v2] [[1 :age 30] [3 :age 25]]) + c1 (test-trie-iter [:a1] [[:name]]) + c2 (test-trie-iter [:a2] [[:age]]) + joined (trie-join [:e :a1 :v1 :a2 :v2] [r1 c1 r2 c2])] + ;; Only e=1 has both :name and :age in this storage. + (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] + (relations joined))))) + (def exponents (test-trie-iter [:n1 :n2 :n3] [[1 1 1] From b187b9bd6777c9e9ead6bbdb138ef5537f8bf6e1 Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Sun, 10 May 2026 14:47:32 +0200 Subject: [PATCH 2/4] Update triejoin_test.clj --- test/erdos/algo/leapfrog/triejoin_test.clj | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index eefd941..bfe5e71 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -254,6 +254,36 @@ (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] (relations joined))))) +(deftest nested-trie-join-prunes-dead-branches-2 + ;; Regression: when an *inner* trie-join's leapfrog matches a key at + ;; depth d but its leapfrog at d+1 fails (e.g., a Constants demand + ;; rejects the key's children), the inner trie-open returns nil. The + ;; outer trie-join must propagate that failure; falling back to the + ;; stale inner iter would leak its d-level key into the outer's d+1 + ;; leapfrog (single-iter leapfrog trivially "matches" anything) and + ;; trie-routes/relations would emit a phantom complete-looking tuple + ;; whose later slots are filled with the stale iter's wrong-depth key. + ;; + ;; Structure: outer trie-join over two *nested* trie-joins, each + ;; filtering the same eav by an attribute constant. + (let [eav-tuples [[1 :name "Janos"] [1 :age 30] + [2 :name "Bela"] + [3 :age 25]] + named (trie-join [:e :a1 :v1] + [(test-trie-iter [:e :a1 :v1] eav-tuples) + (test-trie-iter [:a1] [[:name]])]) + aged (trie-join [:e :a2 :v2] + [(test-trie-iter [:e :a2 :v2] eav-tuples) + (test-trie-iter [:a2] [[:age]])]) + outer (trie-join [:e :a1 :v1 :a2 :v2] [named aged])] + ;; Only e=1 has both :name and :age. e=2 has only :name; e=3 has only + ;; :age. With the old (or trie-open i) i) fallback, e=2 and e=3 each + ;; produce a phantom tuple whose unbound slots are filled with the + ;; entity id (e.g. {:e 2 :a1 :name :v1 "Bela" :a2 2 :v2 2}). + (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] + (relations outer))))) + + (def exponents (test-trie-iter [:n1 :n2 :n3] [[1 1 1] From 4b987b3c4cfad2872f8fe4adf0856f1b5d9c51f5 Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Sun, 10 May 2026 14:58:52 +0200 Subject: [PATCH 3/4] Update triejoin_test.clj --- test/erdos/algo/leapfrog/triejoin_test.clj | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index bfe5e71..0fb625f 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -283,6 +283,20 @@ (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] (relations outer))))) +(deftest nested-trie-join-prunes-dead-branches + ;; Regression: when an inner trie-join's leapfrog matches at depth d + ;; but trie-open at d→d+1 fails, the outer must propagate that nil. + ;; Falling back to the stale inner iter would leak its d-level key + ;; into the outer's later leapfrogs and emit a phantom tuple with the + ;; entity id leaking into a sibling's slot. + (let [inner (trie-join [:e :a] [(test-trie-iter [:e :a] [[1 :ok] [2 :no]]) + (test-trie-iter [:a] [[:ok]])]) + sib (test-trie-iter [:e :x] [[1 10] [2 20]]) + outer (trie-join [:e :a :x] [inner sib])] + ;; Only e=1 has :a=:ok. Without the fix, e=2 leaks as the phantom + ;; {:e 2 :a 2 :x 20} — :a slot filled with the stale entity id. + (is (= [{:e 1 :a :ok :x 10}] + (relations outer))))) (def exponents (test-trie-iter [:n1 :n2 :n3] From f02ae3bf045516b43823fb1da9f5b2b0530f0f04 Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Sun, 10 May 2026 15:00:40 +0200 Subject: [PATCH 4/4] Update triejoin_test.clj --- test/erdos/algo/leapfrog/triejoin_test.clj | 48 +--------------------- 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/test/erdos/algo/leapfrog/triejoin_test.clj b/test/erdos/algo/leapfrog/triejoin_test.clj index 0fb625f..6a175ef 100644 --- a/test/erdos/algo/leapfrog/triejoin_test.clj +++ b/test/erdos/algo/leapfrog/triejoin_test.clj @@ -239,50 +239,6 @@ (is (= 8 (-> seeked ->next (->seek 8) get-key))))))) -(deftest trie-join-prunes-dead-branches - ;; Regression: when one relation matches a key at a level but its nested - ;; structure can't descend (e.g., another constraint inside an inner - ;; trie-join rejects), the outer join must propagate that failure rather - ;; than fall back to the stale iter (which would leak its current-depth - ;; key into later levels' leapfrog and produce phantom truncated routes). - (let [r1 (test-trie-iter [:e :a1 :v1] [[1 :name "Janos"] [2 :name "Bela"]]) - r2 (test-trie-iter [:e :a2 :v2] [[1 :age 30] [3 :age 25]]) - c1 (test-trie-iter [:a1] [[:name]]) - c2 (test-trie-iter [:a2] [[:age]]) - joined (trie-join [:e :a1 :v1 :a2 :v2] [r1 c1 r2 c2])] - ;; Only e=1 has both :name and :age in this storage. - (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] - (relations joined))))) - -(deftest nested-trie-join-prunes-dead-branches-2 - ;; Regression: when an *inner* trie-join's leapfrog matches a key at - ;; depth d but its leapfrog at d+1 fails (e.g., a Constants demand - ;; rejects the key's children), the inner trie-open returns nil. The - ;; outer trie-join must propagate that failure; falling back to the - ;; stale inner iter would leak its d-level key into the outer's d+1 - ;; leapfrog (single-iter leapfrog trivially "matches" anything) and - ;; trie-routes/relations would emit a phantom complete-looking tuple - ;; whose later slots are filled with the stale iter's wrong-depth key. - ;; - ;; Structure: outer trie-join over two *nested* trie-joins, each - ;; filtering the same eav by an attribute constant. - (let [eav-tuples [[1 :name "Janos"] [1 :age 30] - [2 :name "Bela"] - [3 :age 25]] - named (trie-join [:e :a1 :v1] - [(test-trie-iter [:e :a1 :v1] eav-tuples) - (test-trie-iter [:a1] [[:name]])]) - aged (trie-join [:e :a2 :v2] - [(test-trie-iter [:e :a2 :v2] eav-tuples) - (test-trie-iter [:a2] [[:age]])]) - outer (trie-join [:e :a1 :v1 :a2 :v2] [named aged])] - ;; Only e=1 has both :name and :age. e=2 has only :name; e=3 has only - ;; :age. With the old (or trie-open i) i) fallback, e=2 and e=3 each - ;; produce a phantom tuple whose unbound slots are filled with the - ;; entity id (e.g. {:e 2 :a1 :name :v1 "Bela" :a2 2 :v2 2}). - (is (= [{:e 1 :a1 :name :v1 "Janos" :a2 :age :v2 30}] - (relations outer))))) - (deftest nested-trie-join-prunes-dead-branches ;; Regression: when an inner trie-join's leapfrog matches at depth d ;; but trie-open at d→d+1 fails, the outer must propagate that nil. @@ -295,8 +251,8 @@ outer (trie-join [:e :a :x] [inner sib])] ;; Only e=1 has :a=:ok. Without the fix, e=2 leaks as the phantom ;; {:e 2 :a 2 :x 20} — :a slot filled with the stale entity id. - (is (= [{:e 1 :a :ok :x 10}] - (relations outer))))) + (is (= [{:e 1 :a :ok :x 10}] (relations outer))))) + (def exponents (test-trie-iter [:n1 :n2 :n3]