Skip to content

Commit 9b4b9c1

Browse files
authored
Merge pull request #455 from hoijnet/issue/2284-set-operators
Support for WOQL set operators
2 parents 5991c22 + 93986a7 commit 9b4b9c1

File tree

4 files changed

+360
-7
lines changed

4 files changed

+360
-7
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ terminusdb_client_coverage/
3535
*~
3636

3737
venv/
38+
.venv/
3839

3940
# due to using tox and pytest
4041
.tox

CONTRIBUTING.md

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,58 @@ Thanks for interested to contribute to TerminusDB Client, to get started, fork t
44

55
## Setting up dev environment 💻
66

7-
Make sure you have Python>=3.9 installed. We use [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment, to install pipenv:
7+
Make sure you have Python>=3.9 and <3.13 installed.
88

9-
`pip3 install pipenv --upgrade`
9+
[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then set up your development environment using one of the methods below.
1010

11-
[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then in your local repo:
11+
### Option 1: Using venv (recommended)
1212

13-
`pipenv install --dev --pre` or `make init`
13+
Create and activate a virtual environment:
1414

15-
To “editable” install the local Terminus Client Python:
15+
```bash
16+
# Create venv with Python 3.12 (or any version 3.9-3.12)
17+
python3.12 -m venv .venv
1618

17-
`pip3 install -e .`
19+
# Activate the virtual environment
20+
source .venv/bin/activate # On macOS/Linux
21+
# .venv\Scripts\activate # On Windows
1822

19-
**to be able to run integration tests, local installation of docker is required**
23+
# Install the package in editable mode with dev dependencies
24+
pip install -e ".[dev]"
25+
26+
# Install pytest for running tests
27+
pip install pytest
28+
```
29+
30+
### Option 2: Using pipenv
31+
32+
We also support [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment:
33+
34+
```bash
35+
pip install pipenv --upgrade
36+
pipenv install --dev --pre
37+
```
38+
39+
Or simply run `make init`.
40+
41+
To install the local TerminusDB Python client in editable mode:
42+
43+
`pip install -e .`
44+
45+
### Running a local TerminusDB server
46+
47+
**To run integration tests, you need either Docker or a local TerminusDB server.**
48+
49+
For integration tests, you can either:
50+
51+
1. **Use Docker** (automatic): Tests will automatically start a Docker container if no server is detected
52+
2. **Use a local server**: Start the TerminusDB test server from the main terminusdb repository:
53+
```bash
54+
cd /path/to/terminusdb
55+
./tests/terminusdb-test-server.sh start
56+
```
57+
58+
The test configuration will automatically detect and use an available server.
2059

2160
We use [shed](https://pypi.org/project/shed/) to lint our code. Although you can do it manually by running `shed`, we highly recommend setting up the pre-commit hook to do the linting automatically.
2261

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""
2+
Integration tests for WOQL set operations.
3+
4+
These tests verify the new set operations:
5+
- set_difference
6+
- set_intersection
7+
- set_union
8+
- set_member
9+
- list_to_set
10+
"""
11+
12+
import time
13+
14+
import pytest
15+
16+
from terminusdb_client import Client
17+
from terminusdb_client.woqlquery.woql_query import WOQLQuery
18+
19+
# User-Agent value passed to Client() by the tests in this module.
test_user_agent = "terminusdb-client-python-tests"
20+
21+
22+
def extract_values(result_list):
    """Unwrap typed literals into their raw values.

    Each item that is a dict containing an ``"@value"`` key is replaced by
    the value stored under that key; every other item is passed through
    unchanged. A falsy input (``None``, empty list, ...) yields ``[]``.
    """
    raw_values = []
    for entry in result_list or []:
        if isinstance(entry, dict) and "@value" in entry:
            raw_values.append(entry["@value"])
        else:
            raw_values.append(entry)
    return raw_values
30+
31+
32+
class TestWOQLSetOperations:
    """Integration tests for the WOQL set operations.

    Every test builds a query with ``WOQLQuery``, runs it through
    ``self.client.query`` and checks the returned ``"bindings"``.
    """

    @pytest.fixture(autouse=True)
    def setup_teardown(self, docker_url):
        """Create a fresh test database before each test and drop it afterwards.

        ``docker_url`` is a fixture supplied elsewhere (presumably the
        project's conftest -- confirm); it is used as the server URL.
        """
        self.client = Client(docker_url, user_agent=test_user_agent)
        self.client.connect()
        self.db_name = "test_woql_set_operations"

        # Start from a clean slate: drop a database left over from an
        # earlier run before creating a new one.
        if self.db_name in self.client.list_databases():
            self.client.delete_database(self.db_name)
        self.client.create_database(self.db_name)

        yield

        # Cleanup
        self.client.delete_database(self.db_name)

    def test_set_difference_basic(self):
        """Test set_difference computes difference between two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3, 4]),
            WOQLQuery().eq("v:ListB", [2, 4]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Diff"]) == [1, 3]

    def test_set_difference_subset(self):
        """Test set_difference returns empty when first list is subset."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [1, 2, 3]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_difference_empty_list(self):
        """Test set_difference handles empty lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", []),
            WOQLQuery().eq("v:ListB", [1]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_intersection_basic(self):
        """Test set_intersection computes intersection of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3]),
            WOQLQuery().eq("v:ListB", [2, 3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Common"]) == [2, 3]

    def test_set_intersection_no_common(self):
        """Test set_intersection returns empty when no common elements."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Common"] == []

    def test_set_union_basic(self):
        """Test set_union computes union of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [2, 3]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2, 3]

    def test_set_union_removes_duplicates(self):
        """Test set_union removes duplicates."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 1, 2]),
            WOQLQuery().eq("v:ListB", [2, 2]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2]

    def test_set_member_success(self):
        """Test set_member succeeds for element in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(2, "v:MySet")
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1

    def test_set_member_failure(self):
        """Test set_member fails for element not in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]), WOQLQuery().set_member(5, "v:MySet")
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 0

    def test_list_to_set(self):
        """Test list_to_set removes duplicates and sorts."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MyList", [3, 1, 2, 1]),
            WOQLQuery().list_to_set("v:MyList", "v:MySet"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["MySet"]) == [1, 2, 3]

    def test_performance_large_sets(self):
        """Test set operations handle large sets efficiently."""
        list_a = list(range(1000))
        list_b = list(range(500, 1500))

        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", list_a),
            WOQLQuery().eq("v:ListB", list_b),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        # perf_counter() is monotonic, so a system clock adjustment during
        # the run cannot skew the measured duration the way time.time() can.
        start_time = time.perf_counter()
        result = self.client.query(query)
        elapsed = time.perf_counter() - start_time

        assert len(result["bindings"]) == 1
        # list_a is 0..999 and list_b is 500..1499, so 0..499 remain.
        assert len(result["bindings"][0]["Diff"]) == 500

        # Should complete in under 1 second with O(n log n) algorithm.
        # The timing covers the whole client.query() call, not only the
        # set computation itself.
        assert elapsed < 1.0, f"set_difference query took {elapsed:.3f}s"

terminusdb_client/woqlquery/woql_query.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,135 @@ def member(self, member, mem_list):
24262426
self._cursor["list"] = self._value_list(mem_list)
24272427
return self
24282428

2429+
def set_difference(self, list_a, list_b, result):
    """Build a ``SetDifference`` step: elements of list_a that are not in list_b.

    Parameters
    ----------
    list_a : str or list
        First list, or a variable holding it
    list_b : str or list
        Second list, or a variable holding it
    result : str
        Variable that receives the difference

    Returns
    -------
    WOQLQuery object
        this query, so calls can be chained and/or executed; when
        ``list_a`` is the literal string "args" the list of argument
        names is returned instead
    """
    # Introspection mode: report the argument names, leave the query untouched.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]

    # The current cursor already holds a typed node: wrap it before adding ours.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()

    self._cursor["@type"] = "SetDifference"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self
2455+
2456+
def set_intersection(self, list_a, list_b, result):
    """Build a ``SetIntersection`` step: elements present in both list_a and list_b.

    Parameters
    ----------
    list_a : str or list
        First list, or a variable holding it
    list_b : str or list
        Second list, or a variable holding it
    result : str
        Variable that receives the intersection

    Returns
    -------
    WOQLQuery object
        this query, so calls can be chained and/or executed; when
        ``list_a`` is the literal string "args" the list of argument
        names is returned instead
    """
    # Introspection mode: report the argument names, leave the query untouched.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]

    # The current cursor already holds a typed node: wrap it before adding ours.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()

    self._cursor["@type"] = "SetIntersection"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self
2482+
2483+
def set_union(self, list_a, list_b, result):
    """Build a ``SetUnion`` step: all unique elements from list_a and list_b.

    Parameters
    ----------
    list_a : str or list
        First list, or a variable holding it
    list_b : str or list
        Second list, or a variable holding it
    result : str
        Variable that receives the union

    Returns
    -------
    WOQLQuery object
        this query, so calls can be chained and/or executed; when
        ``list_a`` is the literal string "args" the list of argument
        names is returned instead
    """
    # Introspection mode: report the argument names, leave the query untouched.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]

    # The current cursor already holds a typed node: wrap it before adding ours.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()

    self._cursor["@type"] = "SetUnion"
    operands = (("list_a", list_a), ("list_b", list_b), ("result", result))
    for field, operand in operands:
        self._cursor[field] = self._value_list(operand)
    return self
2509+
2510+
def set_member(self, element, set_list):
    """Build a ``SetMember`` step: check that element is a member of a set.

    NOTE(review): the previous docstring described this as an efficient
    O(log n) lookup; that is a property of the server and cannot be
    confirmed from this client code.

    Parameters
    ----------
    element : any
        Element to look for
    set_list : str or list
        Set (list), or a variable holding it, to check membership in

    Returns
    -------
    WOQLQuery object
        this query, so calls can be chained and/or executed; when
        ``element`` is the literal string "args" the list of argument
        names is returned instead
    """
    # Introspection mode: report the argument names, leave the query untouched.
    if element and element == "args":
        return ["element", "set"]

    # The current cursor already holds a typed node: wrap it before adding ours.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()

    self._cursor["@type"] = "SetMember"
    # The element is a single value, the set is a list of values, so they
    # go through different conversion helpers.
    self._cursor["element"] = self._clean_object(element)
    self._cursor["set"] = self._value_list(set_list)
    return self
2533+
2534+
def list_to_set(self, input_list, result_set):
    """Build a ``ListToSet`` step: convert a list to a set (removes duplicates and sorts).

    Parameters
    ----------
    input_list : str or list
        Input list, or a variable holding it
    result_set : str
        Variable that receives the resulting set

    Returns
    -------
    WOQLQuery object
        this query, so calls can be chained and/or executed; when
        ``input_list`` is the literal string "args" the list of argument
        names is returned instead
    """
    # Introspection mode: report the argument names, leave the query untouched.
    if input_list and input_list == "args":
        return ["list", "set"]

    # The current cursor already holds a typed node: wrap it before adding ours.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()

    self._cursor["@type"] = "ListToSet"
    for field, operand in (("list", input_list), ("set", result_set)):
        self._cursor[field] = self._value_list(operand)
    return self
2557+
24292558
def concat(self, concat_list, result):
24302559
"""Concatenates the list of variables into a string and saves the result in v
24312560

0 commit comments

Comments
 (0)