Skip to content

Commit 82bc07b

Browse files
committed
Support for set WOQL operators
1 parent e23e38b commit 82bc07b

File tree

4 files changed

+359
-7
lines changed

4 files changed

+359
-7
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ terminusdb_client_coverage/
3535
*~
3636

3737
venv/
38+
.venv/
3839

3940
# due to using tox and pytest
4041
.tox

CONTRIBUTING.md

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,58 @@ Thanks for interested to contribute to TerminusDB Client, to get started, fork t
44

55
## Setting up dev environment 💻
66

7-
Make sure you have Python>=3.9 installed. We use [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment, to install pipenv:
7+
Make sure you have Python>=3.9 and <3.13 installed.
88

9-
`pip3 install pipenv --upgrade`
9+
[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then set up your development environment using one of the methods below.
1010

11-
[Fork and clone](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) this repo, then in your local repo:
11+
### Option 1: Using venv (recommended)
1212

13-
`pipenv install --dev --pre` or `make init`
13+
Create and activate a virtual environment:
1414

15-
To “editable” install the local Terminus Client Python:
15+
```bash
16+
# Create venv with Python 3.12 (or any version 3.9-3.12)
17+
python3.12 -m venv .venv
1618

17-
`pip3 install -e .`
19+
# Activate the virtual environment
20+
source .venv/bin/activate # On macOS/Linux
21+
# .venv\Scripts\activate # On Windows
1822

19-
**to be able to run integration tests, local installation of docker is required**
23+
# Install the package in editable mode with dev dependencies
24+
pip install -e ".[dev]"
25+
26+
# Install pytest for running tests
27+
pip install pytest
28+
```
29+
30+
### Option 2: Using pipenv
31+
32+
We also support [pipenv](https://pipenv-fork.readthedocs.io/en/latest/) for dev environment:
33+
34+
```bash
35+
pip install pipenv --upgrade
36+
pipenv install --dev --pre
37+
```
38+
39+
Or simply run `make init`.
40+
41+
To install the local TerminusDB Python client in "editable" mode:
42+
43+
`pip install -e .`
44+
45+
### Running a local TerminusDB server
46+
47+
**To run integration tests, you need either Docker or a local TerminusDB server.**
48+
49+
For integration tests, you can either:
50+
51+
1. **Use Docker** (automatic): Tests will automatically start a Docker container if no server is detected
52+
2. **Use a local server**: Start the TerminusDB test server from the main terminusdb repository:
53+
```bash
54+
cd /path/to/terminusdb
55+
./tests/terminusdb-test-server.sh start
56+
```
57+
58+
The test configuration will automatically detect and use an available server.
2059

2160
We use [shed](https://pypi.org/project/shed/) to lint our code. Although you can do it manually by running `shed`, we highly recommend setting up the pre-commit hook to do the linting automatically.
2261

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
"""
2+
Integration tests for WOQL set operations.
3+
4+
These tests verify the new set operations:
5+
- set_difference
6+
- set_intersection
7+
- set_union
8+
- set_member
9+
- list_to_set
10+
"""
11+
import time
12+
13+
import pytest
14+
15+
from terminusdb_client import Client
16+
from terminusdb_client.woqlquery.woql_query import WOQLQuery
17+
18+
test_user_agent = "terminusdb-client-python-tests"
19+
20+
21+
def extract_values(result_list):
    """Unwrap typed literals into plain values.

    Dict items that carry an ``"@value"`` key are replaced by that value;
    every other item is passed through unchanged. A falsy input (``None``
    or an empty list) produces an empty list.
    """
    values = []
    for entry in result_list or []:
        if isinstance(entry, dict) and "@value" in entry:
            values.append(entry["@value"])
        else:
            values.append(entry)
    return values
27+
28+
29+
class TestWOQLSetOperations:
    """Integration tests for the WOQL set operations.

    Each test talks to the server at ``docker_url`` and runs against a
    dedicated database that the autouse fixture creates before the test and
    deletes afterwards.
    """

    @pytest.fixture(autouse=True)
    def setup_teardown(self, docker_url):
        """Connect a client and provide a fresh database for each test."""
        self.client = Client(docker_url, user_agent=test_user_agent)
        self.client.connect()
        self.db_name = "test_woql_set_operations"

        # Drop a database left behind by an earlier run so the test starts clean.
        if self.db_name in self.client.list_databases():
            self.client.delete_database(self.db_name)
        self.client.create_database(self.db_name)

        yield

        # Cleanup
        self.client.delete_database(self.db_name)

    def test_set_difference_basic(self):
        """Test set_difference computes difference between two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3, 4]),
            WOQLQuery().eq("v:ListB", [2, 4]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Diff"]) == [1, 3]

    def test_set_difference_subset(self):
        """Test set_difference returns empty when first list is subset."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [1, 2, 3]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_difference_empty_list(self):
        """Test set_difference handles empty lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", []),
            WOQLQuery().eq("v:ListB", [1]),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Diff"] == []

    def test_set_intersection_basic(self):
        """Test set_intersection computes intersection of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2, 3]),
            WOQLQuery().eq("v:ListB", [2, 3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["Common"]) == [2, 3]

    def test_set_intersection_no_common(self):
        """Test set_intersection returns empty when no common elements."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [3, 4]),
            WOQLQuery().set_intersection("v:ListA", "v:ListB", "v:Common"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert result["bindings"][0]["Common"] == []

    def test_set_union_basic(self):
        """Test set_union computes union of two lists."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 2]),
            WOQLQuery().eq("v:ListB", [2, 3]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2, 3]

    def test_set_union_removes_duplicates(self):
        """Test set_union removes duplicates."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", [1, 1, 2]),
            WOQLQuery().eq("v:ListB", [2, 2]),
            WOQLQuery().set_union("v:ListA", "v:ListB", "v:All"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["All"]) == [1, 2]

    def test_set_member_success(self):
        """Test set_member succeeds for element in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]),
            WOQLQuery().set_member(2, "v:MySet"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1

    def test_set_member_failure(self):
        """Test set_member fails for element not in set."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MySet", [1, 2, 3]),
            WOQLQuery().set_member(5, "v:MySet"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 0

    def test_list_to_set(self):
        """Test list_to_set removes duplicates and sorts."""
        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:MyList", [3, 1, 2, 1]),
            WOQLQuery().list_to_set("v:MyList", "v:MySet"),
        )

        result = self.client.query(query)
        assert len(result["bindings"]) == 1
        assert extract_values(result["bindings"][0]["MySet"]) == [1, 2, 3]

    def test_performance_large_sets(self):
        """Test set operations handle large sets efficiently."""
        list_a = list(range(1000))
        list_b = list(range(500, 1500))

        query = WOQLQuery().woql_and(
            WOQLQuery().eq("v:ListA", list_a),
            WOQLQuery().eq("v:ListB", list_b),
            WOQLQuery().set_difference("v:ListA", "v:ListB", "v:Diff"),
        )

        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by a system clock adjustment the way time.time() can.
        start_time = time.perf_counter()
        result = self.client.query(query)
        elapsed = time.perf_counter() - start_time

        assert len(result["bindings"]) == 1
        assert len(result["bindings"][0]["Diff"]) == 500

        # Should complete in under 1 second with O(n log n) algorithm
        assert elapsed < 1.0

terminusdb_client/woqlquery/woql_query.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,135 @@ def member(self, member, mem_list):
24262426
self._cursor["list"] = self._value_list(mem_list)
24272427
return self
24282428

2429+
def set_difference(self, list_a, list_b, result):
    """Adds a SetDifference clause: the elements of list_a that are not in list_b

    Parameters
    ----------
    list_a : str or list
        List (or variable) to take elements from
    list_b : str or list
        List (or variable) whose elements are excluded
    result : str
        Variable that receives the difference

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or executed
    """
    # Passing "args" as the first argument returns the clause's field names.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # A cursor that already holds a clause is wrapped before this one is written.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    clause = self._cursor  # read only after the optional wrap above
    clause["@type"] = "SetDifference"
    for field, operand in (
        ("list_a", list_a),
        ("list_b", list_b),
        ("result", result),
    ):
        clause[field] = self._value_list(operand)
    return self
2455+
2456+
def set_intersection(self, list_a, list_b, result):
    """Adds a SetIntersection clause: the elements present in both list_a and list_b

    Parameters
    ----------
    list_a : str or list
        First list or variable
    list_b : str or list
        Second list or variable
    result : str
        Variable that receives the intersection

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or executed
    """
    # Passing "args" as the first argument returns the clause's field names.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # A cursor that already holds a clause is wrapped before this one is written.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    clause = self._cursor  # read only after the optional wrap above
    clause["@type"] = "SetIntersection"
    for field, operand in (
        ("list_a", list_a),
        ("list_b", list_b),
        ("result", result),
    ):
        clause[field] = self._value_list(operand)
    return self
2482+
2483+
def set_union(self, list_a, list_b, result):
    """Adds a SetUnion clause: all unique elements taken from list_a and list_b

    Parameters
    ----------
    list_a : str or list
        First list or variable
    list_b : str or list
        Second list or variable
    result : str
        Variable that receives the union

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or executed
    """
    # Passing "args" as the first argument returns the clause's field names.
    if list_a and list_a == "args":
        return ["list_a", "list_b", "result"]
    # A cursor that already holds a clause is wrapped before this one is written.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    clause = self._cursor  # read only after the optional wrap above
    clause["@type"] = "SetUnion"
    for field, operand in (
        ("list_a", list_a),
        ("list_b", list_b),
        ("result", result),
    ):
        clause[field] = self._value_list(operand)
    return self
2509+
2510+
def set_member(self, element, set_list):
    """Adds a SetMember clause that checks whether an element belongs to a set

    Parameters
    ----------
    element : any
        Element to look for
    set_list : str or list
        Set (list) or variable to look in

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or executed
    """
    # Passing "args" as the first argument returns the clause's field names.
    if element and element == "args":
        return ["element", "set"]
    # A cursor that already holds a clause is wrapped before this one is written.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    clause = self._cursor  # read only after the optional wrap above
    clause["@type"] = "SetMember"
    clause["element"] = self._clean_object(element)
    clause["set"] = self._value_list(set_list)
    return self
2533+
2534+
def list_to_set(self, input_list, result_set):
    """Adds a ListToSet clause that turns a list into a set (duplicates removed, sorted)

    Parameters
    ----------
    input_list : str or list
        Source list or variable
    result_set : str
        Variable that receives the resulting set

    Returns
    -------
    WOQLQuery object
        query object that can be chained and/or executed
    """
    # Passing "args" as the first argument returns the clause's field names.
    if input_list and input_list == "args":
        return ["list", "set"]
    # A cursor that already holds a clause is wrapped before this one is written.
    if self._cursor.get("@type"):
        self._wrap_cursor_with_and()
    clause = self._cursor  # read only after the optional wrap above
    clause["@type"] = "ListToSet"
    for field, operand in (("list", input_list), ("set", result_set)):
        clause[field] = self._value_list(operand)
    return self
2557+
24292558
def concat(self, concat_list, result):
24302559
"""Concatenates the list of variables into a string and saves the result in v
24312560

0 commit comments

Comments
 (0)