Skip to content

Commit aade1e0

Browse files
committed
adding search/search all functionality to workspace
1 parent 1eb8238 commit aade1e0

File tree

5 files changed

+303
-1
lines changed

5 files changed

+303
-1
lines changed

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ convention = "google"
8888
"E402", # Module level import not at top of file
8989
"F401", # Imported but unused
9090
]
91+
"tests/manual/*.py" = [
92+
"INP001", # Manual scripts don't need __init__.py
93+
]
9194

9295
[tool.ruff.lint.pyupgrade]
9396
# Preserve types, even if a file imports `from __future__ import annotations`.

roboflow/adapters/rfapi.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,46 @@ def get_search_export(api_key: str, workspace_url: str, export_id: str, session:
199199
return response.json()
200200

201201

202+
def workspace_search(
    api_key: str,
    workspace_url: str,
    query: str,
    page_size: int = 50,
    fields: Optional[List[str]] = None,
    continuation_token: Optional[str] = None,
) -> dict:
    """Run a RoboQL search over all images in a workspace.

    Issues one POST request to the workspace's ``search/v1`` endpoint and
    returns the decoded JSON body.

    Args:
        api_key: Roboflow API key, sent as the ``api_key`` query parameter.
        workspace_url: Workspace slug/url used as the path prefix.
        query: RoboQL search query (e.g. ``"tag:review"``, ``"project:false"``).
        page_size: Number of results per page (default 50).
        fields: Fields to include in each result; omitted from the request
            body when ``None``.
        continuation_token: Token for fetching the next page; omitted from
            the request body when ``None``.

    Returns:
        Parsed JSON response with ``results``, ``total``, and ``continuationToken``.

    Raises:
        RoboflowError: On non-200 response status codes.
    """
    request_body: Dict[str, Union[str, int, List[str]]] = {"query": query, "pageSize": page_size}

    # Optional entries are only sent when the caller actually supplied them.
    for key, value in (("fields", fields), ("continuationToken", continuation_token)):
        if value is not None:
            request_body[key] = value

    endpoint = f"{API_URL}/{workspace_url}/search/v1?api_key={api_key}"
    reply = requests.post(endpoint, json=request_body)
    if reply.status_code == 200:
        return reply.json()
    raise RoboflowError(reply.text)
240+
241+
202242
def upload_image(
203243
api_key,
204244
project_url,

roboflow/core/workspace.py

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import os
77
import sys
88
import time
9-
from typing import Any, Dict, List, Optional
9+
from typing import Any, Dict, Generator, List, Optional
1010

1111
import requests
1212
from PIL import Image
@@ -666,6 +666,89 @@ def _upload_zip(
666666
except Exception as e:
667667
print(f"An error occured when uploading the model: {e}")
668668

669+
def search(
    self,
    query: str,
    page_size: int = 50,
    fields: Optional[List[str]] = None,
    continuation_token: Optional[str] = None,
) -> dict:
    """Fetch one page of workspace-wide image search results.

    Thin wrapper around ``rfapi.workspace_search`` that supplies this
    workspace's API key and url and fills in the default field list.

    Args:
        query: RoboQL search query (e.g. ``"tag:review"``, ``"project:false"``
            for orphan images, or free-text for semantic CLIP search).
        page_size: Number of results per page (default 50).
        fields: Fields to include in each result.
            Defaults to ``["tags", "projects", "filename"]``.
        continuation_token: Token returned by a previous call for fetching
            the next page.

    Returns:
        Dict with ``results`` (list), ``total`` (int), and
        ``continuationToken`` (str or None).

    Example:
        >>> ws = rf.workspace()
        >>> page = ws.search("tag:review", page_size=10)
        >>> print(page["total"])
        >>> for img in page["results"]:
        ...     print(img["filename"])
    """
    # A fresh list is built per call, so the default is never shared between calls.
    requested_fields = ["tags", "projects", "filename"] if fields is None else fields

    return rfapi.workspace_search(
        api_key=self.__api_key,
        workspace_url=self.url,
        query=query,
        page_size=page_size,
        fields=requested_fields,
        continuation_token=continuation_token,
    )
709+
710+
def search_all(
    self,
    query: str,
    page_size: int = 50,
    fields: Optional[List[str]] = None,
) -> Generator[List[dict], None, None]:
    """Iterate over every page of a workspace-wide image search.

    Calls ``search`` repeatedly, feeding each response's
    ``continuationToken`` into the next request. Iteration ends at the first
    page with no results or the first response without a continuation token.

    Args:
        query: RoboQL search query.
        page_size: Number of results per page (default 50).
        fields: Fields to include in each result.
            Defaults to ``["tags", "projects", "filename"]``.

    Yields:
        A list of result dicts for each page.

    Example:
        >>> ws = rf.workspace()
        >>> for page in ws.search_all("tag:review"):
        ...     for img in page:
        ...         print(img["filename"])
    """
    next_token = None
    exhausted = False
    while not exhausted:
        page = self.search(
            query=query,
            page_size=page_size,
            fields=fields,
            continuation_token=next_token,
        )
        batch = page.get("results", [])
        if not batch:
            # An empty page ends iteration even if a token was returned.
            return
        yield batch
        next_token = page.get("continuationToken")
        exhausted = not next_token
751+
669752
def search_export(
670753
self,
671754
query: str,
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""Manual demo for workspace-level search (DATAMAN-163).
2+
3+
Usage:
4+
python tests/manual/demo_workspace_search.py
5+
6+
Uses staging credentials from CLAUDE.md.
7+
"""
8+
9+
import os

import roboflow

# The key can be supplied through the ROBOFLOW_API_KEY environment variable so
# that real credentials never have to be edited into this file; the original
# placeholder is kept as the fallback.
API_KEY = os.environ.get("ROBOFLOW_API_KEY", "some api key")
WORKSPACE = "model-evaluation-workspace"

# The paginated demo stops after this many pages to keep the output short.
MAX_DEMO_PAGES = 2


def main() -> None:
    """Run the single-page and paginated workspace search demos.

    Performs live API calls against ``WORKSPACE`` using ``API_KEY`` and prints
    the results to stdout.
    """
    rf = roboflow.Roboflow(api_key=API_KEY)
    ws = rf.workspace(WORKSPACE)

    # --- Single page search ---
    print("=== Single page search ===")
    page = ws.search("*", page_size=5)
    print(f"Total results: {page['total']}")
    print(f"Results in this page: {len(page['results'])}")
    print(f"Continuation token: {page.get('continuationToken')}")
    for img in page["results"]:
        print(f"  - {img.get('filename', 'N/A')}")

    # --- Paginated search_all ---
    print("\n=== Paginated search_all (page_size=3, max 2 pages) ===")
    for page_number, page_results in enumerate(ws.search_all("*", page_size=3), start=1):
        print(f"Page {page_number}: {len(page_results)} results")
        for img in page_results:
            print(f"  - {img.get('filename', 'N/A')}")
        if page_number >= MAX_DEMO_PAGES:
            print("(stopping after 2 pages for demo)")
            break

    print("\nDone.")


# Guarded so that importing this module (e.g. by tooling) triggers no network calls.
if __name__ == "__main__":
    main()

tests/test_workspace_search.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import json
2+
import unittest
3+
4+
import responses
5+
6+
from roboflow.adapters.rfapi import RoboflowError
7+
from roboflow.config import API_URL
8+
9+
10+
class TestWorkspaceSearch(unittest.TestCase):
    """Tests for ``Workspace.search`` and ``Workspace.search_all``.

    HTTP traffic is intercepted with ``responses``; each test registers the
    canned replies it needs for ``SEARCH_URL`` and then inspects the JSON
    body of the outgoing request(s).
    """

    API_KEY = "test_key"
    WORKSPACE = "test-ws"
    SEARCH_URL = f"{API_URL}/{WORKSPACE}/search/v1?api_key={API_KEY}"

    def _make_workspace(self):
        """Build a ``Workspace`` with no projects or members for the test slug."""
        from roboflow.core.workspace import Workspace

        workspace_info = {
            "name": "Test",
            "url": self.WORKSPACE,
            "projects": [],
            "members": [],
        }
        return Workspace(
            {"workspace": workspace_info},
            api_key=self.API_KEY,
            default_workspace=self.WORKSPACE,
            model_format="yolov8",
        )

    def _register_reply(self, payload, status=200):
        """Queue one canned POST reply for the search endpoint."""
        responses.add(responses.POST, self.SEARCH_URL, json=payload, status=status)

    @staticmethod
    def _sent_json(call_index=0):
        """Decode the JSON body of the ``call_index``-th intercepted request."""
        return json.loads(responses.calls[call_index].request.body)

    # --- search() tests ---

    @responses.activate
    def test_search_basic(self):
        """Default arguments produce the expected payload and pass the reply through."""
        self._register_reply(
            {
                "results": [{"filename": "a.jpg"}, {"filename": "b.jpg"}],
                "total": 2,
                "continuationToken": None,
            }
        )

        outcome = self._make_workspace().search("tag:review")

        self.assertEqual(outcome["total"], 2)
        self.assertEqual(len(outcome["results"]), 2)
        self.assertIsNone(outcome["continuationToken"])

        # Verify request payload
        request_payload = self._sent_json()
        self.assertEqual(request_payload["query"], "tag:review")
        self.assertEqual(request_payload["pageSize"], 50)
        self.assertEqual(request_payload["fields"], ["tags", "projects", "filename"])
        self.assertNotIn("continuationToken", request_payload)

    @responses.activate
    def test_search_with_continuation_token(self):
        """An explicit continuation token is forwarded in the request body."""
        self._register_reply({"results": [{"filename": "c.jpg"}], "total": 3, "continuationToken": None})

        self._make_workspace().search("*", continuation_token="tok_abc")

        self.assertEqual(self._sent_json()["continuationToken"], "tok_abc")

    @responses.activate
    def test_search_custom_fields(self):
        """Caller-supplied fields replace the default field list."""
        self._register_reply({"results": [], "total": 0, "continuationToken": None})

        self._make_workspace().search("*", fields=["filename", "embedding"])

        self.assertEqual(self._sent_json()["fields"], ["filename", "embedding"])

    @responses.activate
    def test_search_api_error(self):
        """A non-200 reply surfaces as ``RoboflowError``."""
        self._register_reply({"error": "unauthorized"}, status=401)

        workspace = self._make_workspace()
        with self.assertRaises(RoboflowError):
            workspace.search("tag:review")

    # --- search_all() tests ---

    @responses.activate
    def test_search_all_single_page(self):
        """A reply without a continuation token yields exactly one page."""
        self._register_reply(
            {
                "results": [{"filename": "a.jpg"}, {"filename": "b.jpg"}],
                "total": 2,
                "continuationToken": None,
            }
        )

        collected = list(self._make_workspace().search_all("*"))

        self.assertEqual(len(collected), 1)
        self.assertEqual(len(collected[0]), 2)

    @responses.activate
    def test_search_all_multiple_pages(self):
        """The token from page one is sent with the request for page two."""
        first_reply = {
            "results": [{"filename": "a.jpg"}],
            "total": 2,
            "continuationToken": "tok_page2",
        }
        second_reply = {
            "results": [{"filename": "b.jpg"}],
            "total": 2,
            "continuationToken": None,
        }
        # Replies registered for the same URL are served in registration order.
        for reply in (first_reply, second_reply):
            self._register_reply(reply)

        collected = list(self._make_workspace().search_all("*", page_size=1))

        self.assertEqual(len(collected), 2)
        self.assertEqual(collected[0][0]["filename"], "a.jpg")
        self.assertEqual(collected[1][0]["filename"], "b.jpg")

        # Verify second request used the continuation token
        self.assertEqual(self._sent_json(1)["continuationToken"], "tok_page2")

    @responses.activate
    def test_search_all_empty_results(self):
        """An empty first page yields no pages at all."""
        self._register_reply({"results": [], "total": 0, "continuationToken": None})

        collected = list(self._make_workspace().search_all("*"))

        self.assertEqual(len(collected), 0)
135+
136+
137+
if __name__ == "__main__":
138+
unittest.main()

0 commit comments

Comments
 (0)