Add search and search_all functionality to Workspace

digaobarbosa · digaobarbosa · commit aade1e055546 · 2026-02-27T15:37:29.000-03:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -88,6 +88,9 @@ convention = "google"
     "E402", # Module level import not at top of file
     "F401", # Imported but unused
 ]
+"tests/manual/*.py" = [
+    "INP001", # Manual scripts don't need __init__.py
+]
 
 [tool.ruff.lint.pyupgrade]
 # Preserve types, even if a file imports `from __future__ import annotations`.
diff --git a/roboflow/adapters/rfapi.py b/roboflow/adapters/rfapi.py
@@ -199,6 +199,46 @@ def get_search_export(api_key: str, workspace_url: str, export_id: str, session:
     return response.json()
 
 
+def workspace_search(
+    api_key: str,
+    workspace_url: str,
+    query: str,
+    page_size: int = 50,
+    fields: Optional[List[str]] = None,
+    continuation_token: Optional[str] = None,
+) -> dict:
+    """Run a RoboQL query over every image in a workspace (one page per call).
+
+    Issues a single POST to ``{API_URL}/{workspace_url}/search/v1``; optional
+    arguments left as ``None`` are omitted from the JSON body entirely.
+
+    Args:
+        api_key: Roboflow API key, sent as the ``api_key`` query parameter.
+        workspace_url: Workspace slug/url used as the path prefix.
+        query: RoboQL search query (e.g. ``"tag:review"``, ``"project:false"``).
+        page_size: Results requested per page, sent as ``pageSize`` (default 50).
+        fields: Fields to include in each result; omitted when ``None``.
+        continuation_token: Token for the next page; omitted when ``None``.
+
+    Returns:
+        The decoded JSON response (``results``, ``total``, ``continuationToken``).
+
+    Raises:
+        RoboflowError: When the response status is anything other than 200;
+            the message is the raw response text.
+    """
+    body: Dict[str, Union[str, int, List[str]]] = {"query": query, "pageSize": page_size}
+    if fields is not None:
+        body["fields"] = fields
+    if continuation_token is not None:
+        body["continuationToken"] = continuation_token
+
+    reply = requests.post(f"{API_URL}/{workspace_url}/search/v1?api_key={api_key}", json=body)
+    if reply.status_code == 200:
+        return reply.json()
+    raise RoboflowError(reply.text)
+
+
 def upload_image(
     api_key,
     project_url,
diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
@@ -6,7 +6,7 @@
 import os
 import sys
 import time
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Generator, List, Optional
 
 import requests
 from PIL import Image
@@ -666,6 +666,89 @@ def _upload_zip(
         except Exception as e:
             print(f"An error occured when uploading the model: {e}")
 
+    def search(
+        self,
+        query: str,
+        page_size: int = 50,
+        fields: Optional[List[str]] = None,
+        continuation_token: Optional[str] = None,
+    ) -> dict:
+        """Fetch one page of workspace-wide image search results.
+
+        Thin wrapper over ``rfapi.workspace_search`` that supplies this
+        workspace's API key and url, plus a default field selection.
+
+        Args:
+            query: RoboQL search query (e.g. ``"tag:review"``, ``"project:false"``
+                for orphan images, or free-text for semantic CLIP search).
+            page_size: Number of results per page (default 50).
+            fields: Fields to include in each result. ``None`` selects
+                ``["tags", "projects", "filename"]``.
+            continuation_token: Token from a previous call, used to fetch the next page.
+
+        Returns:
+            Dict with ``results`` (list), ``total`` (int), and
+            ``continuationToken`` (str or None).
+
+        Example:
+            >>> ws = rf.workspace()
+            >>> page = ws.search("tag:review", page_size=10)
+            >>> print(page["total"])
+            >>> for img in page["results"]:
+            ...     print(img["filename"])
+        """
+        requested_fields = ["tags", "projects", "filename"] if fields is None else fields
+        return rfapi.workspace_search(
+            query=query,
+            fields=requested_fields,
+            page_size=page_size,
+            continuation_token=continuation_token,
+            workspace_url=self.url,
+            api_key=self.__api_key,
+        )
+
+    def search_all(
+        self,
+        query: str,
+        page_size: int = 50,
+        fields: Optional[List[str]] = None,
+    ) -> Generator[List[dict], None, None]:
+        """Iterate over every page of a workspace-wide image search.
+
+        Calls ``search`` repeatedly, passing each response's
+        ``continuationToken`` to the next request. Iteration stops at the
+        first page that has no results or carries no continuation token.
+
+        Args:
+            query: RoboQL search query.
+            page_size: Number of results per page (default 50).
+            fields: Fields to include in each result. ``None`` lets
+                ``search`` apply its default selection.
+
+        Yields:
+            The ``results`` list of each non-empty page.
+
+        Example:
+            >>> ws = rf.workspace()
+            >>> for page in ws.search_all("tag:review"):
+            ...     for img in page:
+            ...         print(img["filename"])
+        """
+        # The first request carries no continuation token.
+        next_token = None
+        while True:
+            page = self.search(query=query, page_size=page_size, fields=fields, continuation_token=next_token)
+            items = page.get("results", [])
+            if not items:
+                # An empty page is never yielded; it simply ends the stream.
+                return
+            yield items
+
+            next_token = page.get("continuationToken")
+            if not next_token:
+                # No token to follow up with, so this was the last page.
+                return
+
     def search_export(
         self,
         query: str,
diff --git a/tests/manual/demo_workspace_search.py b/tests/manual/demo_workspace_search.py
@@ -0,0 +1,47 @@
+"""Manual demo for workspace-level search (DATAMAN-163).
+
+Usage:
+    ROBOFLOW_API_KEY=<key> python tests/manual/demo_workspace_search.py
+
+Set ROBOFLOW_API_KEY to the staging credentials from CLAUDE.md; ROBOFLOW_WORKSPACE
+optionally overrides the workspace. Do not paste a real key into this file.
+"""
+
+import os
+
+import roboflow
+
+API_KEY = os.environ.get("ROBOFLOW_API_KEY", "some api key")
+WORKSPACE = os.environ.get("ROBOFLOW_WORKSPACE", "model-evaluation-workspace")
+MAX_PAGES = 2  # Cap on search_all pages so the demo stays short.
+
+
+def main() -> None:
+    """Run one single-page search, then walk the first pages of ``search_all``."""
+    rf = roboflow.Roboflow(api_key=API_KEY)
+    ws = rf.workspace(WORKSPACE)
+
+    # --- Single page search ---
+    print("=== Single page search ===")
+    page = ws.search("*", page_size=5)
+    print(f"Total results: {page['total']}")
+    print(f"Results in this page: {len(page['results'])}")
+    print(f"Continuation token: {page.get('continuationToken')}")
+    for img in page["results"]:
+        print(f"  - {img.get('filename', 'N/A')}")
+
+    # --- Paginated search_all ---
+    print(f"\n=== Paginated search_all (page_size=3, max {MAX_PAGES} pages) ===")
+    for count, page_results in enumerate(ws.search_all("*", page_size=3), start=1):
+        print(f"Page {count}: {len(page_results)} results")
+        for img in page_results:
+            print(f"  - {img.get('filename', 'N/A')}")
+        if count >= MAX_PAGES:
+            print(f"(stopping after {MAX_PAGES} pages for demo)")
+            break
+
+    print("\nDone.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_workspace_search.py b/tests/test_workspace_search.py
@@ -0,0 +1,129 @@
+import json
+import unittest
+
+import responses
+
+from roboflow.adapters.rfapi import RoboflowError
+from roboflow.config import API_URL
+
+
+class TestWorkspaceSearch(unittest.TestCase):
+    """Cover ``Workspace.search`` / ``search_all`` against a mocked search endpoint."""
+
+    API_KEY = "test_key"
+    WORKSPACE = "test-ws"
+    SEARCH_URL = f"{API_URL}/{WORKSPACE}/search/v1?api_key={API_KEY}"
+
+    def _make_workspace(self):
+        # Local import keeps the Workspace dependency out of module import time.
+        from roboflow.core.workspace import Workspace
+
+        details = {"name": "Test", "url": self.WORKSPACE, "projects": [], "members": []}
+        return Workspace(
+            {"workspace": details},
+            api_key=self.API_KEY,
+            default_workspace=self.WORKSPACE,
+            model_format="yolov8",
+        )
+
+    def _mock_search(self, payload, status=200):
+        # Queue one canned POST response for the search endpoint.
+        responses.add(responses.POST, self.SEARCH_URL, json=payload, status=status)
+
+    @staticmethod
+    def _sent_body(call_index=0):
+        # Decode the JSON body of the call_index-th request captured by `responses`.
+        return json.loads(responses.calls[call_index].request.body)
+
+    # --- search() tests ---
+
+    @responses.activate
+    def test_search_basic(self):
+        self._mock_search(
+            {
+                "results": [{"filename": "a.jpg"}, {"filename": "b.jpg"}],
+                "total": 2,
+                "continuationToken": None,
+            }
+        )
+
+        found = self._make_workspace().search("tag:review")
+
+        self.assertEqual(found["total"], 2)
+        self.assertEqual(len(found["results"]), 2)
+        self.assertIsNone(found["continuationToken"])
+
+        # The request must carry the query plus the defaults, and no token.
+        request_body = self._sent_body()
+        self.assertEqual(request_body["query"], "tag:review")
+        self.assertEqual(request_body["pageSize"], 50)
+        self.assertEqual(request_body["fields"], ["tags", "projects", "filename"])
+        self.assertNotIn("continuationToken", request_body)
+
+    @responses.activate
+    def test_search_with_continuation_token(self):
+        self._mock_search({"results": [{"filename": "c.jpg"}], "total": 3, "continuationToken": None})
+
+        self._make_workspace().search("*", continuation_token="tok_abc")
+
+        self.assertEqual(self._sent_body()["continuationToken"], "tok_abc")
+
+    @responses.activate
+    def test_search_custom_fields(self):
+        self._mock_search({"results": [], "total": 0, "continuationToken": None})
+
+        self._make_workspace().search("*", fields=["filename", "embedding"])
+
+        self.assertEqual(self._sent_body()["fields"], ["filename", "embedding"])
+
+    @responses.activate
+    def test_search_api_error(self):
+        self._mock_search({"error": "unauthorized"}, status=401)
+
+        workspace = self._make_workspace()
+        with self.assertRaises(RoboflowError):
+            workspace.search("tag:review")
+
+    # --- search_all() tests ---
+
+    @responses.activate
+    def test_search_all_single_page(self):
+        self._mock_search(
+            {
+                "results": [{"filename": "a.jpg"}, {"filename": "b.jpg"}],
+                "total": 2,
+                "continuationToken": None,
+            }
+        )
+
+        pages = list(self._make_workspace().search_all("*"))
+
+        self.assertEqual(len(pages), 1)
+        self.assertEqual(len(pages[0]), 2)
+
+    @responses.activate
+    def test_search_all_multiple_pages(self):
+        first_page = {"results": [{"filename": "a.jpg"}], "total": 2, "continuationToken": "tok_page2"}
+        last_page = {"results": [{"filename": "b.jpg"}], "total": 2, "continuationToken": None}
+        for payload in (first_page, last_page):
+            self._mock_search(payload)
+
+        pages = list(self._make_workspace().search_all("*", page_size=1))
+
+        self.assertEqual(len(pages), 2)
+        self.assertEqual([page[0]["filename"] for page in pages], ["a.jpg", "b.jpg"])
+
+        # The second request must echo the token handed back with the first page.
+        self.assertEqual(self._sent_body(1)["continuationToken"], "tok_page2")
+
+    @responses.activate
+    def test_search_all_empty_results(self):
+        self._mock_search({"results": [], "total": 0, "continuationToken": None})
+
+        pages = list(self._make_workspace().search_all("*"))
+
+        self.assertEqual(pages, [])
+
+
+if __name__ == "__main__":
+    unittest.main()

Original file line number	Diff line number	Diff line change
`@@ -88,6 +88,9 @@ convention = "google"`
`88`	`88`	`"E402", # Module level import not at top of file`
`89`	`89`	`"F401", # Imported but unused`
`90`	`90`	`]`
	`91`	`+"tests/manual/*.py" = [`
	`92`	`+ "INP001", # Manual scripts don't need __init__.py`
	`93`	`+]`
`91`	`94`
`92`	`95`	`[tool.ruff.lint.pyupgrade]`
`93`	`96`	# Preserve types, even if a file imports `from __future__ import annotations`.