From 9cceae044b6718d078c07ff5c0adb4bda3c0e315 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Mon, 11 May 2026 17:02:34 -0700 Subject: [PATCH 1/3] fix output for query cli --- .../src/nemo_retriever/adapters/cli/sdk_workflow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py index 7892e239d5..73ace17319 100644 --- a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py +++ b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py @@ -81,4 +81,6 @@ def query_documents( ) -> list[dict[str, Any]]: """Run the minimal SDK query path used by the root CLI.""" retriever = Retriever(top_k=top_k, vdb_kwargs={"uri": lancedb_uri, "table_name": table_name}) - return retriever.query(query) + hits = retriever.query(query) + hits = [{"text": hit["text"], "source": hit["source"], "page_number": hit["page_number"]} for hit in hits] + return hits From b8649acd75bff6c78cd43b43bb2024f9a6c503c7 Mon Sep 17 00:00:00 2001 From: Julio Perez <37191411+jperez999@users.noreply.github.com> Date: Mon, 11 May 2026 20:41:07 -0400 Subject: [PATCH 2/3] Update nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py index 73ace17319..e1193d1200 100644 --- a/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py +++ b/nemo_retriever/src/nemo_retriever/adapters/cli/sdk_workflow.py @@ -82,5 +82,5 @@ def query_documents( """Run the minimal SDK query path used by the root CLI.""" retriever = Retriever(top_k=top_k, vdb_kwargs={"uri": lancedb_uri, "table_name": table_name}) hits = retriever.query(query) - hits = [{"text": hit["text"], "source": hit["source"], "page_number": hit["page_number"]} for hit in hits] + hits = [{"text": hit.get("text", ""), "source": hit.get("source", ""), "page_number": hit.get("page_number")} for hit in hits] return hits From e821ef7b681ba2db3719e6339d1149e5156ec0fc Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Mon, 11 May 2026 17:48:34 -0700 Subject: [PATCH 3/3] fix format of results --- nemo_retriever/tests/test_root_cli_workflow.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/nemo_retriever/tests/test_root_cli_workflow.py b/nemo_retriever/tests/test_root_cli_workflow.py index 2fb6701511..44d10aea82 100644 --- a/nemo_retriever/tests/test_root_cli_workflow.py +++ b/nemo_retriever/tests/test_root_cli_workflow.py @@ -127,9 +127,13 @@ def fail_create_ingestor(**_kwargs: Any) -> Any: def test_root_query_passes_query_options_and_prints_json(monkeypatch) -> None: retriever_calls: list[dict[str, Any]] = [] query_calls: list[str] = [] - hits = [ - {"text": "passage", "page_number": 1, "_distance": 0.2}, - {"text": "other", "page_number": 2, "_distance": 0.4}, + raw_hits = [ + {"text": "passage", "source": "a.pdf", "page_number": 1, "_distance": 0.2}, + {"text": "other", "source": "b.pdf", "page_number": 2, "_distance": 0.4}, + ] + # query_documents exposes only text / source / page_number (no scores or extra keys). + public_hits = [ + {"text": h["text"], "source": h["source"], "page_number": h["page_number"]} for h in raw_hits ] class FakeRetriever: @@ -138,7 +142,7 @@ def __init__(self, **kwargs: Any) -> None: def query(self, query: str) -> list[dict[str, Any]]: query_calls.append(query) - return hits + return raw_hits monkeypatch.setattr(sdk_workflow, "Retriever", FakeRetriever) @@ -159,5 +163,5 @@ def query(self, query: str) -> list[dict[str, Any]]: assert result.exit_code == 0 assert retriever_calls == [{"top_k": 3, "vdb_kwargs": {"uri": "/tmp/lancedb", "table_name": "docs"}}] assert query_calls == ["Which animal is responsible for typos?"] - assert json.loads(result.output) == hits - assert result.output == json.dumps(hits, indent=2, sort_keys=True, default=str) + "\n" + assert json.loads(result.output) == public_hits + assert result.output == json.dumps(public_hits, indent=2, sort_keys=True, default=str) + "\n"