Merge pull request #25 from Ontos-AI/fix/wangbinqi/retrieval-response-contract

suguanYang · web-flow · commit d439490fbf88 · 2026-05-28T23:03:28.000+08:00
fix: sync retrieval response SDK contract
diff --git a/README.md b/README.md
@@ -66,6 +66,13 @@ response = client.retrieval.query(
 )
 
 print(response.router_used)
+print(response.answer_text)
+print(response.evidence_text)
+print(response.stop_reason)
+print(response.failure_reason)
+
+for reference in response.referenced_chunks:
+    print(reference.chunk_id, reference.document_id, reference.asset_url)
 
 for result in response.results:
     print(result.content)
diff --git a/docs/usage.md b/docs/usage.md
@@ -482,8 +482,12 @@ response = client.retrieval.query(
 )
 print(response.answer_text)          # LLM-generated natural-language answer
 print(response.router_used)          # "workflow_single_step", "small_kb_all", etc.
+print(response.evidence_text)        # rendered evidence context, when returned
+print(response.stop_reason)          # agentic termination reason, when returned
+print(response.failure_reason)       # no-answer reason, when returned
 for ref in response.referenced_chunks:
-    print(ref.get("chunk_id"), ref.get("asset_url"))
+    print(ref.chunk_id, ref.document_id, ref.chunk_type)
+    print(ref.section_path, ref.file_path, ref.job_id, ref.asset_url)
 
 # Legacy results are always available
 for result in response.results:
diff --git a/src/knowhere/__init__.py b/src/knowhere/__init__.py
@@ -49,6 +49,7 @@
 from knowhere.types.retrieval import (
     RetrievalChannel,
     RetrievalFilterMode,
+    RetrievalReferencedChunk,
     RetrievalSectionExclusion,
     RetrievalSource,
     RetrievalQueryResponse,
@@ -115,6 +116,7 @@
     # Retrieval types
     "RetrievalChannel",
     "RetrievalFilterMode",
+    "RetrievalReferencedChunk",
     "RetrievalSectionExclusion",
     "RetrievalSource",
     "RetrievalQueryResponse",
diff --git a/src/knowhere/types/__init__.py b/src/knowhere/types/__init__.py
@@ -16,6 +16,7 @@
 from knowhere.types.retrieval import (
     RetrievalChannel,
     RetrievalFilterMode,
+    RetrievalReferencedChunk,
     RetrievalSectionExclusion,
     RetrievalSource,
     RetrievalQueryResponse,
@@ -56,6 +57,7 @@
     # retrieval
     "RetrievalChannel",
     "RetrievalFilterMode",
+    "RetrievalReferencedChunk",
     "RetrievalSectionExclusion",
     "RetrievalSource",
     "RetrievalQueryResponse",
diff --git a/src/knowhere/types/retrieval.py b/src/knowhere/types/retrieval.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, List, Literal, Optional, TypedDict
+from typing import Literal, Optional, TypedDict
 
 from pydantic import BaseModel, Field
 
@@ -36,17 +36,31 @@ class RetrievalResult(BaseModel):
     source: RetrievalSource
 
 
+class RetrievalReferencedChunk(BaseModel):
+    """Cited evidence chunk returned by agentic retrieval."""
+
+    chunk_id: str
+    document_id: str
+    chunk_type: str
+    section_path: str
+    file_path: Optional[str] = None
+    job_id: Optional[str] = None
+    asset_url: Optional[str] = None
+
+
 class RetrievalQueryResponse(BaseModel):
     """Response from ``POST /v1/retrieval/query``.
 
-    Agentic fields (``answer_text``, ``referenced_chunks``) are only
-    populated when ``use_agentic=True``.  In legacy retrieval mode they
-    default to ``None`` and ``[]`` respectively.
+    Besides ``answer_text`` and ``referenced_chunks``, agentic retrieval may
+    return ``evidence_text``, ``stop_reason``, and ``failure_reason`` diagnostics.
     """
 
     namespace: str
     query: str
-    router_used: Optional[str] = None
+    router_used: str
     answer_text: Optional[str] = None
-    referenced_chunks: List[Dict[str, Any]] = Field(default_factory=list)
+    referenced_chunks: list[RetrievalReferencedChunk] = Field(default_factory=list)
+    evidence_text: Optional[str] = None
+    stop_reason: Optional[str] = None
+    failure_reason: Optional[str] = None
     results: list[RetrievalResult]
diff --git a/tests/test_retrieval.py b/tests/test_retrieval.py
@@ -21,10 +21,17 @@ def _make_retrieval_response() -> Dict[str, Any]:
         "query": "refund policy",
         "router_used": "discovery+agent",
         "answer_text": "Annual plans may be refunded within 30 days of purchase.",
+        "evidence_text": "Rendered retrieval evidence",
+        "stop_reason": "answer_done",
+        "failure_reason": "insufficient evidence",
         "referenced_chunks": [
             {
                 "chunk_id": "chunk_001",
                 "document_id": "doc_123",
+                "chunk_type": "text",
+                "section_path": "Policies / Billing / Refunds",
+                "file_path": None,
+                "job_id": "job_123",
                 "asset_url": "https://example.com/assets/chunk_001",
             }
         ],
@@ -44,11 +51,13 @@ def _make_retrieval_response() -> Dict[str, Any]:
 
 
 def _make_legacy_retrieval_response() -> Dict[str, Any]:
-    """Legacy-mode response without agentic fields (backward compatibility)."""
+    """Legacy-mode response with server-default agentic fields."""
     return {
         "namespace": "support-center",
         "query": "refund policy",
         "router_used": "discovery+legacy",
+        "answer_text": None,
+        "referenced_chunks": [],
         "results": [
             {
                 "chunk_type": "text",
@@ -126,7 +135,12 @@ def test_query_sends_request_and_returns_results(self, sync_client: Any) -> None
             "Annual plans may be refunded within 30 days of purchase."
         )
         assert len(response.referenced_chunks) == 1
-        assert response.referenced_chunks[0]["chunk_id"] == "chunk_001"
+        assert response.evidence_text == "Rendered retrieval evidence"
+        assert response.stop_reason == "answer_done"
+        assert response.failure_reason == "insufficient evidence"
+        assert response.referenced_chunks[0].chunk_id == "chunk_001"
+        assert response.referenced_chunks[0].chunk_type == "text"
+        assert response.referenced_chunks[0].file_path is None
         assert not hasattr(response.results[0], "citation")
         assert not hasattr(response.results[0], "chunk_id")
         assert not hasattr(response.results[0], "section_id")
@@ -188,8 +202,8 @@ def test_use_agentic_omitted_when_none(self, sync_client: Any) -> None:
 
     @respx.mock
     def test_agentic_response_fields(self, sync_client: Any) -> None:
-        """Agentic response exposes answer_text and referenced_chunks."""
-        route = respx.post(RETRIEVAL_QUERY_URL).mock(
+        """Agentic response exposes answer, evidence, and typed references."""
+        respx.post(RETRIEVAL_QUERY_URL).mock(
             return_value=httpx.Response(200, json=_make_retrieval_response())
         )
 
@@ -202,15 +216,23 @@ def test_agentic_response_fields(self, sync_client: Any) -> None:
             "Annual plans may be refunded within 30 days of purchase."
         )
         assert len(response.referenced_chunks) == 1
-        assert response.referenced_chunks[0]["chunk_id"] == "chunk_001"
-        assert response.referenced_chunks[0]["asset_url"] == (
+        assert response.referenced_chunks[0].chunk_id == "chunk_001"
+        assert response.referenced_chunks[0].document_id == "doc_123"
+        assert response.referenced_chunks[0].chunk_type == "text"
+        assert response.referenced_chunks[0].section_path == "Policies / Billing / Refunds"
+        assert response.referenced_chunks[0].file_path is None
+        assert response.referenced_chunks[0].job_id == "job_123"
+        assert response.referenced_chunks[0].asset_url == (
             "https://example.com/assets/chunk_001"
         )
+        assert response.evidence_text == "Rendered retrieval evidence"
+        assert response.stop_reason == "answer_done"
+        assert response.failure_reason == "insufficient evidence"
 
     @respx.mock
     def test_legacy_response_without_agentic_fields(self, sync_client: Any) -> None:
-        """Legacy-mode response (no agentic fields) parses without error."""
-        route = respx.post(RETRIEVAL_QUERY_URL).mock(
+        """Legacy-mode response defaults agentic fields to null and empty references."""
+        respx.post(RETRIEVAL_QUERY_URL).mock(
             return_value=httpx.Response(
                 200, json=_make_legacy_retrieval_response()
             )