From 4cd0215d5bf359210345290fc866790da342fd84 Mon Sep 17 00:00:00 2001 From: are-ces <195810094+are-ces@users.noreply.github.com> Date: Tue, 9 Dec 2025 13:46:05 +0100 Subject: [PATCH] Added rag_chunks to turn summary --- src/app/endpoints/query_v2.py | 37 +++++++++- .../endpoints/test_query_v2_integration.py | 1 + tests/unit/app/endpoints/test_query_v2.py | 71 ++++++++++++++----- 3 files changed, 87 insertions(+), 22 deletions(-) diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py index 5e0a8c87c..36dd6a220 100644 --- a/src/app/endpoints/query_v2.py +++ b/src/app/endpoints/query_v2.py @@ -40,12 +40,12 @@ get_system_prompt, get_topic_summary_system_prompt, ) -from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id from utils.mcp_headers import mcp_headers_dependency from utils.responses import extract_text_from_response_output_item from utils.shields import detect_shield_violations, get_available_shields +from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id from utils.token_counter import TokenCounter -from utils.types import ToolCallSummary, ToolResultSummary, TurnSummary +from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary, TurnSummary logger = logging.getLogger("app.endpoints.handlers") router = APIRouter(tags=["query_v1"]) @@ -419,11 +419,14 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche len(llm_response), ) + # Extract rag chunks + rag_chunks = parse_rag_chunks_from_responses_api(response) + summary = TurnSummary( llm_response=llm_response, tool_calls=tool_calls, tool_results=tool_results, - rag_chunks=[], + rag_chunks=rag_chunks, ) # Extract referenced documents and token usage from Responses API response @@ -449,6 +452,34 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche return (summary, normalized_conversation_id, referenced_documents, token_usage) +def parse_rag_chunks_from_responses_api(response_obj: Any) -> list[RAGChunk]: + """ + Extract rag_chunks from the llama-stack OpenAI response. + + Args: + response_obj: The ResponseObject from OpenAI compatible response API in llama-stack. + + Returns: + List of RAGChunk with content, source, score + """ + rag_chunks = [] + + for output_item in response_obj.output: + if ( + hasattr(output_item, "type") + and output_item.type == "file_search_call" + and hasattr(output_item, "results") + ): + + for result in output_item.results: + rag_chunk = RAGChunk( + content=result.text, source="file_search", score=result.score + ) + rag_chunks.append(rag_chunk) + + return rag_chunks + + def parse_referenced_documents_from_responses_api( response: OpenAIResponseObject, # pylint: disable=unused-argument ) -> list[ReferencedDocument]: diff --git a/tests/integration/endpoints/test_query_v2_integration.py b/tests/integration/endpoints/test_query_v2_integration.py index 5091ec61f..72c1db9b4 100644 --- a/tests/integration/endpoints/test_query_v2_integration.py +++ b/tests/integration/endpoints/test_query_v2_integration.py @@ -347,6 +347,7 @@ async def test_query_v2_endpoint_with_tool_calls( mock_result.file_id = "doc-1" mock_result.filename = "ansible-docs.txt" mock_result.score = 0.95 + mock_result.text = "Ansible is an open-source automation tool..." mock_result.attributes = { "doc_url": "https://example.com/ansible-docs.txt", "link": "https://example.com/ansible-docs.txt", diff --git a/tests/unit/app/endpoints/test_query_v2.py b/tests/unit/app/endpoints/test_query_v2.py index 38330eaaf..a0a866b0e 100644 --- a/tests/unit/app/endpoints/test_query_v2.py +++ b/tests/unit/app/endpoints/test_query_v2.py @@ -787,17 +787,12 @@ async def test_retrieve_response_no_violation_with_shields( validation_metric.inc.assert_not_called() -@pytest.mark.asyncio -async def test_retrieve_response_parses_referenced_documents( - mocker: MockerFixture, -) -> None: - """Test that retrieve_response correctly parses referenced documents from response.""" - mock_client = mocker.AsyncMock() - +def _create_message_output_with_citations(mocker: MockerFixture) -> Any: + """Create mock message output item with content annotations (citations).""" # 1. Output item with message content annotations (citations) - output_item_1 = mocker.Mock() - output_item_1.type = "message" - output_item_1.role = "assistant" + output_item = mocker.Mock() + output_item.type = "message" + output_item.role = "assistant" # Mock content with annotations content_part = mocker.Mock() @@ -816,19 +811,48 @@ async def test_retrieve_response_parses_referenced_documents( annotation2.title = None content_part.annotations = [annotation1, annotation2] - output_item_1.content = [content_part] + output_item.content = [content_part] + return output_item + +def _create_file_search_output(mocker: MockerFixture) -> Any: + """Create mock file search tool call output with results.""" # 2. Output item with file search tool call results - output_item_2 = mocker.Mock() - output_item_2.type = "file_search_call" - output_item_2.queries = ( + output_item = mocker.Mock() + output_item.type = "file_search_call" + output_item.queries = ( [] ) # Ensure queries is a list to avoid iteration error in tool summary - output_item_2.status = "completed" - output_item_2.results = [ - {"filename": "file2.pdf", "attributes": {"url": "http://example.com/doc2"}}, - {"filename": "file3.docx", "attributes": {}}, # No URL - ] + output_item.status = "completed" + # Create mock result objects with proper attributes matching real llama-stack response + result_1 = mocker.Mock() + result_1.filename = "file2.pdf" + result_1.attributes = {"url": "http://example.com/doc2"} + result_1.text = "Sample text from file2.pdf" + result_1.score = 0.95 + result_1.file_id = "file-123" + + result_2 = mocker.Mock() + result_2.filename = "file3.docx" + result_2.attributes = {} + result_2.text = "Sample text from file3.docx" + result_2.score = 0.85 + result_2.file_id = "file-456" + + output_item.results = [result_1, result_2] + return output_item + + +@pytest.mark.asyncio +async def test_retrieve_response_parses_referenced_documents( + mocker: MockerFixture, +) -> None: + """Test that retrieve_response correctly parses referenced documents from response.""" + mock_client = mocker.AsyncMock() + + # Create output items using helper functions + output_item_1 = _create_message_output_with_citations(mocker) + output_item_2 = _create_file_search_output(mocker) response_obj = mocker.Mock() response_obj.id = "resp-docs" @@ -870,3 +894,12 @@ async def test_retrieve_response_parses_referenced_documents( doc4 = next((d for d in referenced_docs if d.doc_title == "file3.docx"), None) assert doc4 assert doc4.doc_url is None + + # Verify RAG chunks were extracted from file_search_call results + assert len(_summary.rag_chunks) == 2 + assert _summary.rag_chunks[0].content == "Sample text from file2.pdf" + assert _summary.rag_chunks[0].source == "file_search" + assert _summary.rag_chunks[0].score == 0.95 + assert _summary.rag_chunks[1].content == "Sample text from file3.docx" + assert _summary.rag_chunks[1].source == "file_search" + assert _summary.rag_chunks[1].score == 0.85