37 changes: 34 additions & 3 deletions src/app/endpoints/query_v2.py
@@ -40,12 +40,12 @@
     get_system_prompt,
     get_topic_summary_system_prompt,
 )
-from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id
 from utils.mcp_headers import mcp_headers_dependency
 from utils.responses import extract_text_from_response_output_item
 from utils.shields import detect_shield_violations, get_available_shields
+from utils.suid import normalize_conversation_id, to_llama_stack_conversation_id
 from utils.token_counter import TokenCounter
-from utils.types import ToolCallSummary, ToolResultSummary, TurnSummary
+from utils.types import RAGChunk, ToolCallSummary, ToolResultSummary, TurnSummary
 
 logger = logging.getLogger("app.endpoints.handlers")
 router = APIRouter(tags=["query_v1"])
@@ -419,11 +419,14 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branches
         len(llm_response),
     )
 
+    # Extract rag chunks
+    rag_chunks = parse_rag_chunks_from_responses_api(response)
+
     summary = TurnSummary(
         llm_response=llm_response,
         tool_calls=tool_calls,
         tool_results=tool_results,
-        rag_chunks=[],
+        rag_chunks=rag_chunks,
     )
 
     # Extract referenced documents and token usage from Responses API response
@@ -449,6 +452,34 @@
     return (summary, normalized_conversation_id, referenced_documents, token_usage)
 
 
+def parse_rag_chunks_from_responses_api(response_obj: Any) -> list[RAGChunk]:
+    """
+    Extract RAG chunks from the llama-stack OpenAI-compatible Responses API response.
+
+    Args:
+        response_obj: Response object from llama-stack's OpenAI-compatible Responses API.
+
+    Returns:
+        A list of RAGChunk objects carrying the chunk content, source, and score.
+    """
+    rag_chunks = []
+
+    for output_item in response_obj.output:
+        if (
+            hasattr(output_item, "type")
+            and output_item.type == "file_search_call"
+            and hasattr(output_item, "results")
+        ):
+
+            for result in output_item.results:
+                rag_chunk = RAGChunk(
+                    content=result.text, source="file_search", score=result.score
+                )
+                rag_chunks.append(rag_chunk)
+
+    return rag_chunks
+
+
 def parse_referenced_documents_from_responses_api(
     response: OpenAIResponseObject,  # pylint: disable=unused-argument
 ) -> list[ReferencedDocument]:
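As an aside for reviewers (not part of the diff): a minimal, self-contained sketch of the extraction behaviour that the new `parse_rag_chunks_from_responses_api` helper implements. `SimpleNamespace` stands in for the llama-stack response object and `Chunk` is a hypothetical substitute for `utils.types.RAGChunk`; only the attribute names (`output`, `type`, `results`, `text`, `score`) and the `"file_search"` source label come from the change above.

```python
from dataclasses import dataclass
from types import SimpleNamespace


@dataclass
class Chunk:
    """Hypothetical stand-in for utils.types.RAGChunk."""

    content: str
    source: str
    score: float


def extract_chunks(response_obj: SimpleNamespace) -> list[Chunk]:
    """Mirror the helper's logic: collect text/score from file_search_call results."""
    chunks: list[Chunk] = []
    for item in response_obj.output:
        if getattr(item, "type", None) == "file_search_call" and hasattr(item, "results"):
            for result in item.results:
                chunks.append(Chunk(content=result.text, source="file_search", score=result.score))
    return chunks


# Stubbed Responses API object: one file_search_call output item with a single result.
response = SimpleNamespace(
    output=[
        SimpleNamespace(
            type="file_search_call",
            results=[SimpleNamespace(text="Ansible is an automation tool...", score=0.95)],
        )
    ]
)

assert [(c.content, c.score) for c in extract_chunks(response)] == [
    ("Ansible is an automation tool...", 0.95)
]
```

In the actual change, the resulting RAGChunk instances populate TurnSummary.rag_chunks, which the updated tests below assert on.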
1 change: 1 addition & 0 deletions tests/integration/endpoints/test_query_v2_integration.py
@@ -347,6 +347,7 @@ async def test_query_v2_endpoint_with_tool_calls(
     mock_result.file_id = "doc-1"
     mock_result.filename = "ansible-docs.txt"
     mock_result.score = 0.95
+    mock_result.text = "Ansible is an open-source automation tool..."
     mock_result.attributes = {
         "doc_url": "https://example.com/ansible-docs.txt",
         "link": "https://example.com/ansible-docs.txt",
71 changes: 52 additions & 19 deletions tests/unit/app/endpoints/test_query_v2.py
@@ -787,17 +787,12 @@ async def test_retrieve_response_no_violation_with_shields(
     validation_metric.inc.assert_not_called()
 
 
-@pytest.mark.asyncio
-async def test_retrieve_response_parses_referenced_documents(
-    mocker: MockerFixture,
-) -> None:
-    """Test that retrieve_response correctly parses referenced documents from response."""
-    mock_client = mocker.AsyncMock()
-
+def _create_message_output_with_citations(mocker: MockerFixture) -> Any:
+    """Create mock message output item with content annotations (citations)."""
     # 1. Output item with message content annotations (citations)
-    output_item_1 = mocker.Mock()
-    output_item_1.type = "message"
-    output_item_1.role = "assistant"
+    output_item = mocker.Mock()
+    output_item.type = "message"
+    output_item.role = "assistant"
 
     # Mock content with annotations
     content_part = mocker.Mock()
@@ -816,19 +811,48 @@
     annotation2.title = None
 
     content_part.annotations = [annotation1, annotation2]
-    output_item_1.content = [content_part]
+    output_item.content = [content_part]
+    return output_item
+
 
+def _create_file_search_output(mocker: MockerFixture) -> Any:
+    """Create mock file search tool call output with results."""
     # 2. Output item with file search tool call results
-    output_item_2 = mocker.Mock()
-    output_item_2.type = "file_search_call"
-    output_item_2.queries = (
+    output_item = mocker.Mock()
+    output_item.type = "file_search_call"
+    output_item.queries = (
         []
     )  # Ensure queries is a list to avoid iteration error in tool summary
-    output_item_2.status = "completed"
-    output_item_2.results = [
-        {"filename": "file2.pdf", "attributes": {"url": "http://example.com/doc2"}},
-        {"filename": "file3.docx", "attributes": {}},  # No URL
-    ]
+    output_item.status = "completed"
+    # Create mock result objects with proper attributes matching real llama-stack response
+    result_1 = mocker.Mock()
+    result_1.filename = "file2.pdf"
+    result_1.attributes = {"url": "http://example.com/doc2"}
+    result_1.text = "Sample text from file2.pdf"
+    result_1.score = 0.95
+    result_1.file_id = "file-123"
+
+    result_2 = mocker.Mock()
+    result_2.filename = "file3.docx"
+    result_2.attributes = {}
+    result_2.text = "Sample text from file3.docx"
+    result_2.score = 0.85
+    result_2.file_id = "file-456"
+
+    output_item.results = [result_1, result_2]
+    return output_item
+
+
+@pytest.mark.asyncio
+async def test_retrieve_response_parses_referenced_documents(
+    mocker: MockerFixture,
+) -> None:
+    """Test that retrieve_response correctly parses referenced documents from response."""
+    mock_client = mocker.AsyncMock()
+
+    # Create output items using helper functions
+    output_item_1 = _create_message_output_with_citations(mocker)
+    output_item_2 = _create_file_search_output(mocker)
 
     response_obj = mocker.Mock()
     response_obj.id = "resp-docs"
@@ -870,3 +894,12 @@ async def test_retrieve_response_parses_referenced_documents(
     doc4 = next((d for d in referenced_docs if d.doc_title == "file3.docx"), None)
     assert doc4
     assert doc4.doc_url is None
+
+    # Verify RAG chunks were extracted from file_search_call results
+    assert len(_summary.rag_chunks) == 2
+    assert _summary.rag_chunks[0].content == "Sample text from file2.pdf"
+    assert _summary.rag_chunks[0].source == "file_search"
+    assert _summary.rag_chunks[0].score == 0.95
+    assert _summary.rag_chunks[1].content == "Sample text from file3.docx"
+    assert _summary.rag_chunks[1].source == "file_search"
+    assert _summary.rag_chunks[1].score == 0.85