Skip to content

Commit 64abf2d

Browse files
authored
Add tests for adapters (#188)
* Add tests for adapters * fix test
1 parent fc15602 commit 64abf2d

File tree

4 files changed

+1012
-10
lines changed

4 files changed

+1012
-10
lines changed
Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
import os
2+
from types import SimpleNamespace
3+
from typing import Any, Dict, List
4+
from unittest.mock import Mock
5+
6+
import pytest
7+
import requests
8+
9+
from eval_protocol.adapters.braintrust import BraintrustAdapter
10+
from eval_protocol.models import Message
11+
12+
13+
class MockResponse:
    """Minimal stand-in for the response object returned by ``requests.post``.

    Only the pieces exercised by these tests are provided: the decoded JSON
    payload, the status code, and ``raise_for_status``.
    """

    def __init__(self, json_data: Dict[str, Any], status_code: int = 200):
        # Stored verbatim; ``json()`` hands the same object back.
        self.status_code = status_code
        self.json_data = json_data

    def json(self) -> Dict[str, Any]:
        """Return the payload supplied at construction time."""
        return self.json_data

    def raise_for_status(self) -> None:
        """Raise ``requests.HTTPError`` when the status code is 400 or above."""
        if self.status_code < 400:
            return
        raise requests.HTTPError(f"HTTP {self.status_code}")
26+
27+
28+
@pytest.fixture
def mock_requests_post(monkeypatch):
    """Replace ``requests.post`` with a stub that returns one simple trace.

    The stub ignores its arguments and answers with a single trace
    (``trace1``) holding one user message and one assistant reply. The stub
    function itself is returned to the requesting test.
    """

    def fake_post(url: str, headers=None, json=None):
        # Built on every call so each caller receives a fresh payload.
        user_turn = {"role": "user", "content": "Hello"}
        assistant_turn = {"message": {"role": "assistant", "content": "Hi there!"}}
        trace = {
            "id": "trace1",
            "input": [user_turn],
            "output": [assistant_turn],
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)
    return fake_post
48+
49+
50+
def test_basic_btql_query_returns_evaluation_rows(mock_requests_post):
    """A BTQL query yields one row with the user and assistant messages in order."""
    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")

    rows = adapter.get_evaluation_rows("select: * from: project_logs('test_project') traces limit: 1")

    assert len(rows) == 1
    messages = rows[0].messages
    assert len(messages) == 2

    # First the user turn taken from the trace input ...
    user_msg = messages[0]
    assert user_msg.role == "user"
    assert user_msg.content == "Hello"

    # ... then the assistant turn taken from the trace output.
    assistant_msg = messages[1]
    assert assistant_msg.role == "assistant"
    assert assistant_msg.content == "Hi there!"
63+
64+
65+
def test_trace_with_tool_calls_preserved(monkeypatch):
    """Tool calls on an assistant output message survive conversion to a row."""

    def fake_post(url: str, headers=None, json=None):
        tool_call = {
            "id": "call_123",
            "type": "function",
            "function": {
                "name": "get_reservation_details",
                "arguments": '{"reservation_id": "7KJ2PL"}',
            },
        }
        assistant_message = {
            "role": "assistant",
            "content": None,
            "tool_calls": [tool_call],
        }
        trace = {
            "id": "trace_with_tools",
            "input": [{"role": "user", "content": "Get reservation details for 7KJ2PL"}],
            "output": [{"message": assistant_message}],
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1

    # Keep only assistant messages that actually carry tool calls.
    with_tool_calls = [msg for msg in rows[0].messages if msg.role == "assistant" and msg.tool_calls]
    assert len(with_tool_calls) == 1

    calls = with_tool_calls[0].tool_calls
    assert calls is not None
    first_call = calls[0]
    assert first_call.id == "call_123"
    assert first_call.function.name == "get_reservation_details"
    assert '{"reservation_id": "7KJ2PL"}' in first_call.function.arguments
115+
116+
117+
def test_trace_with_tool_response_messages(monkeypatch):
    """A ``tool`` role message in the trace input is carried into the row."""

    def fake_post(url: str, headers=None, json=None):
        user_turn = {"role": "user", "content": "Check reservation"}
        assistant_tool_turn = {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_456",
                    "type": "function",
                    "function": {
                        "name": "get_reservation_details",
                        "arguments": '{"reservation_id": "ABC123"}',
                    },
                }
            ],
        }
        tool_turn = {
            "role": "tool",
            "tool_call_id": "call_456",
            "content": '{"reservation_id": "ABC123", "status": "confirmed"}',
        }
        final_turn = {"message": {"role": "assistant", "content": "Your reservation ABC123 is confirmed."}}
        trace = {
            "id": "trace_with_tool_response",
            "input": [user_turn, assistant_tool_turn, tool_turn],
            "output": [final_turn],
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    conversation = rows[0].messages

    # Expect: user, assistant (with tool_calls), tool response, final assistant.
    seen_roles = [msg.role for msg in conversation]
    assert "user" in seen_roles
    assert "tool" in seen_roles
    assert len([role for role in seen_roles if role == "assistant"]) == 2

    # The single tool message must keep its call id and its content.
    tool_replies = [msg for msg in conversation if msg.role == "tool"]
    assert len(tool_replies) == 1
    reply = tool_replies[0]
    assert reply.tool_call_id == "call_456"
    assert reply.content is not None
    assert "ABC123" in reply.content
176+
177+
178+
def test_tools_extracted_from_metadata_variants(monkeypatch):
    """Tools listed under ``metadata.tools`` end up on the row's ``tools``."""

    def fake_post(url: str, headers=None, json=None):
        weather_tool = {
            "type": "function",
            "function": {"name": "get_weather", "description": "Get weather info"},
        }
        trace = {
            "id": "trace_with_metadata_tools",
            "input": [{"role": "user", "content": "Test"}],
            "output": [{"message": {"role": "assistant", "content": "Response"}}],
            "metadata": {"tools": [weather_tool]},
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    extracted = rows[0].tools
    assert extracted is not None
    assert len(extracted) == 1
    assert extracted[0]["function"]["name"] == "get_weather"
211+
212+
213+
def test_tools_extracted_from_hidden_params(monkeypatch):
    """Tools nested at ``metadata.hidden_params.optional_params.tools`` are found."""

    def fake_post(url: str, headers=None, json=None):
        transfer_tool = {
            "type": "function",
            "function": {
                "name": "transfer_to_human_agents",
                "description": "Transfer to human",
            },
        }
        trace = {
            "id": "trace_with_hidden_tools",
            "input": [{"role": "user", "content": "Test"}],
            "output": [{"message": {"role": "assistant", "content": "Response"}}],
            "metadata": {"hidden_params": {"optional_params": {"tools": [transfer_tool]}}},
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    extracted = rows[0].tools
    assert extracted is not None
    assert len(extracted) == 1
    assert extracted[0]["function"]["name"] == "transfer_to_human_agents"
253+
254+
255+
def test_empty_btql_response_returns_empty_list(monkeypatch):
    """A response whose ``data`` list is empty produces no evaluation rows."""
    monkeypatch.setattr(
        "requests.post",
        lambda url, headers=None, json=None: MockResponse({"data": []}),
    )

    rows = BraintrustAdapter(api_key="test_key", project_id="test_project").get_evaluation_rows("test query")

    assert len(rows) == 0
267+
268+
269+
def test_trace_without_meaningful_conversation_skipped(monkeypatch):
    """A trace with no input and no output is dropped; the valid one is kept."""

    def fake_post(url: str, headers=None, json=None):
        incomplete = {"id": "incomplete_trace", "input": None, "output": []}
        valid = {
            "id": "valid_trace",
            "input": [{"role": "user", "content": "Hello"}],
            "output": [{"message": {"role": "assistant", "content": "Hi"}}],
        }
        return MockResponse({"data": [incomplete, valid]})

    monkeypatch.setattr("requests.post", fake_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    # Only the valid trace should come back, identified by its trace id.
    assert len(rows) == 1
    metadata = rows[0].input_metadata
    assert metadata is not None
    assert metadata.session_data is not None
    assert metadata.session_data["braintrust_trace_id"] == "valid_trace"
296+
297+
298+
def test_custom_converter_used_when_provided(monkeypatch):
    """A ``converter`` passed to ``get_evaluation_rows`` builds the returned rows."""

    def fake_post(url: str, headers=None, json=None):
        trace = {
            "id": "custom_trace",
            "input": [{"role": "user", "content": "Test"}],
            "output": [{"message": {"role": "assistant", "content": "Response"}}],
        }
        return MockResponse({"data": [trace]})

    monkeypatch.setattr("requests.post", fake_post)

    def marker_converter(trace: Dict[str, Any], include_tool_calls: bool):
        # Ignores the trace and emits a recognisable row, so the assertions
        # below can tell this converter produced the result.
        from eval_protocol.models import EvaluationRow, InputMetadata

        marker_message = Message(role="system", content="Custom converted message")
        marker_metadata = InputMetadata(session_data={"custom": True})
        return EvaluationRow(messages=[marker_message], input_metadata=marker_metadata)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query", converter=marker_converter)

    assert len(rows) == 1
    row = rows[0]

    first_message = row.messages[0]
    assert first_message.role == "system"
    assert first_message.content == "Custom converted message"

    assert row.input_metadata is not None
    assert row.input_metadata.session_data is not None
    assert row.input_metadata.session_data["custom"] is True
334+
335+
336+
def test_api_authentication_error_handling(monkeypatch):
    """A 401 response from the API surfaces as ``requests.HTTPError``."""
    monkeypatch.setattr(
        "requests.post",
        lambda url, headers=None, json=None: MockResponse({}, status_code=401),
    )

    adapter = BraintrustAdapter(api_key="invalid_key", project_id="test_project")

    with pytest.raises(requests.HTTPError):
        adapter.get_evaluation_rows("test query")
348+
349+
350+
def test_session_data_includes_trace_id(mock_requests_post):
    """The row's session data records the originating Braintrust trace id."""
    rows = BraintrustAdapter(api_key="test_key", project_id="test_project").get_evaluation_rows("test query")

    assert len(rows) == 1
    metadata = rows[0].input_metadata
    assert metadata is not None
    assert metadata.session_data is not None
    assert metadata.session_data["braintrust_trace_id"] == "trace1"
359+
360+
361+
def test_missing_required_env_vars(monkeypatch):
    """Constructing the adapter without credentials raises ``ValueError``.

    Two environment states are checked, because "missing" can mean either:

    * the variables are not set at all, and
    * the variables are set to empty strings.

    In both states, omitting ``api_key`` must raise an error mentioning
    ``BRAINTRUST_API_KEY`` and omitting ``project_id`` must raise an error
    mentioning ``BRAINTRUST_PROJECT_ID``.
    """
    env_names = ("BRAINTRUST_API_KEY", "BRAINTRUST_PROJECT_ID")

    def assert_credentials_required():
        # Missing API key
        with pytest.raises(ValueError, match="BRAINTRUST_API_KEY"):
            BraintrustAdapter(api_key=None, project_id="test_project")

        # Missing project ID
        with pytest.raises(ValueError, match="BRAINTRUST_PROJECT_ID"):
            BraintrustAdapter(api_key="test_key", project_id=None)

    # State 1: variables truly absent. raising=False keeps this a no-op when
    # the variable was never set in the environment running the tests.
    for name in env_names:
        monkeypatch.delenv(name, raising=False)
    assert_credentials_required()

    # State 2: variables present but empty (the case originally covered).
    for name in env_names:
        monkeypatch.setenv(name, "")
    assert_credentials_required()

0 commit comments

Comments
 (0)