Skip to content

Commit 24a6ba3

Browse files
committed
Merge branch 'derekx/quick-start' of https://github.com/eval-protocol/python-sdk into derekx/quick-start
2 parents 5175cfa + 6ef8e45 commit 24a6ba3

File tree

3 files changed

+51
-1
lines changed

3 files changed

+51
-1
lines changed

tests/chinook/pydantic/agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def setup_agent(orchestrator_agent_model: Model):
2727
"""
2828

2929
agent = Agent(
30-
system_prompt=SYSTEM_PROMPT,
30+
instructions=SYSTEM_PROMPT,
3131
model=orchestrator_agent_model,
3232
instrument=True,
3333
)

tests/chinook/pydantic/test_pydantic_chinook.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ async def test_simple_query(row: EvaluationRow) -> EvaluationRow:
6161
assert hasattr(row, "tools"), "Row missing 'tools' attribute"
6262
assert row.tools == expected_tools, f"Tools validation failed. Expected: {expected_tools}, Got: {row.tools}"
6363

64+
# assert that there is a system message
65+
assert row.messages[0].role == "system"
66+
6467
last_assistant_message = row.last_assistant_message()
6568
if last_assistant_message is None:
6669
row.evaluation_result = EvaluateResult(
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from collections.abc import Awaitable, Callable
2+
import os
3+
from typing_extensions import cast
4+
from pydantic_ai import Agent
5+
from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings
6+
import pytest
7+
8+
from eval_protocol.models import EvaluationRow
9+
from eval_protocol.pytest import evaluation_test
10+
from eval_protocol.pytest.types import RolloutProcessorConfig
11+
from tests.chinook.dataset import collect_dataset
12+
from tests.chinook.pydantic.agent import setup_agent
13+
from tests.pytest.test_pydantic_agent import PydanticAgentRolloutProcessor
14+
15+
# IMPORTANT: import must be renamed to something without the "test_" prefix to
16+
# avoid pytest discovering the import as a test
17+
from tests.chinook.pydantic.test_pydantic_complex_queries import test_pydantic_complex_queries as eval
18+
19+
20+
def agent_factory(config: RolloutProcessorConfig) -> Agent:
    """Build the agent used for a rollout, backed by an OpenAI Responses model.

    Args:
        config: Rollout configuration; ``config.completion_params["model"]``
            supplies the model name.

    Returns:
        The agent produced by ``setup_agent`` for that model.

    Raises:
        KeyError: If ``completion_params`` has no ``"model"`` entry.
    """
    # No model settings are built here: the original created an empty
    # OpenAIResponsesModelSettings() that was never handed to the model.
    model = OpenAIResponsesModel(config.completion_params["model"])
    return setup_agent(model)
25+
26+
27+
@pytest.mark.skipif(
    os.environ.get("CI") == "true",
    reason="This was only run locally to generate traces in Responses API",
)
@pytest.mark.asyncio
@evaluation_test(
    input_rows=[collect_dataset()],
    completion_params=[{"model": "gpt-4o"}],
    rollout_processor=PydanticAgentRolloutProcessor(agent_factory),
)
async def test_pydantic_complex_queries_responses(row: EvaluationRow) -> EvaluationRow:
    """
    Evaluation of complex queries for the Chinook database using PydanticAI
    """
    # `eval` is the evaluation imported at module level under that alias, not
    # the builtin. The cast only narrows its static type to an async
    # row -> row callable; at runtime the object is called unchanged.
    score_row = cast(Callable[[EvaluationRow], Awaitable[EvaluationRow]], eval)
    return await score_row(row)

0 commit comments

Comments
 (0)