diff --git a/README.md b/README.md
index d9ae57a30..13f4bf6bb 100644
--- a/README.md
+++ b/README.md
@@ -90,3 +90,6 @@ Some examples require extra dependencies. See each sample's directory for specif
 To run the tests:
 
     uv run poe test
+
+Note that if no API key is found, the `openai_agents` tests skip real OpenAI API calls and run against mocked models only.
+To run with real model calls, set `OPENAI_API_KEY` in your environment.
diff --git a/tests/openai_agents/__init__.py b/tests/openai_agents/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/openai_agents/basic/test_agent_lifecycle_workflow.py b/tests/openai_agents/basic/test_agent_lifecycle_workflow.py
new file mode 100644
index 000000000..c8f2cca36
--- /dev/null
+++ b/tests/openai_agents/basic/test_agent_lifecycle_workflow.py
@@ -0,0 +1,54 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.agent_lifecycle_workflow import (
+    AgentLifecycleWorkflow,
+)
+
+
+def agent_lifecycle_test_model():
+    return TestModel.returning_responses(
+        [ResponseBuilders.output_message('{"number": 10}')]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=agent_lifecycle_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentLifecycleWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            result = await client.execute_workflow(
+                AgentLifecycleWorkflow.run,
+                10,  # max_number parameter
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result has the expected structure
+            assert isinstance(result.number, int)
+            assert (
+                0 <= result.number <= 20
+            )  # Should be between 0 and max*2 due to multiply operation
diff --git a/tests/openai_agents/basic/test_dynamic_system_prompt_workflow.py b/tests/openai_agents/basic/test_dynamic_system_prompt_workflow.py
new file mode 100644
index 000000000..2fd155a25
--- /dev/null
+++ b/tests/openai_agents/basic/test_dynamic_system_prompt_workflow.py
@@ -0,0 +1,87 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.dynamic_system_prompt_workflow import (
+    DynamicSystemPromptWorkflow,
+)
+
+
+def dynamic_system_prompt_test_model():
+    return TestModel.returning_responses(
+        [
+            ResponseBuilders.output_message(
+                "Style: haiku\nResponse: The weather is cloudy with a chance of meatballs."
+            )
+        ]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_with_random_style(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=dynamic_system_prompt_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[DynamicSystemPromptWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            result = await client.execute_workflow(
+                DynamicSystemPromptWorkflow.run,
+                "Tell me about the weather today.",
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result has the expected format
+            assert "Style:" in result
+            assert "Response:" in result
+            assert any(style in result for style in ["haiku", "pirate", "robot"])
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_with_specific_style(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=dynamic_system_prompt_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[DynamicSystemPromptWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            result = await client.execute_workflow(
+                DynamicSystemPromptWorkflow.run,
+                args=["Tell me about the weather today.", "haiku"],
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result has the expected format and style
+            assert "Style: haiku" in result
+            assert "Response:" in result
diff --git a/tests/openai_agents/basic/test_hello_world_workflow.py b/tests/openai_agents/basic/test_hello_world_workflow.py
new file mode 100644
index 000000000..d461acc2f
--- /dev/null
+++ b/tests/openai_agents/basic/test_hello_world_workflow.py
@@ -0,0 +1,52 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.hello_world_workflow import HelloWorldAgent
+
+
+def hello_world_test_model():
+    return TestModel.returning_responses(
+        [ResponseBuilders.output_message("This is a haiku (not really)")]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=hello_world_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[HelloWorldAgent],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            result = await client.execute_workflow(
+                HelloWorldAgent.run,
+                "Write a recursive haiku about recursive haikus.",
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            if mock_model:
+                assert result == "This is a haiku (not really)"
+            else:
+                assert isinstance(result, str)
+                assert len(result) > 0
diff --git a/tests/openai_agents/basic/test_lifecycle_workflow.py b/tests/openai_agents/basic/test_lifecycle_workflow.py
new file mode 100644
index 000000000..be38db196
--- /dev/null
+++ b/tests/openai_agents/basic/test_lifecycle_workflow.py
@@ -0,0 +1,52 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.lifecycle_workflow import LifecycleWorkflow
+
+
+def lifecycle_test_model():
+    return TestModel.returning_responses(
+        [ResponseBuilders.output_message('{"number": 10}')]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=lifecycle_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[LifecycleWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            result = await client.execute_workflow(
+                LifecycleWorkflow.run,
+                10,  # max_number parameter
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result has the expected structure
+            assert isinstance(result.number, int)
+            assert (
+                0 <= result.number <= 20
+            )  # Should be between 0 and max*2 due to multiply operation
diff --git a/tests/openai_agents/basic/test_local_image_workflow.py b/tests/openai_agents/basic/test_local_image_workflow.py
new file mode 100644
index 000000000..b303de8b1
--- /dev/null
+++ b/tests/openai_agents/basic/test_local_image_workflow.py
@@ -0,0 +1,84 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.activities.image_activities import read_image_as_base64
+from openai_agents.basic.workflows.local_image_workflow import LocalImageWorkflow
+
+
+def local_image_test_model():
+    return TestModel.returning_responses(
+        [ResponseBuilders.output_message("I can see a bison in the image.")]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_default_question(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=local_image_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[LocalImageWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+            activities=[read_image_as_base64],
+        ):
+            result = await client.execute_workflow(
+                LocalImageWorkflow.run,
+                "openai_agents/basic/media/image_bison.jpg",  # Path to test image
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a string response
+            assert isinstance(result, str)
+            assert len(result) > 0
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_custom_question(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=local_image_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[LocalImageWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+            activities=[read_image_as_base64],
+        ):
+            custom_question = "What animals do you see in this image?"
+            result = await client.execute_workflow(
+                LocalImageWorkflow.run,
+                args=["openai_agents/basic/media/image_bison.jpg", custom_question],
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a string response
+            assert isinstance(result, str)
+            assert len(result) > 0
diff --git a/tests/openai_agents/basic/test_non_strict_output_workflow.py b/tests/openai_agents/basic/test_non_strict_output_workflow.py
new file mode 100644
index 000000000..779f8b845
--- /dev/null
+++ b/tests/openai_agents/basic/test_non_strict_output_workflow.py
@@ -0,0 +1,70 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.non_strict_output_workflow import (
+    NonStrictOutputWorkflow,
+)
+
+
+def non_strict_output_test_model():
+    # NOTE: AgentOutputSchema (used in the workflow definition) has a schema where the outer
+    # object must be "response". Therefore, mocked model responses must use "response", just as the real model does.
+    return TestModel.returning_responses(
+        [
+            ResponseBuilders.output_message(
+                '{"response": {"jokes": {"1": "Why do programmers prefer dark mode? Because light attracts bugs!", "2": "How many programmers does it take to change a light bulb? None, that\'s a hardware problem.", "3": "Why do Java developers wear glasses? Because they can\'t C#!"}}}'
+            )
+        ]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=non_strict_output_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[NonStrictOutputWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+            # No external activities needed
+        ):
+            result = await client.execute_workflow(
+                NonStrictOutputWorkflow.run,
+                "Tell me 3 funny jokes about programming.",
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result has the expected structure
+            assert isinstance(result, dict)
+
+            assert "strict_error" in result
+            assert "non_strict_result" in result
+
+            # If there's a strict_error, it should be a string
+            if "strict_error" in result:
+                assert isinstance(result["strict_error"], str)
+                assert len(result["strict_error"]) > 0
+
+            jokes = result["non_strict_result"]["jokes"]
+            assert isinstance(jokes, dict)
+            assert isinstance(jokes[list(jokes.keys())[0]], str)
diff --git a/tests/openai_agents/basic/test_previous_response_id_workflow.py b/tests/openai_agents/basic/test_previous_response_id_workflow.py
new file mode 100644
index 000000000..e3dea5608
--- /dev/null
+++ b/tests/openai_agents/basic/test_previous_response_id_workflow.py
@@ -0,0 +1,69 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.previous_response_id_workflow import (
+    PreviousResponseIdWorkflow,
+)
+
+
+def previous_response_id_test_model():
+    return TestModel.returning_responses(
+        [
+            ResponseBuilders.output_message("The capital of France is Paris."),
+            ResponseBuilders.output_message(
+                "Paris has a population of approximately 2.1 million people within the city proper."
+            ),
+        ]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=previous_response_id_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[PreviousResponseIdWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            first_question = "What is the capital of France?"
+            follow_up_question = "What is the population of that city?"
+
+            result = await client.execute_workflow(
+                PreviousResponseIdWorkflow.run,
+                args=[first_question, follow_up_question],
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a tuple with two string responses
+            assert isinstance(result, tuple)
+            assert len(result) == 2
+
+            first_response, second_response = result
+            assert isinstance(first_response, str)
+            assert isinstance(second_response, str)
+            assert len(first_response) > 0
+            assert len(second_response) > 0
+
+            # The responses should be different (not identical)
+            assert first_response != second_response
diff --git a/tests/openai_agents/basic/test_remote_image_workflow.py b/tests/openai_agents/basic/test_remote_image_workflow.py
new file mode 100644
index 000000000..2bf611d7a
--- /dev/null
+++ b/tests/openai_agents/basic/test_remote_image_workflow.py
@@ -0,0 +1,89 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.workflows.remote_image_workflow import RemoteImageWorkflow
+
+
+def remote_image_test_model():
+    return TestModel.returning_responses(
+        [
+            ResponseBuilders.output_message(
+                "I can see the Golden Gate Bridge, a beautiful suspension bridge in San Francisco."
+            )
+        ]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_default_question(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=remote_image_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[RemoteImageWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            test_image_url = "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
+
+            result = await client.execute_workflow(
+                RemoteImageWorkflow.run,
+                test_image_url,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a string response
+            assert isinstance(result, str)
+            assert len(result) > 0
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow_custom_question(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=remote_image_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[RemoteImageWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+        ):
+            test_image_url = "https://upload.wikimedia.org/wikipedia/commons/0/0c/GoldenGateBridge-001.jpg"
+            custom_question = "What do you see in this image?"
+
+            result = await client.execute_workflow(
+                RemoteImageWorkflow.run,
+                args=[test_image_url, custom_question],
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a string response
+            assert isinstance(result, str)
+            assert len(result) > 0
diff --git a/tests/openai_agents/basic/test_tools_workflow.py b/tests/openai_agents/basic/test_tools_workflow.py
new file mode 100644
index 000000000..d0b5b5297
--- /dev/null
+++ b/tests/openai_agents/basic/test_tools_workflow.py
@@ -0,0 +1,57 @@
+import os
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+from temporalio.client import Client
+from temporalio.contrib.openai_agents.testing import (
+    AgentEnvironment,
+    ResponseBuilders,
+    TestModel,
+)
+from temporalio.worker import Worker
+
+from openai_agents.basic.activities.get_weather_activity import get_weather
+from openai_agents.basic.workflows.tools_workflow import ToolsWorkflow
+
+
+def tools_test_model():
+    return TestModel.returning_responses(
+        [
+            ResponseBuilders.tool_call('{"city": "New York"}', "get_weather"),
+            ResponseBuilders.output_message(
+                "The weather in New York is sunny with a temperature of 75°F."
+            ),
+        ]
+    )
+
+
+@pytest.mark.parametrize("mock_model", [True, False])
+async def test_execute_workflow(client: Client, mock_model: bool):
+    task_queue_name = str(uuid.uuid4())
+    if not mock_model and not os.environ.get("OPENAI_API_KEY"):
+        pytest.skip(
+            f"Skipping test (mock_model={mock_model}) because OPENAI_API_KEY is not set"
+        )
+
+    async with AgentEnvironment(
+        model=tools_test_model() if mock_model else None
+    ) as agent_env:
+        client = agent_env.applied_on_client(client)
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[ToolsWorkflow],
+            activity_executor=ThreadPoolExecutor(5),
+            activities=[get_weather],
+        ):
+            result = await client.execute_workflow(
+                ToolsWorkflow.run,
+                "What's the weather like in New York?",
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+
+            # Verify the result is a string response
+            assert isinstance(result, str)
+            assert len(result) > 0
diff --git a/uv.lock b/uv.lock
index 73424b77c..594bd343d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.10"
 resolution-markers = [
     "python_full_version >= '3.13'",
@@ -2098,6 +2098,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ac/4b/7c400506ec484ec999b10133aa8e31af39dfc727042dc6944cd45fd927d0/pytest_asyncio-0.18.3-py3-none-any.whl", hash = "sha256:8fafa6c52161addfd41ee7ab35f11836c5a16ec208f93ee388f752bea3493a84", size = 14597, upload-time = "2022-03-25T09:43:57.106Z" },
 ]
 
+[[package]]
+name = "pytest-mock"
+version = "3.15.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" },
+]
+
 [[package]]
 name = "pytest-pretty"
 version = "1.3.0"
@@ -2651,6 +2663,7 @@ dev = [
     { name = "pyright" },
     { name = "pytest" },
"pytest" }, { name = "pytest-asyncio" }, + { name = "pytest-mock" }, { name = "pytest-pretty" }, { name = "types-pyyaml" }, ] @@ -2718,6 +2731,7 @@ dev = [ { name = "pyright", specifier = ">=1.1.394" }, { name = "pytest", specifier = ">=7.1.2,<8" }, { name = "pytest-asyncio", specifier = ">=0.18.3,<0.19" }, + { name = "pytest-mock", specifier = ">=3.15.1" }, { name = "pytest-pretty", specifier = ">=1.3.0" }, { name = "types-pyyaml", specifier = ">=6.0.12.20241230,<7" }, ]