From c055b6a01cacae0e12507bff516d07d97d214e2d Mon Sep 17 00:00:00 2001 From: DavidKoleczek <45405824+DavidKoleczek@users.noreply.github.com> Date: Thu, 10 Jul 2025 11:40:12 -0400 Subject: [PATCH] Move GCE based data generation to its own package --- .../.vscode/launch.json | 0 .../.vscode/settings.json | 0 .../Makefile | 0 .../README.md | 10 ++-- .../assistant_data_gen}/__init__.py | 0 .../assistant_data_gen}/assistant_api.py | 0 .../assistant_data_gen}/config.py | 2 +- .../assistant_data_gen}/gce/__init__.py | 0 .../assistant_data_gen}/gce/gce_agent.py | 4 +- .../assistant_data_gen}/gce/prompts.py | 0 .../assistant_data_gen}/pydantic_ai_utils.py | 0 .../document_assistant_example_config.yaml | 2 +- .../pyproject.toml | 4 +- .../scripts/gce_simulation.py | 6 +-- .../scripts/generate_scenario.py | 4 +- .../uv.lock | 50 +++++++++---------- 16 files changed, 40 insertions(+), 42 deletions(-) rename libraries/python/{assistant-evaluations => assistant-data-gen}/.vscode/launch.json (100%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/.vscode/settings.json (100%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/Makefile (100%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/README.md (84%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/__init__.py (100%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/assistant_api.py (100%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/config.py (97%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/gce/__init__.py (100%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/gce/gce_agent.py (99%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/gce/prompts.py (100%) rename libraries/python/{assistant-evaluations/assistant_evaluations => assistant-data-gen/assistant_data_gen}/pydantic_ai_utils.py (100%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/configs/document_assistant_example_config.yaml (99%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/pyproject.toml (91%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/scripts/gce_simulation.py (97%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/scripts/generate_scenario.py (98%) rename libraries/python/{assistant-evaluations => assistant-data-gen}/uv.lock (99%) diff --git a/libraries/python/assistant-evaluations/.vscode/launch.json b/libraries/python/assistant-data-gen/.vscode/launch.json similarity index 100% rename from libraries/python/assistant-evaluations/.vscode/launch.json rename to libraries/python/assistant-data-gen/.vscode/launch.json diff --git a/libraries/python/assistant-evaluations/.vscode/settings.json b/libraries/python/assistant-data-gen/.vscode/settings.json similarity index 100% rename from libraries/python/assistant-evaluations/.vscode/settings.json rename to libraries/python/assistant-data-gen/.vscode/settings.json diff --git a/libraries/python/assistant-evaluations/Makefile b/libraries/python/assistant-data-gen/Makefile similarity index 100% rename from libraries/python/assistant-evaluations/Makefile rename to libraries/python/assistant-data-gen/Makefile diff --git a/libraries/python/assistant-evaluations/README.md b/libraries/python/assistant-data-gen/README.md similarity index 84% rename from libraries/python/assistant-evaluations/README.md rename to libraries/python/assistant-data-gen/README.md index 92761d7b1..d07bb2e72 100644 --- a/libraries/python/assistant-evaluations/README.md +++ b/libraries/python/assistant-data-gen/README.md @@ -1,9 +1,8 @@ -# Evaluation -This is a work-in-progress tool for evaluating Semantic Workbench Assistants for quality. +# Data Generation +This is a tool for generating data for testing Semantic Workbench assistants. -## Automation and Data Generation -There is currently one part to this which is automation to populate a Workbench conversation automatically without human intervention. +The core functionality of this library is an automation to populate a Workbench conversation automatically without human intervention. This is implemented using a specialized version of the guided conversation engine (GCE). The GCE here focuses on the agenda and using an exact resource constraint to force the GCE to have a long running conversation. @@ -12,7 +11,7 @@ There is also a quick `generate_scenario.py` script that can be used to generate ### Setup 1. Run the workbench service running locally (at http://127.0.0.1:3000), an assistant service, and create the assistant you want to test. -2. Have LLM provider configured. Check [pydantic_ai_utils.py](./assistant_evaluations/pydantic_ai_utils.py) for an example of how it is configured for Pydantic AI. +2. Have LLM provider configured. Check [pydantic_ai_utils.py](./assistant_data_gen/pydantic_ai_utils.py) for an example of how it is configured for Pydantic AI. 1. For example, create a `.env` file with your Azure OpenAI endpoint set as `ASSISTANT__AZURE_OPENAI_ENDPOINT=` 3. Create a configuration file. See [document_assistant_example_config.yaml](./configs/document_assistant_example_config.yaml) for an example. 1. The scenarios field is a list that allows you to specify multiple test scenarios (different conversation paths). @@ -33,4 +32,3 @@ python scripts/generate_scenario.py --config path/to/custom_config.yaml ### Recommendations 1. Be as specific as possible with your conversation flows. Generic conversation flows and/or resource constraints that are too high can lead to the agents getting stuck in a thank you loop. - diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/__init__.py b/libraries/python/assistant-data-gen/assistant_data_gen/__init__.py similarity index 100% rename from libraries/python/assistant-evaluations/assistant_evaluations/__init__.py rename to libraries/python/assistant-data-gen/assistant_data_gen/__init__.py diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/assistant_api.py b/libraries/python/assistant-data-gen/assistant_data_gen/assistant_api.py similarity index 100% rename from libraries/python/assistant-evaluations/assistant_evaluations/assistant_api.py rename to libraries/python/assistant-data-gen/assistant_data_gen/assistant_api.py diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/config.py b/libraries/python/assistant-data-gen/assistant_data_gen/config.py similarity index 97% rename from libraries/python/assistant-evaluations/assistant_evaluations/config.py rename to libraries/python/assistant-data-gen/assistant_data_gen/config.py index cb693952b..f9761df26 100644 --- a/libraries/python/assistant-evaluations/assistant_evaluations/config.py +++ b/libraries/python/assistant-data-gen/assistant_data_gen/config.py @@ -6,7 +6,7 @@ import yaml from pydantic import BaseModel, Field -from assistant_evaluations.gce.gce_agent import ResourceConstraintMode +from assistant_data_gen.gce.gce_agent import ResourceConstraintMode class ScenarioConfig(BaseModel): diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/gce/__init__.py b/libraries/python/assistant-data-gen/assistant_data_gen/gce/__init__.py similarity index 100% rename from libraries/python/assistant-evaluations/assistant_evaluations/gce/__init__.py rename to libraries/python/assistant-data-gen/assistant_data_gen/gce/__init__.py diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/gce/gce_agent.py b/libraries/python/assistant-data-gen/assistant_data_gen/gce/gce_agent.py similarity index 99% rename from libraries/python/assistant-evaluations/assistant_evaluations/gce/gce_agent.py rename to libraries/python/assistant-data-gen/assistant_data_gen/gce/gce_agent.py index b6a98c4c3..740668cc8 100644 --- a/libraries/python/assistant-evaluations/assistant_evaluations/gce/gce_agent.py +++ b/libraries/python/assistant-data-gen/assistant_data_gen/gce/gce_agent.py @@ -40,7 +40,7 @@ from pydantic_ai.providers.openai import OpenAIProvider from pydantic_ai.tools import ToolDefinition -from assistant_evaluations.gce.prompts import ( +from assistant_data_gen.gce.prompts import ( AGENDA_SYSTEM_PROMPT, CONVERSATION_SYSTEM_PROMPT, FIRST_USER_MESSAGE, @@ -49,7 +49,7 @@ TERMINATION_INSTRUCTIONS_EXACT, TERMINATION_INSTRUCTIONS_MAXIMUM, ) -from assistant_evaluations.pydantic_ai_utils import create_model +from assistant_data_gen.pydantic_ai_utils import create_model class ResourceConstraintMode(Enum): diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/gce/prompts.py b/libraries/python/assistant-data-gen/assistant_data_gen/gce/prompts.py similarity index 100% rename from libraries/python/assistant-evaluations/assistant_evaluations/gce/prompts.py rename to libraries/python/assistant-data-gen/assistant_data_gen/gce/prompts.py diff --git a/libraries/python/assistant-evaluations/assistant_evaluations/pydantic_ai_utils.py b/libraries/python/assistant-data-gen/assistant_data_gen/pydantic_ai_utils.py similarity index 100% rename from libraries/python/assistant-evaluations/assistant_evaluations/pydantic_ai_utils.py rename to libraries/python/assistant-data-gen/assistant_data_gen/pydantic_ai_utils.py diff --git a/libraries/python/assistant-evaluations/configs/document_assistant_example_config.yaml b/libraries/python/assistant-data-gen/configs/document_assistant_example_config.yaml similarity index 99% rename from libraries/python/assistant-evaluations/configs/document_assistant_example_config.yaml rename to libraries/python/assistant-data-gen/configs/document_assistant_example_config.yaml index 15f005cc8..0bfcdc238 100644 --- a/libraries/python/assistant-evaluations/configs/document_assistant_example_config.yaml +++ b/libraries/python/assistant-data-gen/configs/document_assistant_example_config.yaml @@ -1,5 +1,5 @@ general: - assistant_name: "Document Assistant 6-20 v1" + assistant_name: "Document Assistant 7-7 v1" conversation_title: "GCE - Auto-Generated Conversation" assistant_details: >- The Document assistant you are talking with help you with things like web search and writing documents. diff --git a/libraries/python/assistant-evaluations/pyproject.toml b/libraries/python/assistant-data-gen/pyproject.toml similarity index 91% rename from libraries/python/assistant-evaluations/pyproject.toml rename to libraries/python/assistant-data-gen/pyproject.toml index ca0c9ee52..69f745fc5 100644 --- a/libraries/python/assistant-evaluations/pyproject.toml +++ b/libraries/python/assistant-data-gen/pyproject.toml @@ -1,7 +1,7 @@ [project] -name = "assistant-evaluations" +name = "assistant_data_gen" version = "0.1.0" -description = "Assistant evaluations" +description = "Assistant Data Generation" authors = [{ name = "Semantic Workbench Team" }] readme = "README.md" requires-python = ">=3.11" diff --git a/libraries/python/assistant-evaluations/scripts/gce_simulation.py b/libraries/python/assistant-data-gen/scripts/gce_simulation.py similarity index 97% rename from libraries/python/assistant-evaluations/scripts/gce_simulation.py rename to libraries/python/assistant-data-gen/scripts/gce_simulation.py index 006915a4c..002fcbb77 100644 --- a/libraries/python/assistant-evaluations/scripts/gce_simulation.py +++ b/libraries/python/assistant-data-gen/scripts/gce_simulation.py @@ -6,15 +6,15 @@ import time from pathlib import Path -from assistant_evaluations.assistant_api import ( +from assistant_data_gen.assistant_api import ( create_test_jwt_token, get_all_messages, get_assistant, get_user_from_workbench_db, poll_assistant_status, ) -from assistant_evaluations.config import EvaluationConfig -from assistant_evaluations.gce.gce_agent import ( +from assistant_data_gen.config import EvaluationConfig +from assistant_data_gen.gce.gce_agent import ( Agenda, GuidedConversationInput, GuidedConversationState, diff --git a/libraries/python/assistant-evaluations/scripts/generate_scenario.py b/libraries/python/assistant-data-gen/scripts/generate_scenario.py similarity index 98% rename from libraries/python/assistant-evaluations/scripts/generate_scenario.py rename to libraries/python/assistant-data-gen/scripts/generate_scenario.py index bd3906fee..d3863f1df 100644 --- a/libraries/python/assistant-evaluations/scripts/generate_scenario.py +++ b/libraries/python/assistant-data-gen/scripts/generate_scenario.py @@ -14,8 +14,8 @@ import asyncio from pathlib import Path -from assistant_evaluations.config import EvaluationConfig -from assistant_evaluations.pydantic_ai_utils import create_model +from assistant_data_gen.config import EvaluationConfig +from assistant_data_gen.pydantic_ai_utils import create_model from dotenv import load_dotenv from liquid import render from pydantic import BaseModel, Field diff --git a/libraries/python/assistant-evaluations/uv.lock b/libraries/python/assistant-data-gen/uv.lock similarity index 99% rename from libraries/python/assistant-evaluations/uv.lock rename to libraries/python/assistant-data-gen/uv.lock index 5dc480983..8fec47dba 100644 --- a/libraries/python/assistant-evaluations/uv.lock +++ b/libraries/python/assistant-data-gen/uv.lock @@ -181,31 +181,7 @@ wheels = [ ] [[package]] -name = "assistant-drive" -version = "0.1.0" -source = { editable = "../assistant-drive" } -dependencies = [ - { name = "pydantic" }, - { name = "pydantic-settings" }, -] - -[package.metadata] -requires-dist = [ - { name = "pydantic", specifier = ">=2.6.1" }, - { name = "pydantic-settings", specifier = ">=2.5.2" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "ipykernel", specifier = ">=6.29.5" }, - { name = "pyright", specifier = ">=1.1.389" }, - { name = "pytest", specifier = ">=8.3.1" }, - { name = "pytest-asyncio", specifier = ">=0.23.8" }, - { name = "pytest-repeat", specifier = ">=0.9.3" }, -] - -[[package]] -name = "assistant-evaluations" +name = "assistant-data-gen" version = "0.1.0" source = { editable = "." } dependencies = [ @@ -239,6 +215,30 @@ requires-dist = [ [package.metadata.requires-dev] dev = [{ name = "pyright", specifier = ">=1.1.401" }] +[[package]] +name = "assistant-drive" +version = "0.1.0" +source = { editable = "../assistant-drive" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-settings" }, +] + +[package.metadata] +requires-dist = [ + { name = "pydantic", specifier = ">=2.6.1" }, + { name = "pydantic-settings", specifier = ">=2.5.2" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "ipykernel", specifier = ">=6.29.5" }, + { name = "pyright", specifier = ">=1.1.389" }, + { name = "pytest", specifier = ">=8.3.1" }, + { name = "pytest-asyncio", specifier = ">=0.23.8" }, + { name = "pytest-repeat", specifier = ">=0.9.3" }, +] + [[package]] name = "assistant-extensions" version = "0.1.0"