Merged
@@ -1,9 +1,8 @@
# Evaluation
This is a work-in-progress tool for evaluating Semantic Workbench Assistants for quality.
# Data Generation

This is a tool for generating data for testing Semantic Workbench assistants.

## Automation and Data Generation
There is currently one part to this which is automation to populate a Workbench conversation automatically without human intervention.
The core functionality of this library is automation that populates a Workbench conversation without human intervention.
This is implemented using a specialized version of the guided conversation engine (GCE).
The GCE here focuses on the agenda and uses an exact resource constraint to force a long-running conversation.
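The "exact" resource constraint idea can be sketched as follows. This is a purely illustrative model, not the GCE's actual API; the class and method names here are assumptions:

```python
# Illustrative sketch (not the library's API): an "exact" resource
# constraint makes the conversation generator keep going for exactly
# N turns, preventing it from ending the conversation early.
from dataclasses import dataclass


@dataclass
class ExactResourceConstraint:
    total_turns: int
    used_turns: int = 0

    def consume(self) -> None:
        # Called once per simulated user/assistant exchange.
        self.used_turns += 1

    @property
    def must_continue(self) -> bool:
        # With an exact constraint the conversation may not terminate
        # until every allotted turn has been spent.
        return self.used_turns < self.total_turns


constraint = ExactResourceConstraint(total_turns=10)
while constraint.must_continue:
    constraint.consume()  # one simulated conversation turn
print(constraint.used_turns)  # 10
```

The point of forcing an exact turn count is to exercise the assistant over a long conversation rather than letting the agents wrap up after one or two exchanges.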

@@ -12,7 +11,7 @@ There is also a quick `generate_scenario.py` script that can be used to generate
### Setup

1. Run the workbench service locally (at http://127.0.0.1:3000), run an assistant service, and create the assistant you want to test.
2. Have LLM provider configured. Check [pydantic_ai_utils.py](./assistant_evaluations/pydantic_ai_utils.py) for an example of how it is configured for Pydantic AI.
2. Have an LLM provider configured. Check [pydantic_ai_utils.py](./assistant_data_gen/pydantic_ai_utils.py) for an example of how it is configured for Pydantic AI.
1. For example, create a `.env` file with your Azure OpenAI endpoint set as `ASSISTANT__AZURE_OPENAI_ENDPOINT=<your_endpoint>`
3. Create a configuration file. See [document_assistant_example_config.yaml](./configs/document_assistant_example_config.yaml) for an example.
1. The `scenarios` field is a list that allows you to specify multiple test scenarios (different conversation paths).
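A multi-scenario config along these lines might be modeled as below. This is a hedged sketch: the real schema lives in the library's config module, and every field name here other than the `scenarios` list and the `general` section (with its `assistant_name`) is an assumption for illustration:

```python
# Hedged sketch of modeling a config with multiple test scenarios.
# Field names beyond `general.assistant_name` and `scenarios` are
# assumptions, not the actual schema.
from dataclasses import dataclass


@dataclass
class Scenario:
    name: str               # assumed field
    conversation_flow: str  # assumed field


@dataclass
class Config:
    assistant_name: str
    scenarios: list[Scenario]


# Stand-in for a parsed YAML document (a plain dict, as yaml.safe_load
# would return).
raw = {
    "general": {"assistant_name": "Document Assistant 7-7 v1"},
    "scenarios": [
        {"name": "web_search", "conversation_flow": "Ask for a web search on a topic."},
        {"name": "doc_edit", "conversation_flow": "Draft a document, then revise it."},
    ],
}

config = Config(
    assistant_name=raw["general"]["assistant_name"],
    scenarios=[Scenario(**s) for s in raw["scenarios"]],
)
```

Each entry in `scenarios` then drives one independent generated conversation against the assistant.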
@@ -33,4 +32,3 @@ python scripts/generate_scenario.py --config path/to/custom_config.yaml

### Recommendations
1. Be as specific as possible with your conversation flows. Generic conversation flows, or resource constraints that are too high, can lead to the agents getting stuck in a thank-you loop.

@@ -6,7 +6,7 @@
import yaml
from pydantic import BaseModel, Field

from assistant_evaluations.gce.gce_agent import ResourceConstraintMode
from assistant_data_gen.gce.gce_agent import ResourceConstraintMode


class ScenarioConfig(BaseModel):
@@ -40,7 +40,7 @@
from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.tools import ToolDefinition

from assistant_evaluations.gce.prompts import (
from assistant_data_gen.gce.prompts import (
AGENDA_SYSTEM_PROMPT,
CONVERSATION_SYSTEM_PROMPT,
FIRST_USER_MESSAGE,
@@ -49,7 +49,7 @@
TERMINATION_INSTRUCTIONS_EXACT,
TERMINATION_INSTRUCTIONS_MAXIMUM,
)
from assistant_evaluations.pydantic_ai_utils import create_model
from assistant_data_gen.pydantic_ai_utils import create_model


class ResourceConstraintMode(Enum):
@@ -1,5 +1,5 @@
general:
assistant_name: "Document Assistant 6-20 v1"
assistant_name: "Document Assistant 7-7 v1"
conversation_title: "GCE - Auto-Generated Conversation"
assistant_details: >-
The Document assistant you are talking with help you with things like web search and writing documents.
@@ -1,7 +1,7 @@
[project]
name = "assistant-evaluations"
name = "assistant_data_gen"
version = "0.1.0"
description = "Assistant evaluations"
description = "Assistant Data Generation"
authors = [{ name = "Semantic Workbench Team" }]
readme = "README.md"
requires-python = ">=3.11"
@@ -6,15 +6,15 @@
import time
from pathlib import Path

from assistant_evaluations.assistant_api import (
from assistant_data_gen.assistant_api import (
create_test_jwt_token,
get_all_messages,
get_assistant,
get_user_from_workbench_db,
poll_assistant_status,
)
from assistant_evaluations.config import EvaluationConfig
from assistant_evaluations.gce.gce_agent import (
from assistant_data_gen.config import EvaluationConfig
from assistant_data_gen.gce.gce_agent import (
Agenda,
GuidedConversationInput,
GuidedConversationState,
@@ -14,8 +14,8 @@
import asyncio
from pathlib import Path

from assistant_evaluations.config import EvaluationConfig
from assistant_evaluations.pydantic_ai_utils import create_model
from assistant_data_gen.config import EvaluationConfig
from assistant_data_gen.pydantic_ai_utils import create_model
from dotenv import load_dotenv
from liquid import render
from pydantic import BaseModel, Field