diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d291b6c0..03e836f9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -88,6 +88,7 @@ jobs: - name: Run Core Tests with pytest-xdist env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} E2B_API_KEY: ${{ secrets.E2B_API_KEY }} FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2dce407d..3302cfb9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,9 +7,9 @@ repos: rev: v6.0.0 hooks: - id: trailing-whitespace - exclude: "(^vite-app/|\\.snap$)" + exclude: "(^vite-app/|\\.snap$|\\.ambr$)" - id: end-of-file-fixer - exclude: "(^vite-app/|\\.snap$)" + exclude: "(^vite-app/|\\.snap$|\\.ambr$)" - id: check-yaml - id: check-added-large-files - id: check-merge-conflict diff --git a/eval_protocol/adapters/openai_responses.py b/eval_protocol/adapters/openai_responses.py new file mode 100644 index 00000000..8380ce06 --- /dev/null +++ b/eval_protocol/adapters/openai_responses.py @@ -0,0 +1,189 @@ +"""OpenAI Responses adapter for Eval Protocol. + +This adapter allows pulling data from the OpenAI Responses API and converting it +to EvaluationRow format for use in evaluation pipelines. 
class OpenAIResponsesAdapter:
    """Adapter that pulls stored responses from the OpenAI Responses API as EvaluationRows.

    For every response ID the adapter fetches the response and its input items,
    then converts the conversation (system instructions, messages, function
    calls and function-call outputs) plus the response's tool definitions into
    the EvaluationRow format expected by the evaluation protocol.

    Examples:
        Basic usage:
        >>> adapter = OpenAIResponsesAdapter(
        ...     api_key="your_api_key",
        ... )
        >>> rows = adapter.get_evaluation_rows(response_ids=["response_id_1", "response_id_2"])
    """

    def __init__(self, api_key: str | None = None, base_url: str | None = None):
        """Initialize the OpenAI Responses adapter.

        Args:
            api_key: API key forwarded unchanged to the ``OpenAI`` client.
            base_url: Base URL forwarded unchanged to the ``OpenAI`` client.
        """
        self.openai = OpenAI(api_key=api_key, base_url=base_url)

    def get_evaluation_rows(
        self,
        response_ids: List[str],
    ) -> List[EvaluationRow]:
        """Pull responses from OpenAI Responses API and convert to EvaluationRow format.

        Args:
            response_ids: List of response IDs to fetch

        Returns:
            List[EvaluationRow]: One converted row per response ID, in input order.

        Raises:
            NotImplementedError: If a response uses a feature the conversion does
                not handle (list-valued instructions, non-function tools,
                non-``input_text`` message content, or other item types).
        """
        eval_rows: list[EvaluationRow] = []

        for response_id in response_ids:
            # Two calls per response: the input items carry the conversation,
            # the response itself carries instructions, output, tools and params.
            input_items = self.openai.responses.input_items.list(response_id=response_id)
            response = self.openai.responses.retrieve(response_id=response_id)
            eval_rows.append(self._create_evaluation_row(input_items, response))

        logger.info(
            "Successfully processed %d selected responses into %d evaluation rows", len(response_ids), len(eval_rows)
        )
        return eval_rows

    def _create_evaluation_row(self, input_items: SyncCursorPage[ResponseItem], response: Response) -> EvaluationRow:
        """Convert a response and its input items to an evaluation row.

        Message order in the row: system instructions (if any), the converted
        input items, then the response's ``output_text`` as a final assistant
        message (if non-empty).

        Raises:
            NotImplementedError: If ``response.instructions`` is a list.
        """
        messages: list[Message] = []
        if response.instructions:
            if isinstance(response.instructions, list):
                raise NotImplementedError("List of instructions is not supported")
            messages.append(Message(role="system", content=response.instructions))
        messages.extend(self._create_messages(input_items))
        if response.output_text:
            messages.append(Message(role="assistant", content=response.output_text))
        tools = self._responses_tools_to_chat_completion_tools(response.tools)
        tool_dicts = [dict(tool) for tool in tools]
        return EvaluationRow(
            messages=messages,
            tools=tool_dicts,
            input_metadata=InputMetadata(completion_params=self._completion_params(response)),
        )

    @staticmethod
    def _completion_params(response: Response) -> dict[str, Any]:
        """Collect the generation parameters recorded on a response into a plain dict."""
        # The reasoning effort and summary are extracted by hand because passing
        # the response.reasoning object through causes an issue with
        # model_dump(), which is used for testing:
        # https://github.com/openai/openai-python/issues/1306#issuecomment-2966267356
        #
        # Keep this explanation as `#` comments: a string literal placed before
        # the "reasoning" key is concatenated with it and silently renames the key.
        reasoning = response.reasoning
        return {
            "model": response.model,
            "temperature": response.temperature,
            "max_output_tokens": response.max_output_tokens,
            "max_tool_calls": response.max_tool_calls,
            "parallel_tool_calls": response.parallel_tool_calls,
            "reasoning": {"effort": reasoning.effort, "summary": reasoning.summary} if reasoning else None,
            "top_logprobs": response.top_logprobs,
            "truncation": response.truncation,
            "top_p": response.top_p,
        }

    def _responses_tools_to_chat_completion_tools(
        self, tools: List[Tool]
    ) -> Sequence[ChatCompletionFunctionToolParam]:
        """Convert OpenAI Responses API tool definitions to chat completion function tool params.

        A missing ``parameters`` becomes ``{}`` and a missing ``description``
        becomes ``""``.

        Raises:
            NotImplementedError: If any tool has a type other than ``"function"``.
        """
        chat_completion_tools: List[ChatCompletionFunctionToolParam] = []
        for tool in tools:
            if tool.type != "function":
                raise NotImplementedError("Only function tools are supported")
            chat_completion_tools.append(
                {
                    "type": "function",
                    "function": {
                        "name": tool.name,
                        "parameters": tool.parameters or {},
                        "strict": tool.strict,
                        "description": tool.description or "",
                    },
                }
            )
        return chat_completion_tools

    def _create_messages(self, input_items: SyncCursorPage[ResponseItem]) -> Iterable[Message]:
        """Create messages from input items.

        Converts OpenAI Responses API input items to chat completion message format:
        ``message`` items become one Message per ``input_text`` content part,
        ``function_call`` items are grouped (parallel calls share one assistant
        message) and ``function_call_output`` items become ``tool`` messages.

        The items are walked in the order the page yields them and the collected
        list is reversed on return, so within each group the tool outputs are
        appended before the assistant message that carries the tool calls.
        NOTE(review): this presumes the page yields items newest-first - confirm
        against the list endpoint's default ordering.

        Raises:
            NotImplementedError: For unsupported item or content types.
        """
        messages: list[Message] = []
        current_tool_calls: list[ChatCompletionMessageFunctionToolCall] = []
        tool_call_outputs: list[Message] = []

        for item in input_items:
            if item.type == "message":
                # A message closes the pending tool-call group, if there is one.
                if current_tool_calls:
                    # Outputs first, assistant message second (the list is reversed at the end).
                    messages.extend(tool_call_outputs)
                    tool_call_outputs = []
                    messages.append(Message(role="assistant", tool_calls=current_tool_calls))
                    current_tool_calls = []

                for content_item in item.content:
                    if content_item.type == "input_text":
                        messages.append(Message(role=item.role, content=content_item.text))
                    else:
                        raise NotImplementedError(f"Unsupported content type: {content_item.type}")
            elif item.type == "function_call_output":
                # Held back until the owning assistant message is emitted.
                tool_call_outputs.append(Message(role="tool", content=item.output, tool_call_id=item.call_id))
            elif item.type == "function_call":
                tool_call = ChatCompletionMessageFunctionToolCall(
                    id=item.call_id, type="function", function=Function(name=item.name, arguments=item.arguments)
                )
                current_tool_calls.append(tool_call)
            else:
                raise NotImplementedError(f"Unsupported item type: {item.type}")

        # Flush a tool-call group that was not followed by a message item.
        if current_tool_calls:
            messages.extend(tool_call_outputs)
            messages.append(Message(role="assistant", tool_calls=current_tool_calls))

        return reversed(messages)
def get_first_user_message(self) -> Optional[Message]:
    """Return the earliest user message in the conversation, or None when there is none."""
    # get_user_messages() keeps conversation order, so the first element is the earliest.
    return next(iter(self.get_user_messages()), None)
def construct_model_settings(self, agent: Agent, row: EvaluationRow) -> ModelSettings:
    """Build the per-row model settings passed to ``agent.run``.

    Starts from a copy of the agent model's own settings (or from empty
    settings when the model is missing, is a string, or has no settings) and
    guarantees an ``extra_body`` entry. For ``OpenAIResponsesModel`` only, the
    row's identifiers are written into ``extra_body["metadata"]``; metadata
    already present in ``extra_body`` is kept, with the row identifiers taking
    precedence on key clashes.

    Args:
        agent: Agent whose ``model.settings`` seed the result.
        row: Row supplying ``input_metadata.row_id`` and the
            ``execution_metadata`` identifiers.

    Returns:
        Settings private to this call: neither the model's settings nor the
        ``extra_body`` / ``metadata`` dicts nested in them are modified.
    """
    model = agent.model
    base_settings = model.settings if model and not isinstance(model, str) else None
    # Copy so concurrent rows never write into the settings object owned by the model.
    settings: ModelSettings = base_settings.copy() if base_settings else ModelSettings()

    extra_body = settings.get("extra_body")
    if extra_body is None:
        extra_body = {}
    elif isinstance(extra_body, dict):
        # The copy above is shallow: without this the nested dict would still be
        # shared with the model and row ids would leak between concurrent rows.
        extra_body = dict(extra_body)
    settings["extra_body"] = extra_body

    # Only store metadata for ResponsesModel, not for ChatModel
    if isinstance(extra_body, dict) and isinstance(model, OpenAIResponsesModel):
        existing_metadata = extra_body.get("metadata")
        metadata = dict(existing_metadata) if isinstance(existing_metadata, dict) else {}
        metadata.update(
            {
                "row_id": row.input_metadata.row_id,
                "invocation_id": row.execution_metadata.invocation_id,
                "rollout_id": row.execution_metadata.rollout_id,
                "run_id": row.execution_metadata.run_id,
                "experiment_id": row.execution_metadata.experiment_id,
            }
        )
        extra_body["metadata"] = metadata

    return settings
"""Public entry points of the quickstart package."""

from .llm_judge import aha_judge
from .utils import split_multi_turn_rows

# Every name imported above is re-exported; keep this list in sync with the imports.
__all__ = ["aha_judge", "split_multi_turn_rows"]
b/eval_protocol/quickstart/llm_judge.py @@ -2,9 +2,11 @@ Default LLM judge for Eval Protocol. Inspired by Arena-Hard-Auto. """ +from collections.abc import Awaitable, Callable import os from datetime import datetime from typing import List, Dict, Any, Optional +from typing_extensions import cast from tqdm import tqdm import pytest @@ -55,6 +57,10 @@ mode="all", ) async def test_llm_judge(rows: list[EvaluationRow]) -> list[EvaluationRow]: + return await aha_judge(rows) + + +async def aha_judge(rows: list[EvaluationRow], judge_name: str = "gemini-2.5-pro") -> list[EvaluationRow]: """ LLM Judge evaluation using Arena-Hard-Auto style pairwise comparisons. @@ -72,8 +78,6 @@ async def test_llm_judge(rows: list[EvaluationRow]) -> list[EvaluationRow]: Same rows with updated evaluation_result containing scores and judgments """ - judge_name = "gemini-2.5-pro" # Edit to which judge you'd like to use. Configs are in utils.py. - if not rows: print("❌ No evaluation rows provided") return rows diff --git a/eval_protocol/quickstart/llm_judge_langsmith.py b/eval_protocol/quickstart/llm_judge_langsmith.py index f4efb7f5..16a287a9 100644 --- a/eval_protocol/quickstart/llm_judge_langsmith.py +++ b/eval_protocol/quickstart/llm_judge_langsmith.py @@ -22,6 +22,7 @@ import os from typing import Any, Dict, List, Optional +from openai import AsyncOpenAI import pytest from eval_protocol.models import EvaluationRow, Message, EvaluateResult, MetricResult @@ -31,7 +32,7 @@ split_multi_turn_rows, JUDGE_CONFIGS, calculate_bootstrap_scores, - run_judgment, + run_judgment_async, ) from eval_protocol.adapters.langsmith import LangSmithAdapter @@ -91,10 +92,15 @@ async def test_llm_judge_langsmith(rows: List[EvaluationRow]) -> List[Evaluation judgments: List[Dict[str, Any]] = [] - for row in rows: - result = run_judgment(row, model_name, judge_name) - if result and result["games"][0] and result["games"][1]: - judgments.append(result) + judge_config = JUDGE_CONFIGS[judge_name] + + async with 
"""
LLM Judge quickstart that PULLS DATA FROM OpenAI Responses API and persists results locally via Eval Protocol.

This mirrors `eval_protocol/quickstart/llm_judge.py` (Langfuse source), but uses
OpenAI Responses API as the source of evaluation rows.

Env vars:
  export OPENAI_API_KEY=...           # required to fetch examples

Judge model keys:
  - Default judge is "gemini-2.5-pro" from utils; requires GEMINI_API_KEY
  - Or set judge in the code to "gpt-4.1" and export OPENAI_API_KEY

Run:
  pytest python-sdk/eval_protocol/quickstart/llm_judge_openai_responses.py -q -s

Note:
  The evaluation rows are fetched at import time (module level), i.e. whenever
  pytest collects this file. The CI ``skipif`` marker below only skips running
  the test; it does not prevent that fetch.
"""

import os
from typing import List

import pytest

from eval_protocol.models import EvaluationRow
from eval_protocol.pytest import evaluation_test
from eval_protocol.pytest.default_single_turn_rollout_process import SingleTurnRolloutProcessor
from eval_protocol.quickstart import aha_judge, split_multi_turn_rows
from eval_protocol.adapters.openai_responses import OpenAIResponsesAdapter

# No arguments: api_key and base_url are both passed to the adapter as None.
adapter = OpenAIResponsesAdapter()
# Runs at import time: retrieves each listed response and its input items.
input_rows = adapter.get_evaluation_rows(
    response_ids=[
        "resp_0e1b7db5d96e92470068c99506443c819e9305e92915d2405f",
        "resp_05639dcaca074fbc0068c9946593b481908cac70075926d85c",
    ]
)


@pytest.mark.skipif(os.environ.get("CI") == "true", reason="Skip in CI")  # pyright: ignore[reportAttributeAccessIssue]
@pytest.mark.asyncio  # pyright: ignore[reportAttributeAccessIssue]
@evaluation_test(
    input_rows=[input_rows],
    completion_params=[
        {
            "model": "fireworks_ai/accounts/fireworks/models/deepseek-v3p1",
        },
        {
            "model": "fireworks_ai/accounts/fireworks/models/kimi-k2-instruct-0905",
        },
    ],
    rollout_processor=SingleTurnRolloutProcessor(),
    preprocess_fn=split_multi_turn_rows,
    mode="all",
)
async def test_llm_judge_openai_responses(rows: List[EvaluationRow]) -> List[EvaluationRow]:
    """Judge the rows with ``aha_judge`` using its default judge and return its result."""
    return await aha_judge(rows)
MetricResult @@ -204,7 +205,7 @@ async def pairwise_judgment_async(question_text, answer_a, answer_b, tools, judg async def run_judgment_async( - row: EvaluationRow, model_name: str, judge_name: str, shared_client + row: EvaluationRow, model_name: str, judge_name: str, shared_client: AsyncOpenAI ) -> Optional[Dict[str, Any]]: """Async judgment using shared client to avoid cleanup issues.""" if not row.messages: diff --git a/pyproject.toml b/pyproject.toml index 4724328b..581e880b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,7 @@ dev = [ "jupyter>=1.1.1", "pip>=25.1.1", "haikus==0.3.8", + "syrupy>=4.0.0", ] trl = [ "torch>=1.9", diff --git a/tests/adapters/__snapshots__/test_openai_responses_adapter.ambr b/tests/adapters/__snapshots__/test_openai_responses_adapter.ambr new file mode 100644 index 00000000..ad45893b --- /dev/null +++ b/tests/adapters/__snapshots__/test_openai_responses_adapter.ambr @@ -0,0 +1,670 @@ +# serializer version: 1 +# name: test_openai_responses_adapter_with_real_response_parallel_tool_calls + list([ + dict({ + 'eval_metadata': None, + 'evaluation_result': None, + 'ground_truth': None, + 'input_metadata': dict({ + 'completion_params': dict({ + ''' + + We have to manually extract the reasoning effort and summary + from the response.reasoning object because the openai-python + causes an issue with model_dump() which is used for testing. + + https://github.com/openai/openai-python/issues/1306#issuecomment-2966267356 + reasoning + ''': dict({ + 'effort': 'medium', + 'summary': None, + }), + 'max_output_tokens': None, + 'max_tool_calls': None, + 'model': 'gpt-5-2025-08-07', + 'parallel_tool_calls': True, + 'temperature': 1.0, + 'top_logprobs': 0, + 'top_p': 1.0, + 'truncation': 'disabled', + }), + 'dataset_info': None, + 'row_id': None, + 'session_data': None, + }), + 'messages': list([ + dict({ + 'content': ''' + You are a helpful assistant that has access to the + Chinook database stored in a Postgres database. 
You have access to a tool to + execute SQL queries that you should use to answer questions. Your job is to + answer questions about the database. If you run into an error, you should try to + fix the query and try again. Here is the schema of the database: + + Schema: + table_name,column_name,data_type,is_nullable + album,album_id,integer,NO + album,title,character varying,NO + album,artist_id,integer,NO + artist,artist_id,integer,NO + artist,name,character varying,YES + customer,customer_id,integer,NO + customer,first_name,character varying,NO + customer,last_name,character varying,NO + customer,company,character varying,YES + customer,address,character varying,YES + customer,city,character varying,YES + customer,state,character varying,YES + customer,country,character varying,YES + customer,postal_code,character varying,YES + customer,phone,character varying,YES + customer,fax,character varying,YES + customer,email,character varying,NO + customer,support_rep_id,integer,YES + employee,employee_id,integer,NO + employee,last_name,character varying,NO + employee,first_name,character varying,NO + employee,title,character varying,YES + employee,reports_to,integer,YES + employee,birth_date,timestamp without time zone,YES + employee,hire_date,timestamp without time zone,YES + employee,address,character varying,YES + employee,city,character varying,YES + employee,state,character varying,YES + employee,country,character varying,YES + employee,postal_code,character varying,YES + employee,phone,character varying,YES + employee,fax,character varying,YES + employee,email,character varying,YES + genre,genre_id,integer,NO + genre,name,character varying,YES + invoice,invoice_id,integer,NO + invoice,customer_id,integer,NO + invoice,invoice_date,timestamp without time zone,NO + invoice,billing_address,character varying,YES + invoice,billing_city,character varying,YES + invoice,billing_state,character varying,YES + invoice,billing_country,character varying,YES + 
invoice,billing_postal_code,character varying,YES + invoice,total,numeric,NO + invoice_line,invoice_line_id,integer,NO + invoice_line,invoice_id,integer,NO + invoice_line,track_id,integer,NO + invoice_line,unit_price,numeric,NO + invoice_line,quantity,integer,NO + media_type,media_type_id,integer,NO + media_type,name,character varying,YES + playlist,playlist_id,integer,NO + playlist,name,character varying,YES + playlist_track,playlist_id,integer,NO + playlist_track,track_id,integer,NO + track,track_id,integer,NO + track,name,character varying,NO + track,album_id,integer,YES + track,media_type_id,integer,NO + track,genre_id,integer,YES + track,composer,character varying,YES + track,milliseconds,integer,NO + track,bytes,integer,YES + track,unit_price,numeric,NO + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'system', + 'tool_call_id': None, + 'tool_calls': None, + }), + dict({ + 'content': ''' + Calculate customer lifetime value segments and predict churn risk. Group + customers into segments based on spending patterns and calculate days since last + purchase to identify churn risk. 
+ ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'user', + 'tool_call_id': None, + 'tool_calls': None, + }), + dict({ + 'content': '', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'assistant', + 'tool_call_id': None, + 'tool_calls': list([ + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium 
Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n days_since_last_purchase,\\n lv_segment,\\n churn_risk\\nFROM segmented\\nWHERE churn_risk = \'High Risk\'\\nORDER BY days_since_last_purchase DESC, lifetime_value DESC\\nLIMIT 10;"}', + 'name': 'execute_sql', + }), + 'id': 'call_QklYCtJeAzMn8iMshl0PaYXg', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN 
days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT \\n lv_segment,\\n COUNT(*) AS customers,\\n ROUND(AVG(lifetime_value)::numeric, 2) AS avg_ltv,\\n ROUND(AVG(COALESCE(days_since_last_purchase, 0))::numeric, 1) AS avg_days_since_last_purchase,\\n SUM(CASE WHEN churn_risk = \'High Risk\' THEN 1 ELSE 0 END) AS high_risk_customers\\nFROM segmented\\nGROUP BY lv_segment\\nORDER BY \\n CASE lv_segment WHEN \'High\' THEN 1 WHEN \'Medium\' THEN 2 WHEN \'Low\' THEN 3 WHEN \'No Spend\' THEN 4 ELSE 5 END;"}', + 'name': 'execute_sql', + }), + 'id': 'call_8yfYxjCJFppcVcjnmKbTBxoj', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN 
days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT *\\nFROM segmented\\nORDER BY lifetime_value DESC, customer_id;"}', + 'name': 'execute_sql', + }), + 'id': 'call_pyQOTPrrGRT0IuljYLkbeb0a', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n 
last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT \\n churn_risk,\\n COUNT(*) AS customers,\\n ROUND(AVG(lifetime_value)::numeric, 2) AS avg_ltv,\\n ROUND(AVG(COALESCE(days_since_last_purchase, 0))::numeric, 1) AS avg_days_since_last_purchase\\nFROM segmented\\nGROUP BY 1\\nORDER BY \\n CASE churn_risk WHEN \'High Risk\' THEN 1 WHEN \'Medium Risk\' THEN 2 WHEN \'Low Risk\' THEN 3 WHEN \'Active\' THEN 4 WHEN \'Churn Risk: No purchases\' THEN 5 ELSE 6 END;"}', + 'name': 'execute_sql', + }), + 'id': 'call_ufaloEQ87VReYLpLxeaf2EmV', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n 
CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n days_since_last_purchase,\\n lv_segment,\\n churn_risk\\nFROM segmented\\nWHERE churn_risk = \'High Risk\'\\nORDER BY lifetime_value DESC, days_since_last_purchase DESC\\nLIMIT 10;"}', + 'name': 'execute_sql', + }), + 'id': 'call_C3MJguOJ62YJtQ4wN6duDsEV', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS 
orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT \\n lv_segment,\\n COUNT(*) AS customers,\\n ROUND(AVG(lifetime_value)::numeric, 2) AS avg_ltv,\\n ROUND(AVG(COALESCE(days_since_last_purchase, 0))::numeric, 1) AS avg_days_since_last_purchase,\\n SUM(CASE WHEN churn_risk = \'High Risk\' THEN 1 ELSE 0 END) AS high_risk_customers\\nFROM segmented\\nGROUP BY lv_segment\\nORDER BY \\n CASE lv_segment WHEN \'High\' THEN 1 WHEN \'Medium\' THEN 2 WHEN \'Low\' 
THEN 3 WHEN \'No Spend\' THEN 4 ELSE 5 END;"}', + 'name': 'execute_sql', + }), + 'id': 'call_feR1zKfz6o7B50j6iCcRyLZI', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase_raw,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n CASE \\n WHEN days_since_last_purchase_raw IS NULL THEN NULL\\n ELSE GREATEST(0, days_since_last_purchase_raw)\\n END AS days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) > 365 THEN \'High Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN (CASE WHEN days_since_last_purchase_raw IS NULL THEN NULL ELSE GREATEST(0, days_since_last_purchase_raw) END) BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT 
*\\nFROM segmented\\nORDER BY lifetime_value DESC, customer_id;"}', + 'name': 'execute_sql', + }), + 'id': 'call_PxEO2fausJ3iH9oOiqPKpaku', + 'type': 'function', + }), + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH customer_metrics AS (\\n SELECT \\n c.customer_id,\\n (c.first_name || \' \' || c.last_name) AS customer_name,\\n c.country,\\n COALESCE(SUM(i.total), 0) AS lifetime_value,\\n COUNT(i.invoice_id) AS orders,\\n COALESCE(AVG(i.total), 0) AS avg_order_value,\\n MAX(i.invoice_date) AS last_purchase_date\\n FROM customer c\\n LEFT JOIN invoice i ON i.customer_id = c.customer_id\\n GROUP BY c.customer_id, customer_name, c.country\\n),\\nscored AS (\\n SELECT \\n *,\\n CASE \\n WHEN last_purchase_date IS NULL THEN NULL\\n ELSE (current_date - last_purchase_date::date)\\n END AS days_since_last_purchase,\\n PERCENT_RANK() OVER (ORDER BY lifetime_value) AS lv_percent_rank\\n FROM customer_metrics\\n),\\nsegmented AS (\\n SELECT \\n customer_id,\\n customer_name,\\n country,\\n lifetime_value,\\n orders,\\n avg_order_value,\\n last_purchase_date,\\n days_since_last_purchase,\\n CASE \\n WHEN lifetime_value = 0 THEN \'No Spend\'\\n WHEN lv_percent_rank >= 0.8 THEN \'High\'\\n WHEN lv_percent_rank >= 0.5 THEN \'Medium\'\\n ELSE \'Low\'\\n END AS lv_segment,\\n CASE \\n WHEN last_purchase_date IS NULL THEN \'Churn Risk: No purchases\'\\n WHEN days_since_last_purchase > 365 THEN \'High Risk\'\\n WHEN days_since_last_purchase BETWEEN 181 AND 365 THEN \'Medium Risk\'\\n WHEN days_since_last_purchase BETWEEN 91 AND 180 THEN \'Low Risk\'\\n ELSE \'Active\'\\n END AS churn_risk\\n FROM scored\\n)\\nSELECT *\\nFROM segmented\\nORDER BY lifetime_value DESC, customer_id;"}', + 'name': 'execute_sql', + }), + 'id': 'call_XMf9acssGFrA4PQy1QsOyGJS', + 'type': 'function', + }), + ]), + }), + dict({ + 'content': ''' + | customer_id | customer_name | country | lifetime_value | orders | avg_order_value | last_purchase_date | days_since_last_purchase | lv_segment | 
churn_risk | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | 6 | Helena Holý | Czech Republic | 49.62 | 7 | 7.0885714285714286 | 2025-11-13 00:00:00 | -58 | High | Active | + | 26 | Richard Cunningham | USA | 47.62 | 7 | 6.8028571428571429 | 2025-04-05 00:00:00 | 164 | High | Low Risk | + | 57 | Luis Rojas | Chile | 46.62 | 7 | 6.6600000000000000 | 2024-10-14 00:00:00 | 337 | High | Medium Risk | + | 45 | Ladislav Kovács | Hungary | 45.62 | 7 | 6.5171428571428571 | 2025-07-20 00:00:00 | 58 | High | Active | + | 46 | Hugh O'Reilly | Ireland | 45.62 | 7 | 6.5171428571428571 | 2025-11-04 00:00:00 | -49 | High | Active | + | 24 | Frank Ralston | USA | 43.62 | 7 | 6.2314285714285714 | 2025-08-20 00:00:00 | 27 | High | Active | + | 28 | Julia Barnett | USA | 43.62 | 7 | 6.2314285714285714 | 2025-05-19 00:00:00 | 120 | High | Low Risk | + | 37 | Fynn Zimmermann | Germany | 43.62 | 7 | 6.2314285714285714 | 2025-06-03 00:00:00 | 105 | High | Low Risk | + | 7 | Astrid Gruber | Austria | 42.62 | 7 | 6.0885714285714286 | 2025-06-19 00:00:00 | 89 | High | Active | + | 25 | Victor Stevens | USA | 42.62 | 7 | 6.0885714285714286 | 2025-12-05 00:00:00 | -80 | High | Active | + | 44 | Terhi Hämäläinen | Finland | 41.62 | 7 | 5.9457142857142857 | 2025-12-14 00:00:00 | -89 | High | Active | + | 5 | František Wichterlová | Czech Republic | 40.62 | 7 | 5.8028571428571429 | 2025-05-06 00:00:00 | 133 | Medium | Low Risk | + | 43 | Isabelle Mercier | France | 40.62 | 7 | 5.8028571428571429 | 2025-06-06 00:00:00 | 102 | Medium | Low Risk | + | 48 | Johannes Van der Berg | Netherlands | 40.62 | 7 | 5.8028571428571429 | 2025-09-12 00:00:00 | 4 | Medium | Active | + | 1 | Luís Gonçalves | Brazil | 39.62 | 7 | 5.6600000000000000 | 2025-08-07 00:00:00 | 40 | Medium | Active | + | 3 | François Tremblay | Canada | 39.62 | 7 | 5.6600000000000000 | 2025-09-20 00:00:00 | -4 | Medium | Active | + | 4 | Bjørn Hansen | Norway | 39.62 | 7 | 5.6600000000000000 | 2025-10-03 00:00:00 | 
-17 | Medium | Active | + | 17 | Jack Smith | USA | 39.62 | 7 | 5.6600000000000000 | 2024-07-31 00:00:00 | 412 | Medium | High Risk | + | 20 | Dan Miller | USA | 39.62 | 7 | 5.6600000000000000 | 2025-11-21 00:00:00 | -66 | Medium | Active | + | 22 | Heather Leacock | USA | 39.62 | 7 | 5.6600000000000000 | 2025-07-07 00:00:00 | 71 | Medium | Active | + | 34 | João Fernandes | Portugal | 39.62 | 7 | 5.6600000000000000 | 2024-10-01 00:00:00 | 350 | Medium | Medium Risk | + | 42 | Wyatt Girard | France | 39.62 | 7 | 5.6600000000000000 | 2025-11-03 00:00:00 | -48 | Medium | Active | + | 15 | Jennifer Peterson | Canada | 38.62 | 7 | 5.5171428571428571 | 2024-12-15 00:00:00 | 275 | Medium | Medium Risk | + | 19 | Tim Goyer | USA | 38.62 | 7 | 5.5171428571428571 | 2024-09-13 00:00:00 | 368 | Medium | High Risk | + | 39 | Camille Bernard | France | 38.62 | 7 | 5.5171428571428571 | 2025-09-07 00:00:00 | 9 | Medium | Active | + | 40 | Dominique Lefebvre | France | 38.62 | 7 | 5.5171428571428571 | 2024-08-13 00:00:00 | 399 | Medium | High Risk | + | 51 | Joakim Johansson | Sweden | 38.62 | 7 | 5.5171428571428571 | 2024-12-02 00:00:00 | 288 | Medium | Medium Risk | + | 58 | Manoj Pareek | India | 38.62 | 7 | 5.5171428571428571 | 2025-12-22 00:00:00 | -97 | Medium | Active | + | 2 | Leonie Köhler | Germany | 37.62 | 7 | 5.3742857142857143 | 2024-07-13 00:00:00 | 430 | Low | High Risk | + | 8 | Daan Peeters | Belgium | 37.62 | 7 | 5.3742857142857143 | 2025-10-04 00:00:00 | -18 | Low | Active | + | 9 | Kara Nielsen | Denmark | 37.62 | 7 | 5.3742857142857143 | 2025-02-02 00:00:00 | 226 | Low | Medium Risk | + | 10 | Eduardo Martins | Brazil | 37.62 | 7 | 5.3742857142857143 | 2025-08-12 00:00:00 | 35 | Low | Active | + | 11 | Alexandre Rocha | Brazil | 37.62 | 7 | 5.3742857142857143 | 2025-03-18 00:00:00 | 182 | Low | Medium Risk | + | 12 | Roberto Almeida | Brazil | 37.62 | 7 | 5.3742857142857143 | 2025-10-05 00:00:00 | -19 | Low | Active | + | 13 | Fernanda Ramos | Brazil | 37.62 
| 7 | 5.3742857142857143 | 2024-11-01 00:00:00 | 319 | Low | Medium Risk | + | 14 | Mark Philips | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-05-11 00:00:00 | 128 | Low | Low Risk | + | 16 | Frank Harris | USA | 37.62 | 7 | 5.3742857142857143 | 2025-07-04 00:00:00 | 74 | Low | Active | + | 18 | Michelle Brooks | USA | 37.62 | 7 | 5.3742857142857143 | 2025-10-08 00:00:00 | -22 | Low | Active | + | 21 | Kathy Chase | USA | 37.62 | 7 | 5.3742857142857143 | 2025-12-04 00:00:00 | -79 | Low | Active | + | 23 | John Gordon | USA | 37.62 | 7 | 5.3742857142857143 | 2025-12-04 00:00:00 | -79 | Low | Active | + | 27 | Patrick Gray | USA | 37.62 | 7 | 5.3742857142857143 | 2025-10-13 00:00:00 | -27 | Low | Active | + | 29 | Robert Brown | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-12-06 00:00:00 | -81 | Low | Active | + | 30 | Edward Francis | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-01-02 00:00:00 | 257 | Low | Medium Risk | + | 31 | Martha Silk | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-07-12 00:00:00 | 66 | Low | Active | + | 32 | Aaron Mitchell | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-02-15 00:00:00 | 213 | Low | Medium Risk | + | 33 | Ellie Sullivan | Canada | 37.62 | 7 | 5.3742857142857143 | 2025-09-04 00:00:00 | 12 | Low | Active | + | 35 | Madalena Sampaio | Portugal | 37.62 | 7 | 5.3742857142857143 | 2025-12-09 00:00:00 | -84 | Low | Active | + | 36 | Hannah Schneider | Germany | 37.62 | 7 | 5.3742857142857143 | 2024-11-14 00:00:00 | 306 | Low | Medium Risk | + | 38 | Niklas Schröder | Germany | 37.62 | 7 | 5.3742857142857143 | 2024-06-30 00:00:00 | 443 | Low | High Risk | + | 41 | Marc Dubois | France | 37.62 | 7 | 5.3742857142857143 | 2025-10-21 00:00:00 | -35 | Low | Active | + | 47 | Lucas Mancini | Italy | 37.62 | 7 | 5.3742857142857143 | 2025-03-05 00:00:00 | 195 | Low | Medium Risk | + | 49 | Stanisław Wójcik | Poland | 37.62 | 7 | 5.3742857142857143 | 2025-04-18 00:00:00 | 151 | Low | Low Risk | + | 50 | Enrique Muñoz | Spain | 37.62 
| 7 | 5.3742857142857143 | 2025-11-05 00:00:00 | -50 | Low | Active | + | 52 | Emma Jones | United Kingdom | 37.62 | 7 | 5.3742857142857143 | 2025-06-11 00:00:00 | 97 | Low | Low Risk | + | 53 | Phil Hughes | United Kingdom | 37.62 | 7 | 5.3742857142857143 | 2025-01-15 00:00:00 | 244 | Low | Medium Risk | + | 54 | Steve Murray | United Kingdom | 37.62 | 7 | 5.3742857142857143 | 2025-08-04 00:00:00 | 43 | Low | Active | + | 55 | Mark Taylor | Australia | 37.62 | 7 | 5.3742857142857143 | 2024-08-31 00:00:00 | 381 | Low | High Risk | + | 56 | Diego Gutiérrez | Argentina | 37.62 | 7 | 5.3742857142857143 | 2025-11-08 00:00:00 | -53 | Low | Active | + | 59 | Puja Srivastava | India | 36.64 | 6 | 6.1066666666666667 | 2024-05-30 00:00:00 | 474 | Low | High Risk | + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_XMf9acssGFrA4PQy1QsOyGJS', + 'tool_calls': None, + }), + dict({ + 'content': ''' + | churn_risk | customers | avg_ltv | avg_days_since_last_purchase | + | --- | --- | --- | --- | + | High Risk | 7 | 38.05 | 415.3 | + | Medium Risk | 12 | 38.70 | 266.0 | + | Low Risk | 8 | 41.12 | 125.0 | + | Active | 32 | 39.65 | 16.5 | + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_PxEO2fausJ3iH9oOiqPKpaku', + 'tool_calls': None, + }), + dict({ + 'content': 'No results found.', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_feR1zKfz6o7B50j6iCcRyLZI', + 'tool_calls': None, + }), + dict({ + 'content': 'No results found.', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_C3MJguOJ62YJtQ4wN6duDsEV', + 'tool_calls': None, + }), + dict({ + 'content': 'No results found.', + 'control_plane_step': None, + 
'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_ufaloEQ87VReYLpLxeaf2EmV', + 'tool_calls': None, + }), + dict({ + 'content': ''' + | customer_id | customer_name | country | lifetime_value | orders | avg_order_value | last_purchase_date | days_since_last_purchase | lv_segment | churn_risk | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | 59 | Puja Srivastava | India | 36.64 | 6 | 6.1066666666666667 | 2024-05-30 00:00:00 | 474 | Low | High Risk | + | 38 | Niklas Schröder | Germany | 37.62 | 7 | 5.3742857142857143 | 2024-06-30 00:00:00 | 443 | Low | High Risk | + | 2 | Leonie Köhler | Germany | 37.62 | 7 | 5.3742857142857143 | 2024-07-13 00:00:00 | 430 | Low | High Risk | + | 17 | Jack Smith | USA | 39.62 | 7 | 5.6600000000000000 | 2024-07-31 00:00:00 | 412 | Medium | High Risk | + | 40 | Dominique Lefebvre | France | 38.62 | 7 | 5.5171428571428571 | 2024-08-13 00:00:00 | 399 | Medium | High Risk | + | 55 | Mark Taylor | Australia | 37.62 | 7 | 5.3742857142857143 | 2024-08-31 00:00:00 | 381 | Low | High Risk | + | 19 | Tim Goyer | USA | 38.62 | 7 | 5.5171428571428571 | 2024-09-13 00:00:00 | 368 | Medium | High Risk | + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_pyQOTPrrGRT0IuljYLkbeb0a', + 'tool_calls': None, + }), + dict({ + 'content': 'No results found.', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_8yfYxjCJFppcVcjnmKbTBxoj', + 'tool_calls': None, + }), + dict({ + 'content': 'No results found.', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_QklYCtJeAzMn8iMshl0PaYXg', + 'tool_calls': None, + }), + dict({ + 'content': ''' + Here’s a CLV segmentation and churn-risk view built from invoices in the 
Chinook database. + + How it’s calculated + - Lifetime value (LTV): sum of all invoice totals per customer. + - Orders: count of invoices per customer; AOV: average invoice total. + - Days since last purchase: max(0, today − last invoice date). Future-dated invoices are treated as 0 days. + - LTV segments (percentile-based): + - High: top ~20% by LTV + - Medium: next ~30% + - Low: bottom ~50% + - No Spend: LTV = 0 + - Churn risk: + - Active: 0–90 days + - Low Risk: 91–180 days + - Medium Risk: 181–365 days + - High Risk: >365 days + - Churn Risk: No purchases (never bought) + + Churn-risk summary (all 59 customers) + - High Risk: 7 customers | Avg LTV: 38.05 | Avg days since last purchase: 415.3 + - Medium Risk: 12 customers | Avg LTV: 38.70 | Avg days since last purchase: 266.0 + - Low Risk: 8 customers | Avg LTV: 41.12 | Avg days since last purchase: 125.0 + - Active: 32 customers | Avg LTV: 39.65 | Avg days since last purchase: 16.5 + + Top high-risk customers to prioritize (longest time since last purchase) + - Puja Srivastava (India) — LTV 36.64 — 474 days — LV segment: Low — Churn risk: High + - Niklas Schröder (Germany) — LTV 37.62 — 443 days — LV segment: Low — Churn risk: High + - Leonie Köhler (Germany) — LTV 37.62 — 430 days — LV segment: Low — Churn risk: High + - Jack Smith (USA) — LTV 39.62 — 412 days — LV segment: Medium — Churn risk: High + - Dominique Lefebvre (France) — LTV 38.62 — 399 days — LV segment: Medium — Churn risk: High + - Mark Taylor (Australia) — LTV 37.62 — 381 days — LV segment: Low — Churn risk: High + - Tim Goyer (USA) — LTV 38.62 — 368 days — LV segment: Medium — Churn risk: High + + Notes and next steps + - The percent-rank approach keeps LTV segments relative to your customer base. As spending patterns shift, segment thresholds adjust automatically. + - If you want fixed-dollar LTV segments (e.g., Low < $30, Medium $30–$45, High > $45) or different churn thresholds, say the word and I’ll rerun the analysis. 
+ - I can export the full customer-level table (customer, country, LTV, orders, AOV, last purchase date, days since last purchase, LTV segment, churn risk) as CSV if needed. + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'assistant', + 'tool_call_id': None, + 'tool_calls': None, + }), + ]), + 'pid': None, + 'rollout_status': dict({ + 'code': , + 'details': list([ + ]), + 'message': 'Rollout is running', + }), + 'tools': list([ + dict({ + 'function': dict({ + 'description': '', + 'name': 'execute_sql', + 'parameters': dict({ + 'additionalProperties': False, + 'properties': dict({ + 'query': dict({ + 'type': 'string', + }), + }), + 'required': list([ + 'query', + ]), + 'type': 'object', + }), + 'strict': True, + }), + 'type': 'function', + }), + ]), + }), + ]) +# --- +# name: test_openai_responses_adapter_with_real_response_simple + list([ + dict({ + 'eval_metadata': None, + 'evaluation_result': None, + 'ground_truth': None, + 'input_metadata': dict({ + 'completion_params': dict({ + ''' + + We have to manually extract the reasoning effort and summary + from the response.reasoning object because the openai-python + causes an issue with model_dump() which is used for testing. + + https://github.com/openai/openai-python/issues/1306#issuecomment-2966267356 + reasoning + ''': dict({ + 'effort': 'medium', + 'summary': None, + }), + 'max_output_tokens': None, + 'max_tool_calls': None, + 'model': 'gpt-5-2025-08-07', + 'parallel_tool_calls': True, + 'temperature': 1.0, + 'top_logprobs': 0, + 'top_p': 1.0, + 'truncation': 'disabled', + }), + 'dataset_info': None, + 'row_id': None, + 'session_data': None, + }), + 'messages': list([ + dict({ + 'content': ''' + You are a helpful assistant that has access to the + Chinook database stored in a Postgres database. You have access to a tool to + execute SQL queries that you should use to answer questions. Your job is to + answer questions about the database. 
If you run into an error, you should try to + fix the query and try again. Here is the schema of the database: + + Schema: + table_name,column_name,data_type,is_nullable + album,album_id,integer,NO + album,title,character varying,NO + album,artist_id,integer,NO + artist,artist_id,integer,NO + artist,name,character varying,YES + customer,customer_id,integer,NO + customer,first_name,character varying,NO + customer,last_name,character varying,NO + customer,company,character varying,YES + customer,address,character varying,YES + customer,city,character varying,YES + customer,state,character varying,YES + customer,country,character varying,YES + customer,postal_code,character varying,YES + customer,phone,character varying,YES + customer,fax,character varying,YES + customer,email,character varying,NO + customer,support_rep_id,integer,YES + employee,employee_id,integer,NO + employee,last_name,character varying,NO + employee,first_name,character varying,NO + employee,title,character varying,YES + employee,reports_to,integer,YES + employee,birth_date,timestamp without time zone,YES + employee,hire_date,timestamp without time zone,YES + employee,address,character varying,YES + employee,city,character varying,YES + employee,state,character varying,YES + employee,country,character varying,YES + employee,postal_code,character varying,YES + employee,phone,character varying,YES + employee,fax,character varying,YES + employee,email,character varying,YES + genre,genre_id,integer,NO + genre,name,character varying,YES + invoice,invoice_id,integer,NO + invoice,customer_id,integer,NO + invoice,invoice_date,timestamp without time zone,NO + invoice,billing_address,character varying,YES + invoice,billing_city,character varying,YES + invoice,billing_state,character varying,YES + invoice,billing_country,character varying,YES + invoice,billing_postal_code,character varying,YES + invoice,total,numeric,NO + invoice_line,invoice_line_id,integer,NO + invoice_line,invoice_id,integer,NO + 
invoice_line,track_id,integer,NO + invoice_line,unit_price,numeric,NO + invoice_line,quantity,integer,NO + media_type,media_type_id,integer,NO + media_type,name,character varying,YES + playlist,playlist_id,integer,NO + playlist,name,character varying,YES + playlist_track,playlist_id,integer,NO + playlist_track,track_id,integer,NO + track,track_id,integer,NO + track,name,character varying,NO + track,album_id,integer,YES + track,media_type_id,integer,NO + track,genre_id,integer,YES + track,composer,character varying,YES + track,milliseconds,integer,NO + track,bytes,integer,YES + track,unit_price,numeric,NO + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'system', + 'tool_call_id': None, + 'tool_calls': None, + }), + dict({ + 'content': 'Find all employees and their reporting hierarchy levels using a recursive CTE. Show employee name, level, and the complete hierarchy path from top to bottom.', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'user', + 'tool_call_id': None, + 'tool_calls': None, + }), + dict({ + 'content': '', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'assistant', + 'tool_call_id': None, + 'tool_calls': list([ + dict({ + 'function': dict({ + 'arguments': '{"query":"WITH RECURSIVE emp_tree AS (\\n SELECT \\n e.employee_id,\\n (e.first_name || \' \' || e.last_name) AS name,\\n e.reports_to,\\n 1 AS level,\\n (e.first_name || \' \' || e.last_name) AS path\\n FROM employee e\\n WHERE e.reports_to IS NULL\\n UNION ALL\\n SELECT \\n c.employee_id,\\n (c.first_name || \' \' || c.last_name) AS name,\\n c.reports_to,\\n et.level + 1 AS level,\\n (et.path || \' > \' || c.first_name || \' \' || c.last_name) AS path\\n FROM employee c\\n JOIN emp_tree et ON c.reports_to = et.employee_id\\n)\\nSELECT name, level, path\\nFROM emp_tree\\nORDER BY path;"}', + 'name': 
'execute_sql', + }), + 'id': 'call_jWVUHFHBmrK1FpHIP7FqZmaO', + 'type': 'function', + }), + ]), + }), + dict({ + 'content': ''' + | name | level | path | + | --- | --- | --- | + | Andrew Adams | 1 | Andrew Adams | + | Michael Mitchell | 2 | Andrew Adams > Michael Mitchell | + | Laura Callahan | 3 | Andrew Adams > Michael Mitchell > Laura Callahan | + | Robert King | 3 | Andrew Adams > Michael Mitchell > Robert King | + | Nancy Edwards | 2 | Andrew Adams > Nancy Edwards | + | Jane Peacock | 3 | Andrew Adams > Nancy Edwards > Jane Peacock | + | Margaret Park | 3 | Andrew Adams > Nancy Edwards > Margaret Park | + | Steve Johnson | 3 | Andrew Adams > Nancy Edwards > Steve Johnson | + ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'tool', + 'tool_call_id': 'call_jWVUHFHBmrK1FpHIP7FqZmaO', + 'tool_calls': None, + }), + dict({ + 'content': ''' + Here are all employees with their hierarchy level and full path from the top: + + - Andrew Adams — Level 1 — Path: Andrew Adams + - Michael Mitchell — Level 2 — Path: Andrew Adams > Michael Mitchell + - Laura Callahan — Level 3 — Path: Andrew Adams > Michael Mitchell > Laura Callahan + - Robert King — Level 3 — Path: Andrew Adams > Michael Mitchell > Robert King + - Nancy Edwards — Level 2 — Path: Andrew Adams > Nancy Edwards + - Jane Peacock — Level 3 — Path: Andrew Adams > Nancy Edwards > Jane Peacock + - Margaret Park — Level 3 — Path: Andrew Adams > Nancy Edwards > Margaret Park + - Steve Johnson — Level 3 — Path: Andrew Adams > Nancy Edwards > Steve Johnson + + If you want this ordered differently (e.g., by level, name, or reporting manager), let me know and I can adjust the query. 
+ ''', + 'control_plane_step': None, + 'function_call': None, + 'name': None, + 'reasoning_content': None, + 'role': 'assistant', + 'tool_call_id': None, + 'tool_calls': None, + }), + ]), + 'pid': None, + 'rollout_status': dict({ + 'code': , + 'details': list([ + ]), + 'message': 'Rollout is running', + }), + 'tools': list([ + dict({ + 'function': dict({ + 'description': '', + 'name': 'execute_sql', + 'parameters': dict({ + 'additionalProperties': False, + 'properties': dict({ + 'query': dict({ + 'type': 'string', + }), + }), + 'required': list([ + 'query', + ]), + 'type': 'object', + }), + 'strict': True, + }), + 'type': 'function', + }), + ]), + }), + ]) +# --- diff --git a/tests/adapters/test_openai_responses_adapter.py b/tests/adapters/test_openai_responses_adapter.py new file mode 100644 index 00000000..bf9edd98 --- /dev/null +++ b/tests/adapters/test_openai_responses_adapter.py @@ -0,0 +1,48 @@ +"""Tests for OpenAIResponsesAdapter.""" + +from syrupy.assertion import SnapshotAssertion + +from eval_protocol.adapters.openai_responses import OpenAIResponsesAdapter + + +def test_openai_responses_adapter_with_real_response_simple(snapshot: SnapshotAssertion): + """Test OpenAIResponsesAdapter with a real response ID that is a simple 4 + message conversation with: system, user, tool, tool response, assistant. 
+ + https://platform.openai.com/logs/resp_05639dcaca074fbc0068c9946593b481908cac70075926d85c + """ + adapter = OpenAIResponsesAdapter() + + response_id = "resp_05639dcaca074fbc0068c9946593b481908cac70075926d85c" + + eval_rows = adapter.get_evaluation_rows(response_ids=[response_id]) + + # Basic assertions about the returned data structure + assert isinstance(eval_rows, list) + assert len(eval_rows) == 1 + + # Convert to dict for snapshot testing + eval_rows_dict = [row.model_dump(exclude={"created_at", "execution_metadata"}) for row in eval_rows] + + # Assert against snapshot + assert eval_rows_dict == snapshot + + +def test_openai_responses_adapter_with_real_response_parallel_tool_calls(snapshot: SnapshotAssertion): + """ + https://platform.openai.com/logs/resp_0e1b7db5d96e92470068c99506443c819e9305e92915d2405f + """ + adapter = OpenAIResponsesAdapter() + response_id = "resp_0e1b7db5d96e92470068c99506443c819e9305e92915d2405f" + + eval_rows = adapter.get_evaluation_rows(response_ids=[response_id]) + + # Basic assertions about the returned data structure + assert isinstance(eval_rows, list) + assert len(eval_rows) == 1 + + # Convert to dict for snapshot testing + eval_rows_dict = [row.model_dump(exclude={"created_at", "execution_metadata"}) for row in eval_rows] + + # Assert against snapshot + assert eval_rows_dict == snapshot diff --git a/tests/chinook/dataset/task_10/ground_truth.md b/tests/chinook/dataset/task_10/ground_truth.md new file mode 100644 index 00000000..332e271d --- /dev/null +++ b/tests/chinook/dataset/task_10/ground_truth.md @@ -0,0 +1,349 @@ +| album_title | artist_name | total_tracks | track_count_variance | genre_count | most_common_genre | +| ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------ | ---------------------- | ----------- | ------------------ | +| Lost, Season 3 | Lost | 26 | 
81.0000000000000000 | 2 | Drama | +| Heroes, Season 1 | Heroes | 23 | 72.2500000000000000 | 2 | Drama | +| Greatest Hits | Lenny Kravitz | 57 | 60.6666666666666667 | 3 | Rock | +| LOST, Season 4 | Lost | 17 | 30.2500000000000000 | 2 | Drama | +| The Office, Season 3 | The Office | 25 | 20.2500000000000000 | 2 | Comedy | +| Battlestar Galactica, Season 3 | Battlestar Galactica | 19 | 17.5555555555555556 | 3 | Science Fiction | +| Lost, Season 2 | Lost | 24 | 16.0000000000000000 | 2 | TV Shows | +| Rock In Rio [CD2] | Iron Maiden | 9 | 12.2500000000000000 | 2 | Rock | +| The Number of The Beast | Iron Maiden | 8 | 9.0000000000000000 | 2 | Metal | +| Live After Death | Iron Maiden | 18 | 4.0000000000000000 | 2 | Metal | +| Unplugged | Eric Clapton | 30 | 1.00000000000000000000 | 2 | Latin | +| Minha Historia | Chico Buarque | 34 | 0 | 1 | Latin | +| Lost, Season 1 | Lost | 25 | 0 | 1 | TV Shows | +| My Way: The Best Of Frank Sinatra [Disc 1] | Frank Sinatra | 24 | 0 | 1 | Easy Listening | +| Battlestar Galactica (Classic), Season 1 | Battlestar Galactica (Classic) | 24 | 0 | 1 | Sci Fi & Fantasy | +| Instant Karma: The Amnesty International Campaign to Save Darfur | U2 | 23 | 0 | 1 | Pop | +| Afrociberdelia | Chico Science & Nação Zumbi | 23 | 0 | 1 | Latin | +| Acústico | Titãs | 22 | 0 | 1 | Alternative & Punk | +| Up An' Atom | Gene Krupa | 22 | 0 | 1 | Jazz | +| The Office, Season 2 | The Office | 22 | 0 | 1 | TV Shows | +| International Superhits | Green Day | 21 | 0 | 1 | Alternative & Punk | +| Acústico MTV | Os Paralamas Do Sucesso | 21 | 0 | 1 | Latin | +| Greatest Kiss | Kiss | 20 | 0 | 1 | Rock | +| Chronicle, Vol. 2 | Creedence Clearwater Revival | 20 | 0 | 1 | Rock | +| Chronicle, Vol. 
1 | Creedence Clearwater Revival | 20 | 0 | 1 | Rock | +| Sex Machine | James Brown | 20 | 0 | 1 | R&B/Soul | +| My Generation - The Very Best Of The Who | The Who | 20 | 0 | 1 | Rock | +| Ao Vivo [IMPORT] | Zeca Pagodinho | 19 | 0 | 1 | Latin | +| House of Pain | House Of Pain | 19 | 0 | 1 | Hip Hop/Rap | +| The Cream Of Clapton | Eric Clapton | 18 | 0 | 1 | Blues | +| Barulhinho Bom | Marisa Monte | 18 | 0 | 1 | Latin | +| Pure Cult: The Best Of The Cult (For Rockers, Ravers, Lovers & Sinners) [UK] | The Cult | 18 | 0 | 1 | Rock | +| Prenda Minha | Caetano Veloso | 18 | 0 | 1 | Latin | +| The Singles | The Clash | 18 | 0 | 1 | Alternative & Punk | +| Seek And Shall Find: More Of The Best (1963-1981) | Marvin Gaye | 18 | 0 | 1 | R&B/Soul | +| Rotten Apples: Greatest Hits | Smashing Pumpkins | 18 | 0 | 1 | Alternative & Punk | +| Blood Sugar Sex Magik | Red Hot Chili Peppers | 17 | 0 | 1 | Alternative & Punk | +| Greatest Hits I | Queen | 17 | 0 | 1 | Rock | +| A-Sides | Soundgarden | 17 | 0 | 1 | Rock | +| Chill: Brazil (Disc 1) | Marcos Valle | 17 | 0 | 1 | Latin | +| The Best Of Van Halen, Vol. I | Van Halen | 17 | 0 | 1 | Rock | +| Radio Brasil (O Som da Jovem Vanguarda) - Seleccao de Henrique Amaro | O Rappa | 17 | 0 | 1 | Electronica/Dance | +| From The Muddy Banks Of The Wishkah [Live] | Nirvana | 17 | 0 | 1 | Rock | +| Chill: Brazil (Disc 2) | Antônio Carlos Jobim | 17 | 0 | 1 | Latin | +| Acústico MTV [Live] | Cidade Negra | 17 | 0 | 1 | Reggae | +| Body Count | Body Count | 17 | 0 | 1 | Alternative & Punk | +| Greatest Hits II | Queen | 17 | 0 | 1 | Rock | +| Motley Crue Greatest Hits | Mötley Crüe | 17 | 0 | 1 | Metal | +| Rattle And Hum | U2 | 17 | 0 | 1 | Rock | +| Are You Experienced? 
| Jimi Hendrix | 17 | 0 | 1 | Rock | +| Volume Dois | Titãs | 16 | 0 | 1 | Alternative & Punk | +| Live On Two Legs [Live] | Pearl Jam | 16 | 0 | 1 | Rock | +| Living After Midnight | Judas Priest | 16 | 0 | 1 | Metal | +| Arquivo Os Paralamas Do Sucesso | Os Paralamas Do Sucesso | 16 | 0 | 1 | Latin | +| Os Cães Ladram Mas A Caravana Não Pára | Planet Hemp | 16 | 0 | 1 | Hip Hop/Rap | +| By The Way | Red Hot Chili Peppers | 16 | 0 | 1 | Rock | +| Use Your Illusion I | Guns N' Roses | 16 | 0 | 1 | Rock | +| Mais Do Mesmo | Legião Urbana | 16 | 0 | 1 | Latin | +| Judas 0: B-Sides and Rarities | Smashing Pumpkins | 16 | 0 | 1 | Alternative & Punk | +| Vault: Def Leppard's Greatest Hits | Def Leppard | 16 | 0 | 1 | Rock | +| Roda De Funk | Funk Como Le Gusta | 16 | 0 | 1 | Latin | +| The Best Of R.E.M.: The IRS Years | R.E.M. | 16 | 0 | 1 | Alternative & Punk | +| Garage Inc. (Disc 2) | Metallica | 16 | 0 | 1 | Metal | +| A TempestadeTempestade Ou O Livro Dos Dias | Legião Urbana | 15 | 0 | 1 | Latin | +| Voodoo Lounge | The Rolling Stones | 15 | 0 | 1 | Rock | +| Serie Sem Limite (Disc 1) | Tim Maia | 15 | 0 | 1 | Latin | +| Unplugged [Live] | Kiss | 15 | 0 | 1 | Rock | +| B-Sides 1980-1990 | U2 | 15 | 0 | 1 | Rock | +| Vinicius De Moraes | Vinícius De Moraes | 15 | 0 | 1 | Latin | +| King For A Day Fool For A Lifetime | Faith No More | 15 | 0 | 1 | Rock | +| Cássia Eller - Sem Limite [Disc 1] | Cássia Eller | 15 | 0 | 1 | Latin | +| Riot Act | Pearl Jam | 15 | 0 | 1 | Rock | +| Quanta Gente Veio Ver (Live) | Gilberto Gil | 15 | 0 | 1 | Latin | +| Serie Sem Limite (Disc 2) | Tim Maia | 15 | 0 | 1 | Latin | +| Big Ones | Aerosmith | 15 | 0 | 1 | Rock | +| Compositores | O Terço | 15 | 0 | 1 | Rock | +| Vinícius De Moraes - Sem Limite | Toquinho & Vinícius | 15 | 0 | 1 | Bossa Nova | +| Ace Of Spades | Motörhead | 15 | 0 | 1 | Metal | +| Tangents | The Tea Party | 15 | 0 | 1 | Alternative & Punk | +| Californication | Red Hot Chili Peppers | 15 | 0 | 1 | Rock | +| 
Cássia Eller - Coleção Sem Limite [Disc 2] | Cássia Eller | 15 | 0 | 1 | Latin | +| Cafezinho | João Suplicy | 14 | 0 | 1 | World | +| Elis Regina-Minha História | Elis Regina | 14 | 0 | 1 | Latin | +| Olodum | Olodum | 14 | 0 | 1 | Latin | +| Warner 25 Anos | Antônio Carlos Jobim | 14 | 0 | 1 | Jazz | +| Sambas De Enredo 2001 | Various Artists | 14 | 0 | 1 | Latin | +| BBC Sessions [Disc 1] [Live] | Led Zeppelin | 14 | 0 | 1 | Rock | +| Carnaval 2001 | Various Artists | 14 | 0 | 1 | Soundtrack | +| Deixa Entrar | Falamansa | 14 | 0 | 1 | Latin | +| Revelations | Audioslave | 14 | 0 | 1 | Alternative | +| Minha História | Os Mutantes | 14 | 0 | 1 | Alternative & Punk | +| Audioslave | Audioslave | 14 | 0 | 1 | Rock | +| Tribute | Ozzy Osbourne | 14 | 0 | 1 | Metal | +| Lulu Santos - RCA 100 Anos De Música - Álbum 02 | Lulu Santos | 14 | 0 | 1 | Latin | +| Retrospective I (1974-1980) | Rush | 14 | 0 | 1 | Rock | +| Vozes do MPB | Various Artists | 14 | 0 | 1 | Latin | +| Axé Bahia 2001 | Various Artists | 14 | 0 | 1 | Pop | +| Meus Momentos | Gonzaguinha | 14 | 0 | 1 | Latin | +| Cidade Negra - Hits | Cidade Negra | 14 | 0 | 1 | Reggae | +| Lulu Santos - RCA 100 Anos De Música - Álbum 01 | Lulu Santos | 14 | 0 | 1 | Latin | +| Use Your Illusion II | Guns N' Roses | 14 | 0 | 1 | Metal | +| The Best Of 1980-1990 | U2 | 14 | 0 | 1 | Rock | +| Raul Seixas | Raul Seixas | 14 | 0 | 1 | Rock | +| Angel Dust | Faith No More | 14 | 0 | 1 | Alternative & Punk | +| As Canções de Eu Tu Eles | Gilberto Gil | 14 | 0 | 1 | Soundtrack | +| Original Soundtracks 1 | Passengers | 14 | 0 | 1 | Soundtrack | +| The Best of Ed Motta | Ed Motta | 14 | 0 | 1 | Latin | +| New Adventures In Hi-Fi | R.E.M. 
| 14 | 0 | 1 | Rock | +| The Police Greatest Hits | The Police | 14 | 0 | 1 | Rock | +| Load | Metallica | 14 | 0 | 1 | Metal | +| UB40 The Best Of - Volume Two [UK] | UB40 | 14 | 0 | 1 | Reggae | +| Carry On | Chris Cornell | 14 | 0 | 1 | Alternative | +| Miles Ahead | Miles Davis | 14 | 0 | 1 | Jazz | +| Jorge Ben Jor 25 Anos | Jorge Ben | 14 | 0 | 1 | Latin | +| No Security | The Rolling Stones | 14 | 0 | 1 | Rock | +| Milton Nascimento Ao Vivo | Milton Nascimento | 13 | 0 | 1 | Latin | +| Supernatural | Santana | 13 | 0 | 1 | Rock | +| ReLoad | Metallica | 13 | 0 | 1 | Metal | +| The Essential Miles Davis [Disc 1] | Miles Davis | 13 | 0 | 1 | Jazz | +| Get Born | JET | 13 | 0 | 1 | Alternative & Punk | +| The Colour And The Shape | Foo Fighters | 13 | 0 | 1 | Rock | +| Da Lama Ao Caos | Chico Science & Nação Zumbi | 13 | 0 | 1 | Latin | +| Djavan Ao Vivo - Vol. 1 | Djavan | 13 | 0 | 1 | Latin | +| Jagged Little Pill | Alanis Morissette | 13 | 0 | 1 | Rock | +| Pearl Jam | Pearl Jam | 13 | 0 | 1 | Alternative & Punk | +| Alcohol Fueled Brewtality Live! [Disc 1] | Black Label Society | 13 | 0 | 1 | Metal | +| Minas | Milton Nascimento | 13 | 0 | 1 | Latin | +| Blue Moods | Incognito | 13 | 0 | 1 | Jazz | +| American Idiot | Green Day | 13 | 0 | 1 | Alternative & Punk | +| Djavan Ao Vivo - Vol. 02 | Djavan | 13 | 0 | 1 | Latin | +| Contraband | Velvet Revolver | 13 | 0 | 1 | Rock | +| Beyond Good And Evil | The Cult | 12 | 0 | 1 | Rock | +| Walking Into Clarksdale | Page & Plant | 12 | 0 | 1 | Rock | +| BackBeat Soundtrack | BackBeat | 12 | 0 | 1 | Rock And Roll | +| Into The Light | David Coverdale | 12 | 0 | 1 | Rock | +| Vs. | Pearl Jam | 12 | 0 | 1 | Rock | +| Purpendicular | Deep Purple | 12 | 0 | 1 | Rock | +| 20th Century Masters - The Millennium Collection: The Best of Scorpions | Scorpions | 12 | 0 | 1 | Rock | +| Appetite for Destruction | Guns N' Roses | 12 | 0 | 1 | Rock | +| Faceless | Godsmack | 12 | 0 | 1 | Metal | +| Demorou... 
| Mônica Marianno | 12 | 0 | 1 | World | +| Speak of the Devil | Ozzy Osbourne | 12 | 0 | 1 | Rock | +| Heart of the Night | Spyro Gyra | 12 | 0 | 1 | Jazz | +| Back to Black | Amy Winehouse | 12 | 0 | 1 | R&B/Soul | +| Album Of The Year | Faith No More | 12 | 0 | 1 | Alternative & Punk | +| Black Album | Metallica | 12 | 0 | 1 | Metal | +| Hot Rocks, 1964-1971 (Disc 1) | The Rolling Stones | 12 | 0 | 1 | Rock | +| Achtung Baby | U2 | 12 | 0 | 1 | Rock | +| Core | Stone Temple Pilots | 12 | 0 | 1 | Rock | +| Van Halen III | Van Halen | 12 | 0 | 1 | Rock | +| A Real Dead One | Iron Maiden | 12 | 0 | 1 | Metal | +| Pop | U2 | 12 | 0 | 1 | Rock | +| Nevermind | Nirvana | 12 | 0 | 1 | Rock | +| Arquivo II | Os Paralamas Do Sucesso | 12 | 0 | 1 | Latin | +| Facelift | Alice In Chains | 12 | 0 | 1 | Rock | +| Maquinarama | Skank | 12 | 0 | 1 | Rock | +| Diver Down | Van Halen | 12 | 0 | 1 | Rock | +| Fear Of The Dark | Iron Maiden | 12 | 0 | 1 | Rock | +| Out Of Exile | Audioslave | 12 | 0 | 1 | Alternative & Punk | +| Jota Quest-1995 | Jota Quest | 12 | 0 | 1 | Latin | +| O Samba Poconé | Skank | 11 | 0 | 1 | Rock | +| A Real Live One | Iron Maiden | 11 | 0 | 1 | Metal | +| Ten | Pearl Jam | 11 | 0 | 1 | Rock | +| Mezmerize | System Of A Down | 11 | 0 | 1 | Metal | +| The Return Of The Space Cowboy | Jamiroquai | 11 | 0 | 1 | Electronica/Dance | +| Green | R.E.M. | 11 | 0 | 1 | Alternative & Punk | +| The Best Of Buddy Guy - The Millenium Collection | Buddy Guy | 11 | 0 | 1 | Blues | +| Garage Inc. 
(Disc 1) | Metallica | 11 | 0 | 1 | Metal | +| The Doors | The Doors | 11 | 0 | 1 | Rock | +| One By One | Foo Fighters | 11 | 0 | 1 | Alternative & Punk | +| Transmission | The Tea Party | 11 | 0 | 1 | Alternative & Punk | +| Frank | Amy Winehouse | 11 | 0 | 1 | Pop | +| News Of The World | Queen | 11 | 0 | 1 | Rock | +| The Real Thing | Faith No More | 11 | 0 | 1 | Alternative & Punk | +| Van Halen | Van Halen | 11 | 0 | 1 | Rock | +| The X Factor | Iron Maiden | 11 | 0 | 1 | Rock | +| How To Dismantle An Atomic Bomb | U2 | 11 | 0 | 1 | Rock | +| Dance Of Death | Iron Maiden | 11 | 0 | 1 | Heavy Metal | +| Synkronized | Jamiroquai | 11 | 0 | 1 | R&B/Soul | +| Chemical Wedding | Bruce Dickinson | 11 | 0 | 1 | Metal | +| Knocking at Your Back Door: The Best Of Deep Purple in the 80's | Deep Purple | 11 | 0 | 1 | Rock | +| A Matter of Life and Death | Iron Maiden | 11 | 0 | 1 | Rock | +| Out Of Time | R.E.M. Feat. Kate Pearson | 11 | 0 | 1 | Alternative & Punk | +| All That You Can't Leave Behind | U2 | 11 | 0 | 1 | Rock | +| St. 
Anger | Metallica | 11 | 0 | 1 | Metal | +| BBC Sessions [Disc 2] [Live] | Led Zeppelin | 10 | 0 | 1 | Rock | +| War | U2 | 10 | 0 | 1 | Rock | +| The Battle Rages On | Deep Purple | 10 | 0 | 1 | Rock | +| Temple of the Dog | Temple of the Dog | 10 | 0 | 1 | Alternative | +| For Those About To Rock We Salute You | AC/DC | 10 | 0 | 1 | Rock | +| Live At Donington 1992 (Disc 1) | Iron Maiden | 10 | 0 | 1 | Rock | +| Led Zeppelin III | Led Zeppelin | 10 | 0 | 1 | Rock | +| Emergency On Planet Earth | Jamiroquai | 10 | 0 | 1 | Rock | +| The Essential Miles Davis [Disc 2] | Miles Davis | 10 | 0 | 1 | Jazz | +| In Step | Stevie Ray Vaughan & Double Trouble | 10 | 0 | 1 | Blues | +| Killers | Iron Maiden | 10 | 0 | 1 | Heavy Metal | +| The Best Of Men At Work | Men At Work | 10 | 0 | 1 | Rock | +| Zooropa | U2 | 10 | 0 | 1 | Rock | +| In Your Honor [Disc 1] | Foo Fighters | 10 | 0 | 1 | Rock | +| In Your Honor [Disc 2] | Foo Fighters | 10 | 0 | 1 | Rock | +| Rock In Rio [CD1] | Iron Maiden | 10 | 0 | 1 | Metal | +| Live [Disc 1] | The Black Crowes | 10 | 0 | 1 | Blues | +| Live At Donington 1992 (Disc 2) | Iron Maiden | 10 | 0 | 1 | Rock | +| No Prayer For The Dying | Iron Maiden | 10 | 0 | 1 | Metal | +| Black Sabbath Vol. 
4 (Remaster) | Black Sabbath | 10 | 0 | 1 | Metal | +| Surfing with the Alien (Remastered) | Joe Satriani | 10 | 0 | 1 | Rock | +| Brave New World | Iron Maiden | 10 | 0 | 1 | Rock | +| Na Pista | Cláudio Zoli | 10 | 0 | 1 | Latin | +| The Beast Live | Paul D'Ianno | 10 | 0 | 1 | Rock | +| Cesta Básica | Raimundos | 10 | 0 | 1 | Alternative & Punk | +| Misplaced Childhood | Marillion | 10 | 0 | 1 | Rock | +| Kill 'Em All | Metallica | 10 | 0 | 1 | Metal | +| Physical Graffiti [Disc 2] | Led Zeppelin | 9 | 0 | 1 | Rock | +| Morning Dance | Spyro Gyra | 9 | 0 | 1 | Jazz | +| Piece Of Mind | Iron Maiden | 9 | 0 | 1 | Metal | +| Dark Side Of The Moon | Pink Floyd | 9 | 0 | 1 | Rock | +| Outbreak | Dennis Chambers | 9 | 0 | 1 | Jazz | +| ...And Justice For All | Metallica | 9 | 0 | 1 | Metal | +| Led Zeppelin II | Led Zeppelin | 9 | 0 | 1 | Rock | +| Come Taste The Band | Deep Purple | 9 | 0 | 1 | Rock | +| Stormbringer | Deep Purple | 9 | 0 | 1 | Rock | +| Slaves And Masters | Deep Purple | 9 | 0 | 1 | Rock | +| Iron Maiden | Iron Maiden | 9 | 0 | 1 | Blues | +| Bongo Fury | Frank Zappa & Captain Beefheart | 9 | 0 | 1 | Rock | +| Live [Disc 2] | The Black Crowes | 9 | 0 | 1 | Blues | +| Led Zeppelin I | Led Zeppelin | 9 | 0 | 1 | Rock | +| Virtual XI | Iron Maiden | 8 | 0 | 1 | Rock | +| Plays Metallica By Four Cellos | Apocalyptica | 8 | 0 | 1 | Metal | +| Master Of Puppets | Metallica | 8 | 0 | 1 | Metal | +| IV | Led Zeppelin | 8 | 0 | 1 | Rock | +| Coda | Led Zeppelin | 8 | 0 | 1 | Rock | +| Santana - As Years Go By | Santana | 8 | 0 | 1 | Rock | +| Somewhere in Time | Iron Maiden | 8 | 0 | 1 | Metal | +| Seventh Son of a Seventh Son | Iron Maiden | 8 | 0 | 1 | Metal | +| Powerslave | Iron Maiden | 8 | 0 | 1 | Metal | +| Let There Be Rock | AC/DC | 8 | 0 | 1 | Rock | +| Houses Of The Holy | Led Zeppelin | 8 | 0 | 1 | Rock | +| The Best Of Billy Cobham | Billy Cobham | 8 | 0 | 1 | Jazz | +| Ride The Lightning | Metallica | 8 | 0 | 1 | Metal | +| Fireball | Deep 
Purple | 7 | 0 | 1 | Rock | +| Deep Purple In Rock | Deep Purple | 7 | 0 | 1 | Rock | +| In Through The Out Door | Led Zeppelin | 7 | 0 | 1 | Rock | +| MK III The Final Concerts [Disc 1] | Deep Purple | 7 | 0 | 1 | Rock | +| [1997] Black Light Syndrome | Terry Bozzio, Tony Levin & Steve Stevens | 7 | 0 | 1 | Rock | +| Black Sabbath | Black Sabbath | 7 | 0 | 1 | Metal | +| Machine Head | Deep Purple | 7 | 0 | 1 | Rock | +| Presence | Led Zeppelin | 7 | 0 | 1 | Rock | +| Physical Graffiti [Disc 1] | Led Zeppelin | 6 | 0 | 1 | Rock | +| Santana Live | Santana | 6 | 0 | 1 | Rock | +| The Office, Season 1 | The Office | 6 | 0 | 1 | TV Shows | +| Alcohol Fueled Brewtality Live! [Disc 2] | Black Label Society | 5 | 0 | 1 | Metal | +| The Song Remains The Same (Disc 1) | Led Zeppelin | 5 | 0 | 1 | Rock | +| The Final Concerts (Disc 2) | Deep Purple | 4 | 0 | 1 | Rock | +| The Song Remains The Same (Disc 2) | Led Zeppelin | 4 | 0 | 1 | Rock | +| Restless and Wild | Accept | 3 | 0 | 1 | Rock | +| Quanta Gente Veio ver--Bônus De Carnaval | Gilberto Gil | 3 | 0 | 1 | Jazz | +| Sozinho Remix Ao Vivo | Caetano Veloso | 3 | 0 | 1 | Latin | +| No More Tears (Remastered) | Ozzy Osbourne | 2 | 0 | 1 | Rock | +| English Renaissance | The King's Singers | 2 | 0 | 1 | Classical | +| Muso Ko | Habib Koité and Bamada | 2 | 0 | 1 | World | +| Every Kind of Light | The Posies | 2 | 0 | 1 | Rock | +| Quiet Songs | Aisha Duo | 2 | 0 | 1 | Jazz | +| The World of Classical Favourites | Academy of St. Martin in the Fields & Sir Neville Marriner | 2 | 0 | 1 | Classical | +| Blizzard of Ozz | Ozzy Osbourne | 2 | 0 | 1 | Rock | +| Realize | Karsh Kale | 2 | 0 | 1 | Electronica/Dance | +| Great Performances - Barber's Adagio and Other Romantic Favorites for Strings | Leonard Bernstein & New York Philharmonic | 1 | 0 | 1 | Classical | +| Locatelli: Concertos for Violin, Strings and Continuo, Vol. 
3 | Mela Tenenbaum, Pro Musica Prague & Richard Kapp | 1 | 0 | 1 | Classical | +| Beethoven Piano Sonatas: Moonlight & Pastorale | Maurizio Pollini | 1 | 0 | 1 | Classical | +| Mascagni: Cavalleria Rusticana | James Levine | 1 | 0 | 1 | Classical | +| Monteverdi: L'Orfeo | C. Monteverdi, Nigel Rogers - Chiaroscuro; London Baroque; London Cornett & Sackbu | 1 | 0 | 1 | Classical | +| Great Opera Choruses | Chicago Symphony Chorus, Chicago Symphony Orchestra & Sir Georg Solti | 1 | 0 | 1 | Classical | +| Liszt - 12 Études D'Execution Transcendante | Michele Campanella | 1 | 0 | 1 | Classical | +| Tchaikovsky: 1812 Festival Overture, Op.49, Capriccio Italien & Beethoven: Wellington's Victory | Antal Doráti & London Symphony Orchestra | 1 | 0 | 1 | Classical | +| Bach: Goldberg Variations | Wilhelm Kempff | 1 | 0 | 1 | Classical | +| Pachelbel: Canon & Gigue | English Concert & Trevor Pinnock | 1 | 0 | 1 | Classical | +| Diary of a Madman (Remastered) | Ozzy Osbourne | 1 | 0 | 1 | Rock | +| Handel: Music for the Royal Fireworks (Original Version 1749) | English Concert & Trevor Pinnock | 1 | 0 | 1 | Classical | +| Mozart Gala: Famous Arias | Sir Georg Solti, Sumi Jo & Wiener Philharmoniker | 1 | 0 | 1 | Opera | +| Battlestar Galactica: The Story So Far | Battlestar Galactica | 1 | 0 | 1 | Science Fiction | +| Mozart: Chamber Music | Nash Ensemble | 1 | 0 | 1 | Classical | +| A Copland Celebration, Vol. 
I | Aaron Copland & London Symphony Orchestra | 1 | 0 | 1 | Classical | +| Elgar: Cello Concerto & Vaughan Williams: Fantasias | Felix Schmidt, London Symphony Orchestra & Rafael Frühbeck de Burgos | 1 | 0 | 1 | Classical | +| Worlds | Aaron Goldberg | 1 | 0 | 1 | Jazz | +| Pavarotti's Opera Made Easy | Luciano Pavarotti | 1 | 0 | 1 | Classical | +| Allegri: Miserere | Richard Marlow & The Choir of Trinity College, Cambridge | 1 | 0 | 1 | Classical | +| Grieg: Peer Gynt Suites & Sibelius: Pelléas et Mélisande | Berliner Philharmoniker & Herbert Von Karajan | 1 | 0 | 1 | Classical | +| Palestrina: Missa Papae Marcelli & Allegri: Miserere | Choir Of Westminster Abbey & Simon Preston | 1 | 0 | 1 | Classical | +| Mendelssohn: A Midsummer Night's Dream | Philharmonia Orchestra & Sir Neville Marriner | 1 | 0 | 1 | Classical | +| Bach: Toccata & Fugue in D Minor | Ton Koopman | 1 | 0 | 1 | Classical | +| Scheherazade | Chicago Symphony Orchestra & Fritz Reiner | 1 | 0 | 1 | Classical | +| Chopin: Piano Concertos Nos. 
1 & 2 | Emanuel Ax, Eugene Ormandy & Philadelphia Orchestra | 1 | 0 | 1 | Classical | +| Puccini: Madama Butterfly - Highlights | Herbert Von Karajan, Mirella Freni & Wiener Philharmoniker | 1 | 0 | 1 | Classical | +| Carmina Burana | Boston Symphony Orchestra & Seiji Ozawa | 1 | 0 | 1 | Classical | +| Bizet: Carmen Highlights | Chor der Wiener Staatsoper, Herbert Von Karajan & Wiener Philharmoniker | 1 | 0 | 1 | Classical | +| Aquaman | Aquaman | 1 | 0 | 1 | TV Shows | +| Bach: Violin Concertos | Hilary Hahn, Jeffrey Kahane, Los Angeles Chamber Orchestra & Margaret Batjer | 1 | 0 | 1 | Classical | +| Cake: B-Sides and Rarities | Cake | 1 | 0 | 1 | Alternative | +| Sibelius: Finlandia | Berliner Philharmoniker & Hans Rosbaud | 1 | 0 | 1 | Classical | +| Armada: Music from the Courts of England and Spain | Fretwork | 1 | 0 | 1 | Classical | +| Balls to the Wall | Accept | 1 | 0 | 1 | Rock | +| Haydn: Symphonies 99 - 104 | Royal Philharmonic Orchestra & Sir Thomas Beecham | 1 | 0 | 1 | Classical | +| Respighi:Pines of Rome | Eugene Ormandy | 1 | 0 | 1 | Classical | +| Un-Led-Ed | Dread Zeppelin | 1 | 0 | 1 | Rock | +| Purcell: The Fairy Queen | Roger Norrington, London Classical Players | 1 | 0 | 1 | Classical | +| Koyaanisqatsi (Soundtrack from the Motion Picture) | Philip Glass Ensemble | 1 | 0 | 1 | Soundtrack | +| Szymanowski: Piano Works, Vol. 1 | Martin Roscoe | 1 | 0 | 1 | Classical | +| Carried to Dust (Bonus Track Version) | Calexico | 1 | 0 | 1 | Alternative | +| Mozart: Symphonies Nos. 40 & 41 | Berliner Philharmoniker & Herbert Von Karajan | 1 | 0 | 1 | Classical | +| Bartok: Violin & Viola Concertos | Yehudi Menuhin | 1 | 0 | 1 | Classical | +| Bach: The Cello Suites | Yo-Yo Ma | 1 | 0 | 1 | Classical | +| Duos II | Luciana Souza/Romero Lubambo | 1 | 0 | 1 | Latin | +| Fauré: Requiem, Ravel: Pavane & Others | Academy of St. 
Martin in the Fields, John Birch, Sir Neville Marriner & Sylvia McNair | 1 | 0 | 1 | Classical | +| Adorate Deum: Gregorian Chant from the Proper of the Mass | Alberto Turco & Nova Schola Gregoriana | 1 | 0 | 1 | Classical | +| Bark at the Moon (Remastered) | Ozzy Osbourne | 1 | 0 | 1 | Rock | +| Schubert: The Late String Quartets & String Quintet (3 CD's) | Emerson String Quartet | 1 | 0 | 1 | Classical | +| The Last Night of the Proms | Barry Wordsworth & BBC Concert Orchestra | 1 | 0 | 1 | Classical | +| The Best of Beethoven | Nicolaus Esterhazy Sinfonia | 1 | 0 | 1 | Classical | +| SCRIABIN: Vers la flamme | Christopher O'Riley | 1 | 0 | 1 | Classical | +| Tchaikovsky: The Nutcracker | London Symphony Orchestra & Sir Charles Mackerras | 1 | 0 | 1 | Classical | +| Beethoven: Symphony No. 6 'Pastoral' Etc. | Otto Klemperer & Philharmonia Orchestra | 1 | 0 | 1 | Classical | +| Prokofiev: Romeo & Juliet | Michael Tilson Thomas & San Francisco Symphony | 1 | 0 | 1 | Classical | +| Vivaldi: The Four Seasons | Anne-Sophie Mutter, Herbert Von Karajan & Wiener Philharmoniker | 1 | 0 | 1 | Classical | +| Prokofiev: Symphony No.1 | Sergei Prokofiev & Yuri Temirkanov | 1 | 0 | 1 | Classical | +| Handel: The Messiah (Highlights) | Scholars Baroque Ensemble | 1 | 0 | 1 | Classical | +| Beethoven: Symhonies Nos. 5 & 6 | Orchestre Révolutionnaire et Romantique & John Eliot Gardiner | 1 | 0 | 1 | Classical | +| Purcell: Music for the Queen Mary | Equale Brass Ensemble, John Eliot Gardiner & Munich Monteverdi Orchestra and Choir | 1 | 0 | 1 | Classical | +| Sir Neville Marriner: A Celebration | Academy of St. Martin in the Fields Chamber Ensemble & Sir Neville Marriner | 1 | 0 | 1 | Classical | +| Berlioz: Symphonie Fantastique | Michael Tilson Thomas & San Francisco Symphony | 1 | 0 | 1 | Classical | +| Bach: Orchestral Suites Nos. 1 - 4 | Academy of St. 
Martin in the Fields, Sir Neville Marriner & Thurston Dart | 1 | 0 | 1 | Classical | +| Charpentier: Divertissements, Airs & Concerts | Les Arts Florissants & William Christie | 1 | 0 | 1 | Classical | +| Górecki: Symphony No. 3 | Adrian Leaper & Doreen de Feis | 1 | 0 | 1 | Classical | +| Great Recordings of the Century - Shubert: Schwanengesang, 4 Lieder | Gerald Moore | 1 | 0 | 1 | Classical | +| J.S. Bach: Chaconne, Suite in E Minor, Partita in E Major & Prelude, Fugue and Allegro | Julian Bream | 1 | 0 | 1 | Classical | +| Bach: The Brandenburg Concertos | Orchestra of The Age of Enlightenment | 1 | 0 | 1 | Classical | +| Great Recordings of the Century: Paganini's 24 Caprices | Itzhak Perlman | 1 | 0 | 1 | Classical | +| Wagner: Favourite Overtures | Sir Georg Solti & Wiener Philharmoniker | 1 | 0 | 1 | Classical | +| South American Getaway | The 12 Cellists of The Berlin Philharmonic | 1 | 0 | 1 | Classical | +| A Soprano Inspired | Britten Sinfonia, Ivor Bolton & Lesley Garrett | 1 | 0 | 1 | Classical | +| Great Recordings of the Century - Mahler: Das Lied von der Erde | Gustav Mahler | 1 | 0 | 1 | Classical | +| Mozart: Wind Concertos | Berliner Philharmoniker, Claudio Abbado & Sabine Meyer | 1 | 0 | 1 | Classical | +| Nielsen: The Six Symphonies | Göteborgs Symfoniker & Neeme Järvi | 1 | 0 | 1 | Classical | +| Weill: The Seven Deadly Sins | Kent Nagano and Orchestre de l'Opéra de Lyon | 1 | 0 | 1 | Classical | +| Prokofiev: Symphony No.5 & Stravinksy: Le Sacre Du Printemps | Berliner Philharmoniker & Herbert Von Karajan | 1 | 0 | 1 | Classical | +| Holst: The Planets, Op. 
32 & Vaughan Williams: Fantasies | Eugene Ormandy | 1 | 0 | 1 | Classical | +| Strauss: Waltzes | Eugene Ormandy | 1 | 0 | 1 | Classical | +| The Ultimate Relexation Album | Charles Dutoit & L'Orchestre Symphonique de Montréal | 1 | 0 | 1 | Classical | +| Adams, John: The Chairman Dances | Edo de Waart & San Francisco Symphony | 1 | 0 | 1 | Classical | diff --git a/tests/chinook/dataset/task_10/task.txt b/tests/chinook/dataset/task_10/task.txt new file mode 100644 index 00000000..65721606 --- /dev/null +++ b/tests/chinook/dataset/task_10/task.txt @@ -0,0 +1,3 @@ +Find albums with the highest track count variance and their genre distribution. +Show album title, artist, total tracks, track count variance, genre count, and +most common genre. diff --git a/tests/chinook/dataset/task_11/ground_truth.md b/tests/chinook/dataset/task_11/ground_truth.md new file mode 100644 index 00000000..994f8602 --- /dev/null +++ b/tests/chinook/dataset/task_11/ground_truth.md @@ -0,0 +1,575 @@ +| country | genre | month | average_purchase_value | customer_count | annual_avg_purchase_value | seasonal_trend_indicator | +| -------------- | ------------------ | ----- | ---------------------- | -------------- | ------------------------- | ------------------------ | +| Argentina | Alternative & Punk | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Alternative & Punk | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Easy Listening | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Jazz | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Latin | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Rock | 3 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Argentina | Rock | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Argentina | Soundtrack | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Blues | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Heavy Metal | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Metal | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Reggae | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Australia | Rock | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Drama | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Austria | Jazz | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Latin | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Pop | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | R&B/Soul | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | R&B/Soul | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Austria | TV Shows | 1 | 
1.99 | 1 | 1.99000000000000000000 | Normal | +| Belgium | Alternative & Punk | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | R&B/Soul | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Belgium | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Alternative & Punk | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Alternative & Punk | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Alternative & Punk | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Blues | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Blues | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Classical | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Hip Hop/Rap | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 3 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 4 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Latin | 12 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Metal | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Metal | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Metal | 12 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Brazil | Pop | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Pop | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | R&B/Soul | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | R&B/Soul | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Reggae | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Reggae | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Reggae | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 5 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 6 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 11 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Rock | 12 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Brazil | Sci Fi & Fantasy | 3 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Brazil | Soundtrack | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | Soundtrack | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Brazil | World | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 4 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | 
Alternative & Punk | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Blues | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Blues | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Bossa Nova | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Bossa Nova | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Classical | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Drama | 3 | 1.99 | 2 | 1.99000000000000000000 | Normal | +| Canada | Electronica/Dance | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Electronica/Dance | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Hip Hop/Rap | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Hip Hop/Rap | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Jazz | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Jazz | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Jazz | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Jazz | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Jazz | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Latin | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Latin | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Latin | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Latin | 4 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Latin | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Latin | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Latin | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Latin | 12 | 0.99 | 4 | 0.99000000000000000000 | Normal 
| +| Canada | Metal | 1 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Metal | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Metal | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Metal | 6 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Metal | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Metal | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | R&B/Soul | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | R&B/Soul | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Reggae | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Reggae | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 1 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Rock | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 6 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Rock | 7 | 0.99 | 5 | 0.99000000000000000000 | Normal | +| Canada | Rock | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 9 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Rock | 10 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Canada | Rock | 11 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Canada | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | Rock And Roll | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | TV Shows | 3 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Canada | World | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Canada | World | 7 | 0.99 | 2 | 
0.99000000000000000000 | Normal | +| Canada | World | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Blues | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Drama | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Chile | Easy Listening | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Latin | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Metal | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | Sci Fi & Fantasy | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Chile | Science Fiction | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Chile | Soundtrack | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Chile | TV Shows | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Czech Republic | Alternative & Punk | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Alternative & Punk | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Alternative & Punk | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Blues | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Drama | 9 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Czech Republic | Drama | 11 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Czech Republic | Electronica/Dance | 7 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Czech Republic | Jazz | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Latin | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Latin | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Metal | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Pop | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | R&B/Soul | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Czech Republic | Science Fiction | 11 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Czech Republic | TV Shows | 9 | 1.99 | 1 | 1.9900000000000000 | Normal | +| Czech Republic | TV Shows | 11 | 1.99 | 1 | 1.9900000000000000 | Normal | +| Denmark | Alternative & Punk | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Latin | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Latin | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Pop | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark | Rock | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Denmark 
| Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Jazz | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Latin | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Latin | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Reggae | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Finland | Sci Fi & Fantasy | 6 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| France | Alternative | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 1 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 11 | 0.99 | 4 | 0.99000000000000000000 | Normal | +| France | Alternative & Punk | 12 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Blues | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Bossa Nova | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Classical | 4 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| France | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Drama | 6 | 1.99 | 2 | 1.9900000000000000 | Normal | +| France | Drama | 10 | 1.99 | 1 | 1.9900000000000000 | Normal | +| France | Electronica/Dance | 10 | 0.99 | 1 | 0.99000000000000000000 
| Normal | +| France | Hip Hop/Rap | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Jazz | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Jazz | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Jazz | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Latin | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Latin | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Latin | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Latin | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Pop | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Reggae | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Rock | 1 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Rock | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Rock | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Rock | 7 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| France | Rock | 8 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| France | Rock | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Rock | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| France | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Rock And Roll | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Sci Fi & Fantasy | 6 | 1.99 | 1 | 
1.99000000000000000000 | Normal | +| France | Soundtrack | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | Soundtrack | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| France | TV Shows | 10 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Germany | Alternative | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Alternative & Punk | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Alternative & Punk | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Alternative & Punk | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Alternative & Punk | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Blues | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Blues | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Blues | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Drama | 4 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Germany | Easy Listening | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Heavy Metal | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Jazz | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Jazz | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Latin | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Latin | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Latin | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Latin | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Latin | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Latin | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Metal | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Metal | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Metal | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Metal | 8 | 0.99 | 2 | 
0.99000000000000000000 | Normal | +| Germany | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Metal | 11 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Pop | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Rock | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Rock | 6 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| Germany | Rock | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Rock | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Rock | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Germany | Science Fiction | 4 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Germany | Soundtrack | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Soundtrack | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | Soundtrack | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Germany | TV Shows | 4 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Hungary | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Blues | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Comedy | 2 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Hungary | Drama | 2 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Hungary | Electronica/Dance | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Latin | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Metal | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Metal | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | R&B/Soul | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| 
Hungary | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Rock | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Hungary | Sci Fi & Fantasy | 2 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Hungary | TV Shows | 2 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| India | Alternative & Punk | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Alternative & Punk | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Blues | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Classical | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Jazz | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Jazz | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Jazz | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Latin | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Metal | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | R&B/Soul | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Rock | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Rock | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| India | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| India | Sci Fi & Fantasy | 2 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| India | TV Shows | 12 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Ireland | 
Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Blues | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Drama | 4 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Ireland | Jazz | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Jazz | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Latin | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Ireland | TV Shows | 4 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Italy | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Alternative & Punk | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Alternative & Punk | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Blues | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Classical | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Latin | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Italy | Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Alternative & Punk | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Blues | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Latin | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Metal | 12 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Netherlands | Pop | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Netherlands | Sci Fi & Fantasy | 6 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Norway | Alternative | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Classical | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Drama | 6 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Norway | Electronica/Dance | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Latin | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Norway | World | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Blues | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Jazz | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Latin | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Latin | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland 
| Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Poland | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Alternative & Punk | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Alternative & Punk | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Blues | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Electronica/Dance | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Heavy Metal | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Jazz | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Latin | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Latin | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| Portugal | Latin | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Latin | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Latin | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Metal | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Metal | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Metal | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Pop | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | R&B/Soul | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Rock | 11 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Portugal | Rock | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Portugal | Sci Fi & Fantasy | 10 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Portugal | TV Shows | 10 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Portugal | World | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Alternative & Punk | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Hip Hop/Rap | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Jazz | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Latin | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Metal | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Reggae | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Rock | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Rock | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Spain | Rock | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Alternative & Punk | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Jazz | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Latin | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Latin | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Metal | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Metal | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Rock | 2 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| Sweden | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | Rock | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| Sweden | TV Shows | 1 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| Sweden | World | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Alternative & Punk | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Alternative & Punk | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Alternative & Punk | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Hip Hop/Rap | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Jazz | 1 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Jazz | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Latin | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Latin | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Latin | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Latin | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Latin | 11 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Pop | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | R&B/Soul | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Reggae | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | 
Rock | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | Rock | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| United Kingdom | World | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 3 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 6 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 11 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Alternative & Punk | 12 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Blues | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Blues | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Blues | 8 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Blues | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Blues | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Bossa Nova | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Bossa Nova | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Classical | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | 
+| USA | Classical | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Comedy | 3 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Comedy | 5 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Comedy | 9 | 1.99 | 2 | 1.99000000000000000000 | Normal | +| USA | Drama | 8 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Easy Listening | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Heavy Metal | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Heavy Metal | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Hip Hop/Rap | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Hip Hop/Rap | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Jazz | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Jazz | 2 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Jazz | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Jazz | 4 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Jazz | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Jazz | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Jazz | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Jazz | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Latin | 2 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 4 | 0.99 | 9 | 0.99000000000000000000 | Normal | +| USA | Latin | 5 | 0.99 | 5 | 0.99000000000000000000 | Normal | +| USA | Latin | 6 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 7 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 8 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Latin | 9 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 10 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Latin | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Latin | 12 | 0.99 | 5 | 
0.99000000000000000000 | Normal | +| USA | Metal | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Metal | 3 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Metal | 4 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Metal | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Metal | 6 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Metal | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Metal | 9 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Metal | 10 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Metal | 11 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Metal | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Pop | 7 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Pop | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | R&B/Soul | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | R&B/Soul | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | R&B/Soul | 7 | 0.99 | 4 | 0.99000000000000000000 | Normal | +| USA | Reggae | 5 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Reggae | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Rock | 1 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Rock | 2 | 0.99 | 5 | 0.99000000000000000000 | Normal | +| USA | Rock | 3 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Rock | 4 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Rock | 5 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Rock | 6 | 0.99 | 6 | 0.99000000000000000000 | Normal | +| USA | Rock | 8 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Rock | 9 | 0.99 | 2 | 0.99000000000000000000 | Normal | +| USA | Rock | 10 | 0.99 | 3 | 0.99000000000000000000 | Normal | +| USA | Rock | 11 | 0.99 | 4 | 0.99000000000000000000 | Normal | +| USA | Rock | 12 | 0.99 | 7 | 0.99000000000000000000 | Normal | +| USA | Rock And Roll | 1 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Rock And Roll | 11 | 0.99 | 1 | 
0.99000000000000000000 | Normal | +| USA | Sci Fi & Fantasy | 8 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Sci Fi & Fantasy | 9 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Science Fiction | 7 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | Soundtrack | 4 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Soundtrack | 8 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | Soundtrack | 12 | 0.99 | 1 | 0.99000000000000000000 | Normal | +| USA | TV Shows | 3 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | TV Shows | 5 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | TV Shows | 7 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | TV Shows | 8 | 1.99 | 1 | 1.99000000000000000000 | Normal | +| USA | TV Shows | 9 | 1.99 | 3 | 1.99000000000000000000 | Normal | diff --git a/tests/chinook/dataset/task_11/task.txt b/tests/chinook/dataset/task_11/task.txt new file mode 100644 index 00000000..a40fb591 --- /dev/null +++ b/tests/chinook/dataset/task_11/task.txt @@ -0,0 +1,3 @@ +Analyze seasonal purchasing trends by customer country and genre preference. +Show country, genre, month, average purchase value, customer count, and seasonal +trend indicator. 
diff --git a/tests/chinook/dataset/task_12/ground_truth.md b/tests/chinook/dataset/task_12/ground_truth.md new file mode 100644 index 00000000..526ac03c --- /dev/null +++ b/tests/chinook/dataset/task_12/ground_truth.md @@ -0,0 +1,52 @@ +| customer_id | customer_name | genre_diversity_score | avg_track_price | total_spending | diversity_to_spending_ratio | +| ----------- | --------------------- | --------------------- | ---------------------- | -------------- | --------------------------- | +| 57 | Luis Rojas | 12 | 1.2268421052631579 | 46.62 | 0.25740025740025740026 | +| 45 | Ladislav Kovács | 11 | 1.2005263157894737 | 45.62 | 0.24112231477422183253 | +| 14 | Mark Philips | 10 | 0.99000000000000000000 | 37.62 | 0.26581605528973950027 | +| 34 | João Fernandes | 10 | 1.0426315789473684 | 39.62 | 0.25239777889954568400 | +| 3 | François Tremblay | 10 | 1.0426315789473684 | 39.62 | 0.25239777889954568400 | +| 17 | Jack Smith | 10 | 1.0426315789473684 | 39.62 | 0.25239777889954568400 | +| 37 | Fynn Zimmermann | 10 | 1.1478947368421053 | 43.62 | 0.22925263640531866116 | +| 24 | Frank Ralston | 10 | 1.1478947368421053 | 43.62 | 0.22925263640531866116 | +| 27 | Patrick Gray | 9 | 0.99000000000000000000 | 37.62 | 0.23923444976076555024 | +| 23 | John Gordon | 9 | 0.99000000000000000000 | 37.62 | 0.23923444976076555024 | +| 21 | Kathy Chase | 9 | 0.99000000000000000000 | 37.62 | 0.23923444976076555024 | +| 41 | Marc Dubois | 9 | 0.99000000000000000000 | 37.62 | 0.23923444976076555024 | +| 19 | Tim Goyer | 9 | 1.01631578947368421053 | 38.62 | 0.23303987571206628690 | +| 43 | Isabelle Mercier | 9 | 1.0689473684210526 | 40.62 | 0.22156573116691285081 | +| 7 | Astrid Gruber | 9 | 1.1215789473684211 | 42.62 | 0.21116846550915063351 | +| 6 | Helena Holý | 9 | 1.3057894736842105 | 49.62 | 0.18137847642079806530 | +| 54 | Steve Murray | 8 | 0.99000000000000000000 | 37.62 | 0.21265284423179160021 | +| 15 | Jennifer Peterson | 8 | 1.01631578947368421053 | 38.62 | 
0.20714655618850336613 | +| 58 | Manoj Pareek | 8 | 1.01631578947368421053 | 38.62 | 0.20714655618850336613 | +| 1 | Luís Gonçalves | 8 | 1.0426315789473684 | 39.62 | 0.20191822311963654720 | +| 4 | Bjørn Hansen | 8 | 1.0426315789473684 | 39.62 | 0.20191822311963654720 | +| 22 | Heather Leacock | 8 | 1.0426315789473684 | 39.62 | 0.20191822311963654720 | +| 5 | František Wichterlová | 8 | 1.0689473684210526 | 40.62 | 0.19694731659281142294 | +| 25 | Victor Stevens | 8 | 1.1215789473684211 | 42.62 | 0.18770530267480056312 | +| 46 | Hugh O'Reilly | 8 | 1.2005263157894737 | 45.62 | 0.17536168347216133275 | +| 59 | Puja Srivastava | 7 | 1.01777777777777777778 | 36.64 | 0.19104803493449781659 | +| 56 | Diego Gutiérrez | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 50 | Enrique Muñoz | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 32 | Aaron Mitchell | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 10 | Eduardo Martins | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 13 | Fernanda Ramos | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 38 | Niklas Schröder | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 2 | Leonie Köhler | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 16 | Frank Harris | 7 | 0.99000000000000000000 | 37.62 | 0.18607123870281765019 | +| 39 | Camille Bernard | 7 | 1.01631578947368421053 | 38.62 | 0.18125323666494044537 | +| 51 | Joakim Johansson | 7 | 1.01631578947368421053 | 38.62 | 0.18125323666494044537 | +| 40 | Dominique Lefebvre | 7 | 1.01631578947368421053 | 38.62 | 0.18125323666494044537 | +| 20 | Dan Miller | 7 | 1.0426315789473684 | 39.62 | 0.17667844522968197880 | +| 42 | Wyatt Girard | 7 | 1.0426315789473684 | 39.62 | 0.17667844522968197880 | +| 48 | Johannes Van der Berg | 7 | 1.0689473684210526 | 40.62 | 0.17232890201870999508 | +| 44 | Terhi Hämäläinen | 7 | 1.0952631578947368 | 41.62 | 0.16818837097549255166 | +| 
26 | Richard Cunningham | 7 | 1.2531578947368421 | 47.62 | 0.14699706005879882402 | +| 35 | Madalena Sampaio | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 33 | Ellie Sullivan | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 47 | Lucas Mancini | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 36 | Hannah Schneider | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 11 | Alexandre Rocha | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 18 | Michelle Brooks | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 31 | Martha Silk | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | +| 30 | Edward Francis | 6 | 0.99000000000000000000 | 37.62 | 0.15948963317384370016 | diff --git a/tests/chinook/dataset/task_12/task.txt b/tests/chinook/dataset/task_12/task.txt new file mode 100644 index 00000000..844f82d6 --- /dev/null +++ b/tests/chinook/dataset/task_12/task.txt @@ -0,0 +1,3 @@ +Find customers with the most diverse music taste and their spending correlation. +Calculate genre diversity score, average track price, total spending, and +diversity-to-spending ratio. 
diff --git a/tests/chinook/dataset/task_7/ground_truth.md b/tests/chinook/dataset/task_7/ground_truth.md new file mode 100644 index 00000000..4c2c2972 --- /dev/null +++ b/tests/chinook/dataset/task_7/ground_truth.md @@ -0,0 +1,29 @@ +| customer_id | first_name | last_name | last_purchase | lifetime_value | distinct_genres | related_tracks_available | customer_status | segment | +| ----------- | ---------- | ----------- | ------------------- | -------------- | --------------- | ------------------------ | --------------- | ------------------ | +| 26 | Richard | Cunningham | 2025-04-05 00:00:00 | 474.62 | 7 | 2727 | Needs Attention | High Value Diverse | +| 57 | Luis | Rojas | 2024-10-14 00:00:00 | 415.62 | 12 | 2962 | At Risk | High Value Diverse | +| 37 | Fynn | Zimmermann | 2025-06-03 00:00:00 | 388.62 | 10 | 2911 | Needs Attention | High Value Diverse | +| 5 | František | Wichterlová | 2025-05-06 00:00:00 | 376.62 | 8 | 2879 | Needs Attention | High Value Diverse | +| 43 | Isabelle | Mercier | 2025-06-06 00:00:00 | 376.62 | 9 | 2057 | Needs Attention | High Value Diverse | +| 28 | Julia | Barnett | 2025-05-19 00:00:00 | 370.62 | 6 | 2587 | Needs Attention | High Value Diverse | +| 17 | Jack | Smith | 2024-07-31 00:00:00 | 352.62 | 10 | 2862 | At Risk | High Value Diverse | +| 34 | João | Fernandes | 2024-10-01 00:00:00 | 352.62 | 10 | 2830 | At Risk | High Value Diverse | +| 15 | Jennifer | Peterson | 2024-12-15 00:00:00 | 343.62 | 8 | 2384 | At Risk | High Value Diverse | +| 51 | Joakim | Johansson | 2024-12-02 00:00:00 | 340.62 | 7 | 2795 | At Risk | High Value Diverse | +| 40 | Dominique | Lefebvre | 2024-08-13 00:00:00 | 336.62 | 7 | 2753 | At Risk | High Value Diverse | +| 19 | Tim | Goyer | 2024-09-13 00:00:00 | 335.62 | 9 | 2924 | At Risk | High Value Diverse | +| 53 | Phil | Hughes | 2025-01-15 00:00:00 | 334.62 | 6 | 2735 | At Risk | High Value Diverse | +| 55 | Mark | Taylor | 2024-08-31 00:00:00 | 334.62 | 6 | 2379 | At Risk | High Value Diverse | +| 2 
| Leonie | Köhler | 2024-07-13 00:00:00 | 334.62 | 7 | 2716 | At Risk | High Value Diverse | +| 9 | Kara | Nielsen | 2025-02-02 00:00:00 | 334.62 | 5 | 2592 | At Risk | High Value Diverse | +| 11 | Alexandre | Rocha | 2025-03-18 00:00:00 | 334.62 | 6 | 2637 | Needs Attention | High Value Diverse | +| 13 | Fernanda | Ramos | 2024-11-01 00:00:00 | 334.62 | 7 | 2476 | At Risk | High Value Diverse | +| 14 | Mark | Philips | 2025-05-11 00:00:00 | 334.62 | 10 | 2889 | Needs Attention | High Value Diverse | +| 30 | Edward | Francis | 2025-01-02 00:00:00 | 334.62 | 6 | 2188 | At Risk | High Value Diverse | +| 32 | Aaron | Mitchell | 2025-02-15 00:00:00 | 334.62 | 7 | 2509 | At Risk | High Value Diverse | +| 36 | Hannah | Schneider | 2024-11-14 00:00:00 | 334.62 | 6 | 2653 | At Risk | High Value Diverse | +| 38 | Niklas | Schröder | 2024-06-30 00:00:00 | 334.62 | 7 | 2795 | At Risk | High Value Diverse | +| 47 | Lucas | Mancini | 2025-03-05 00:00:00 | 334.62 | 6 | 2699 | At Risk | High Value Diverse | +| 49 | Stanisław | Wójcik | 2025-04-18 00:00:00 | 334.62 | 5 | 2381 | Needs Attention | High Value Diverse | +| 52 | Emma | Jones | 2025-06-11 00:00:00 | 334.62 | 5 | 2602 | Needs Attention | High Value Diverse | +| 59 | Puja | Srivastava | 2024-05-30 00:00:00 | 331.66 | 7 | 2763 | At Risk | High Value Diverse | diff --git a/tests/chinook/dataset/task_7/task.txt b/tests/chinook/dataset/task_7/task.txt new file mode 100644 index 00000000..47521597 --- /dev/null +++ b/tests/chinook/dataset/task_7/task.txt @@ -0,0 +1,5 @@ +Find high-value customers who haven't purchased recently and their potential +re-engagement opportunities. Identify customers who haven't purchased in 3+ +months, have lifetime value >$25, and have >10 related tracks available. Include +customer status (At Risk/Needs Attention/Active) and segment (High Value +Diverse/Medium Value/Low Value). 
diff --git a/tests/chinook/dataset/task_8/ground_truth.md b/tests/chinook/dataset/task_8/ground_truth.md new file mode 100644 index 00000000..6f4c47cc --- /dev/null +++ b/tests/chinook/dataset/task_8/ground_truth.md @@ -0,0 +1,334 @@ +| genre | track | artist | unit_price | total_quantity_sold | total_revenue | popularity_rank | +| ------------------ | ---------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | ---------- | ------------------- | ------------- | --------------- | +| Alternative | Moth | Audioslave | 0.99 | 1 | 0.99 | 1 | +| Alternative | All Night Thing | Temple of the Dog | 0.99 | 1 | 0.99 | 1 | +| Alternative | Slowness | Calexico | 0.99 | 1 | 0.99 | 1 | +| Alternative | Call Me a Dog | Temple of the Dog | 0.99 | 1 | 0.99 | 1 | +| Alternative | Show Me How to Live (Live at the Quart Festival) | Audioslave | 0.99 | 1 | 0.99 | 1 | +| Alternative | Scar On the Sky | Chris Cornell | 0.99 | 1 | 0.99 | 1 | +| Alternative | Until We Fall | Audioslave | 0.99 | 1 | 0.99 | 1 | +| Alternative | Four Walled World | Temple of the Dog | 0.99 | 1 | 0.99 | 1 | +| Alternative | Say Hello 2 Heaven | Temple of the Dog | 0.99 | 1 | 0.99 | 1 | +| Alternative | Sound of a Gun | Audioslave | 0.99 | 1 | 0.99 | 1 | +| Alternative | Billie Jean | Chris Cornell | 0.99 | 1 | 0.99 | 1 | +| Alternative | Safe and Sound | Chris Cornell | 0.99 | 1 | 0.99 | 1 | +| Alternative | Disappearing Act | Chris Cornell | 0.99 | 1 | 0.99 | 1 | +| Alternative | Shape of Things to Come | Audioslave | 0.99 | 1 | 0.99 | 1 | +| Alternative & Punk | Cabeça Dinossauro | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Times Like These | Foo Fighters | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Midlife Crisis | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | She Loves Me Not | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Train In Vain | The 
Clash | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Toda Cor | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | When I Come Around | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Big Wave | Pearl Jam | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | I Believe | R.E.M. | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Voodoo | Body Count | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Country Feedback | R.E.M. Feat. Kate Pearson | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Comida | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Ando Meio Desligado | Os Mutantes | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Lugar Nenhum | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Gyroscope | The Tea Party | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Bullet With Butterfly Wings | Smashing Pumpkins | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Saturnine | Smashing Pumpkins | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | So Central Rain | R.E.M. | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Heaven Coming Down | The Tea Party | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Give Me Novacaine | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Get Up | R.E.M. | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | A Melhor Forma | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Last Cup Of Sorrow | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Basket Case | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Babylon | The Tea Party | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Real Love | Smashing Pumpkins | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Caffeine | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Easy | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Bowels Of The Devil | Body Count | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Fall On Me | R.E.M. | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Untitled | R.E.M. 
| 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Esporrei Na Manivela | Raimundos | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Holiday | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Nice Guys Finish Last | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Whatsername | Green Day | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Waiting | Smashing Pumpkins | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Under The Bridge | Red Hot Chili Peppers | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Cherub Rock | Smashing Pumpkins | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | O Pulso | Titãs | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Take It Or Leave It | JET | 0.99 | 2 | 1.98 | 1 | +| Alternative & Punk | Release | The Tea Party | 0.99 | 2 | 1.98 | 1 | +| Blues | Promises | Eric Clapton | 0.99 | 2 | 1.98 | 1 | +| Blues | Travis Walk | Stevie Ray Vaughan & Double Trouble | 0.99 | 2 | 1.98 | 1 | +| Blues | Sunshine Of Your Love | Eric Clapton | 0.99 | 2 | 1.98 | 1 | +| Blues | Midnight From The Inside Out | The Black Crowes | 0.99 | 2 | 1.98 | 1 | +| Blues | Lay Down Sally | Eric Clapton | 0.99 | 2 | 1.98 | 1 | +| Blues | Title Song | The Black Crowes | 0.99 | 2 | 1.98 | 1 | +| Blues | Lonely Stranger | Eric Clapton | 0.99 | 2 | 1.98 | 1 | +| Blues | When My Left Eye Jumps | Buddy Guy | 0.99 | 2 | 1.98 | 1 | +| Bossa Nova | Onde Anda Você | Toquinho & Vinícius | 0.99 | 2 | 1.98 | 1 | +| Classical | Scheherazade, Op. 35: I. The Sea and Sindbad's Ship | Chicago Symphony Orchestra & Fritz Reiner | 0.99 | 2 | 1.98 | 1 | +| Classical | Symphonie Fantastique, Op. 14: V. Songe d'une nuit du sabbat | Michael Tilson Thomas & San Francisco Symphony | 0.99 | 2 | 1.98 | 1 | +| Classical | String Quartet No. 12 in C Minor, D. 703 "Quartettsatz": II. Andante - Allegro assai | Emerson String Quartet | 0.99 | 2 | 1.98 | 1 | +| Classical | Suite No. 3 in D, BWV 1068: III. Gavotte I & II | Academy of St. 
Martin in the Fields, Sir Neville Marriner & Thurston Dart | 0.99 | 2 | 1.98 | 1 | +| Classical | Music for the Funeral of Queen Mary: VI. "Thou Knowest, Lord, the Secrets of Our Hearts" | Equale Brass Ensemble, John Eliot Gardiner & Munich Monteverdi Orchestra and Choir | 0.99 | 2 | 1.98 | 1 | +| Comedy | Phyllis's Wedding | The Office | 1.99 | 2 | 3.98 | 1 | +| Drama | How to Stop an Exploding Man | Heroes | 1.99 | 2 | 3.98 | 1 | +| Drama | The Fix | Heroes | 1.99 | 2 | 3.98 | 1 | +| Easy Listening | I Get A Kick Out Of You | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | What Now My Love | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | New York, New York | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | Bad, Bad Leroy Brown | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | Come Fly With Me | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | It Was A Very Good Year | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | Fly Me To The Moon | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | My Kind Of Town | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | Summer Love | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Easy Listening | Mack The Knife | Frank Sinatra | 0.99 | 1 | 0.99 | 1 | +| Electronica/Dance | Light Years | Jamiroquai | 0.99 | 2 | 1.98 | 1 | +| Heavy Metal | Gates Of Tomorrow | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Drifter | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Another Life | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Dance Of Death | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Die With Your Boots On | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Purgatory | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | The Ides Of March | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Age Of Innocence | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Phantom Of The Opera | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | 
Wildest Dreams | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Face In The Sand | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Heavy Metal | Genghis Khan | Iron Maiden | 0.99 | 1 | 0.99 | 1 | +| Hip Hop/Rap | Rappers Reais | Planet Hemp | 0.99 | 2 | 1.98 | 1 | +| Hip Hop/Rap | Nega Do Cabelo Duro | Planet Hemp | 0.99 | 2 | 1.98 | 1 | +| Jazz | Surrender | Spyro Gyra | 0.99 | 2 | 1.98 | 1 | +| Jazz | Starburst | Spyro Gyra | 0.99 | 2 | 1.98 | 1 | +| Jazz | L'Arc En Ciel De Miles | Incognito | 0.99 | 2 | 1.98 | 1 | +| Jazz | End Of Romanticism | Spyro Gyra | 0.99 | 2 | 1.98 | 1 | +| Jazz | Lemon Drop | Gene Krupa | 0.99 | 2 | 1.98 | 1 | +| Jazz | The Duke | Miles Davis | 0.99 | 2 | 1.98 | 1 | +| Jazz | Don't Take Your Love From Me | Gene Krupa | 0.99 | 2 | 1.98 | 1 | +| Jazz | Lamento De Carnaval | Gilberto Gil | 0.99 | 2 | 1.98 | 1 | +| Jazz | Blue Rythm Fantasy | Gene Krupa | 0.99 | 2 | 1.98 | 1 | +| Jazz | J Squared | Spyro Gyra | 0.99 | 2 | 1.98 | 1 | +| Jazz | Drum Boogie | Gene Krupa | 0.99 | 2 | 1.98 | 1 | +| Jazz | Por Causa De Você | Antônio Carlos Jobim | 0.99 | 2 | 1.98 | 1 | +| Latin | Não Quero Dinheiro | Tim Maia | 0.99 | 2 | 1.98 | 1 | +| Latin | Selvagem | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 1 | +| Latin | Revolta Olodum | Olodum | 0.99 | 2 | 1.98 | 1 | +| Latin | Meu Erro | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 1 | +| Latin | Meditação | Caetano Veloso | 0.99 | 2 | 1.98 | 1 | +| Latin | Falamansa Song | Falamansa | 0.99 | 2 | 1.98 | 1 | +| Latin | Romance Ideal | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 1 | +| Latin | Tarde Em Itapoã | Antônio Carlos Jobim | 0.99 | 2 | 1.98 | 1 | +| Latin | Álibi | Djavan | 0.99 | 2 | 1.98 | 1 | +| Latin | Bem Devagar | Caetano Veloso | 0.99 | 2 | 1.98 | 1 | +| Latin | Meu Caro Amigo | Chico Buarque | 0.99 | 2 | 1.98 | 1 | +| Latin | Geni E O Zepelim | Chico Buarque | 0.99 | 2 | 1.98 | 1 | +| Latin | Lixo Do Mangue | Chico Science & Nação Zumbi | 0.99 | 2 | 1.98 | 1 | +| Latin | Amor De Muito | 
Chico Science & Nação Zumbi | 0.99 | 2 | 1.98 | 1 | +| Latin | Esquinas | Djavan | 0.99 | 2 | 1.98 | 1 | +| Latin | Menestrel Das Alagoas | Milton Nascimento | 0.99 | 2 | 1.98 | 1 | +| Latin | Samba Do Lado | Chico Science & Nação Zumbi | 0.99 | 2 | 1.98 | 1 | +| Latin | Is This Love (Live) | Gilberto Gil | 0.99 | 2 | 1.98 | 1 | +| Latin | Música No Ar | Tim Maia | 0.99 | 2 | 1.98 | 1 | +| Latin | As Rosas Não Falam (Beth Carvalho) | Various Artists | 0.99 | 2 | 1.98 | 1 | +| Latin | Mensagen De Amor (2000) | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 1 | +| Latin | Norwegian Wood | Milton Nascimento | 0.99 | 2 | 1.98 | 1 | +| Latin | Beira Mar | Eric Clapton | 0.99 | 2 | 1.98 | 1 | +| Latin | Tanto Tempo | Antônio Carlos Jobim | 0.99 | 2 | 1.98 | 1 | +| Latin | Tempo Perdido | Legião Urbana | 0.99 | 2 | 1.98 | 1 | +| Latin | Stir It Up (Live) | Gilberto Gil | 0.99 | 2 | 1.98 | 1 | +| Latin | Brasil | Cássia Eller | 0.99 | 2 | 1.98 | 1 | +| Latin | Açai | Djavan | 0.99 | 2 | 1.98 | 1 | +| Latin | Bumbo Da Mangueira | Antônio Carlos Jobim | 0.99 | 2 | 1.98 | 1 | +| Latin | Linha Do Equador | Cláudio Zoli | 0.99 | 2 | 1.98 | 1 | +| Latin | Samba De Orly | Chico Buarque | 0.99 | 2 | 1.98 | 1 | +| Latin | Woman Of The World (Ao Vivo) | Cássia Eller | 0.99 | 2 | 1.98 | 1 | +| Latin | Só Tinha De Ser Com Você | Antônio Carlos Jobim | 0.99 | 2 | 1.98 | 1 | +| Latin | Morena De Angola | Chico Buarque | 0.99 | 2 | 1.98 | 1 | +| Latin | Nebulosa Do Amor | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 1 | +| Latin | Comportamento Geral | Gonzaguinha | 0.99 | 2 | 1.98 | 1 | +| Latin | Dois Pra Lá, Dois Pra Cá | Elis Regina | 0.99 | 2 | 1.98 | 1 | +| Latin | Azul | Djavan | 0.99 | 2 | 1.98 | 1 | +| Latin | A Novidade (Live) | Gilberto Gil | 0.99 | 2 | 1.98 | 1 | +| Latin | Pela Luz dos Olhos Teus (Miúcha e Tom Jobim) | Various Artists | 0.99 | 2 | 1.98 | 1 | +| Latin | Compadre | Tim Maia | 0.99 | 2 | 1.98 | 1 | +| Latin | Saber Amar | Os Paralamas Do Sucesso | 0.99 | 2 | 1.98 | 
1 | +| Latin | Mis Penas Lloraba Yo (Ao Vivo) Soy Gitano (Tangos) | Cássia Eller | 0.99 | 2 | 1.98 | 1 | +| Latin | L'Avventura | Legião Urbana | 0.99 | 2 | 1.98 | 1 | +| Latin | Nossa Gente (Avisa Là) | Olodum | 0.99 | 2 | 1.98 | 1 | +| Latin | Mel | Caetano Veloso | 0.99 | 2 | 1.98 | 1 | +| Metal | Gangland | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | Where Eagles Dare | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | The Unforgiven II | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Can I Play With Madness | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | Motorbreath | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | For Whom The Bell Tolls | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Phantom Lord | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Whiskey In The Jar | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Nothing Else Matters | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Re-Align | Godsmack | 0.99 | 2 | 1.98 | 1 | +| Metal | Breakdown | Guns N' Roses | 0.99 | 2 | 1.98 | 1 | +| Metal | Better Than You | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Moonchild | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | Run Silent Run Deep | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | Snowblind | Black Sabbath | 0.99 | 2 | 1.98 | 1 | +| Metal | Turbo Lover | Judas Priest | 0.99 | 2 | 1.98 | 1 | +| Metal | Welcome Home (Sanitarium) | Apocalyptica | 0.99 | 2 | 1.98 | 1 | +| Metal | Cornucopia | Black Sabbath | 0.99 | 2 | 1.98 | 1 | +| Metal | Leper Messiah | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | The House Jack Built | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Shoot Me Again | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Soldier Side - Intro | System Of A Down | 0.99 | 2 | 1.98 | 1 | +| Metal | Question! 
| System Of A Down | 0.99 | 2 | 1.98 | 1 | +| Metal | Blood Brothers | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | The Prisoner | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | You've Got Another Thing Comin' | Judas Priest | 0.99 | 2 | 1.98 | 1 | +| Metal | Stone Cold Crazy | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Battery | Metallica | 0.99 | 2 | 1.98 | 1 | +| Metal | Children Of The Grave | Ozzy Osbourne | 0.99 | 2 | 1.98 | 1 | +| Metal | Violent Pornography | System Of A Down | 0.99 | 2 | 1.98 | 1 | +| Metal | Aces High | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Metal | Chemical Wedding | Bruce Dickinson | 0.99 | 2 | 1.98 | 1 | +| Metal | The Clairvoyant | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Opera | Die Zauberflöte, K.620: "Der Hölle Rache Kocht in Meinem Herze" | Sir Georg Solti, Sumi Jo & Wiener Philharmoniker | 0.99 | 0 | 0 | 1 | +| Pop | Give Peace a Chance | U2 | 0.99 | 2 | 1.98 | 1 | +| Pop | Beautiful Boy | U2 | 0.99 | 2 | 1.98 | 1 | +| R&B/Soul | Rehab | Amy Winehouse | 0.99 | 2 | 1.98 | 1 | +| R&B/Soul | Hey America | James Brown | 0.99 | 2 | 1.98 | 1 | +| R&B/Soul | Abraham, Martin And John | Marvin Gaye | 0.99 | 2 | 1.98 | 1 | +| R&B/Soul | Save The Children | Marvin Gaye | 0.99 | 2 | 1.98 | 1 | +| Reggae | C'est La Vie | UB40 | 0.99 | 2 | 1.98 | 1 | +| Reggae | A Cor Do Sol | Cidade Negra | 0.99 | 2 | 1.98 | 1 | +| Rock | Two Hearts Beat As One | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | City Of Blinding Lights | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Red Light | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Shock Me | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | The Trooper | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Everlasting Love | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | The Midnight Special | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | Untitled | Pearl Jam | 0.99 | 2 | 1.98 | 1 | +| Rock | Down by the Sea | Men At Work | 0.99 | 2 | 1.98 | 1 | +| Rock | What A Day | Faith No More | 0.99 | 2 | 1.98 | 1 | +| Rock | Pretty Noose | 
Soundgarden | 0.99 | 2 | 1.98 | 1 | +| Rock | War Pigs | Ozzy Osbourne | 0.99 | 2 | 1.98 | 1 | +| Rock | Boris The Spider | The Who | 0.99 | 2 | 1.98 | 1 | +| Rock | Sweet Hitch-Hiker | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | Menino De Rua | O Terço | 0.99 | 2 | 1.98 | 1 | +| Rock | Sure Know Something | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | Commotion | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | Que Luz É Essa | Raul Seixas | 0.99 | 2 | 1.98 | 1 | +| Rock | Judgement Of Heaven | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Sun King | The Cult | 0.99 | 2 | 1.98 | 1 | +| Rock | Smoke On The Water | Deep Purple | 0.99 | 2 | 1.98 | 1 | +| Rock | Eruption | Van Halen | 0.99 | 2 | 1.98 | 1 | +| Rock | The Aftermath | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Perfect Crime | Guns N' Roses | 0.99 | 2 | 1.98 | 1 | +| Rock | Dancing Days | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Binky The Doormat | R.E.M. | 0.99 | 2 | 1.98 | 1 | +| Rock | Walking Into Clarksdale | Page & Plant | 0.99 | 2 | 1.98 | 1 | +| Rock | Miami | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Shakes and Ladders | Men At Work | 0.99 | 2 | 1.98 | 1 | +| Rock | Speed Of Light | The Cult | 0.99 | 2 | 1.98 | 1 | +| Rock | Whatever It Is, I Just Can't Stop | Jamiroquai | 0.99 | 2 | 1.98 | 1 | +| Rock | Good Golly Miss Molly | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | The Star Spangled Banner | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Don't Look Now | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | When I Look At The World | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Sweetest Thing | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | The Number Of The Beast | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Don't Look To The Eyes Of A Stranger | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Hallelujah Here She Comes | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Hallowed Be Thy Name | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | I'm Going Slightly Mad | Queen | 
0.99 | 2 | 1.98 | 1 | +| Rock | You Better You Bet | The Who | 0.99 | 2 | 1.98 | 1 | +| Rock | You're My Best Friend | Queen | 0.99 | 2 | 1.98 | 1 | +| Rock | Susie Q | Creedence Clearwater Revival | 0.99 | 2 | 1.98 | 1 | +| Rock | We Are The Champions | Queen | 0.99 | 2 | 1.98 | 1 | +| Rock | I'm Gonna Crawl | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Eruption | Van Halen | 0.99 | 2 | 1.98 | 1 | +| Rock | The Garden | Guns N' Roses | 0.99 | 2 | 1.98 | 1 | +| Rock | Overdose | AC/DC | 0.99 | 2 | 1.98 | 1 | +| Rock | Calling Dr. Love | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | Balls to the Wall | Accept | 0.99 | 2 | 1.98 | 1 | +| Rock | Walter's Walk | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Inject The Venom | AC/DC | 0.99 | 2 | 1.98 | 1 | +| Rock | The Last Song | Foo Fighters | 0.99 | 2 | 1.98 | 1 | +| Rock | No No No | Deep Purple | 0.99 | 2 | 1.98 | 1 | +| Rock | Piece Of Pie | Stone Temple Pilots | 0.99 | 2 | 1.98 | 1 | +| Rock | The Thin Line Between Love & Hate | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Year to the Day | Van Halen | 0.99 | 2 | 1.98 | 1 | +| Rock | Ramble On | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Snowballed | AC/DC | 0.99 | 2 | 1.98 | 1 | +| Rock | Right On Time | Red Hot Chili Peppers | 0.99 | 2 | 1.98 | 1 | +| Rock | Sangue Latino | O Terço | 0.99 | 2 | 1.98 | 1 | +| Rock | Pride (In The Name Of Love) | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | In The Evening | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | For Your Life | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Undertow | R.E.M. 
| 0.99 | 2 | 1.98 | 1 | +| Rock | For the Greater Good of God | Iron Maiden | 0.99 | 2 | 1.98 | 1 | +| Rock | Folhas Secas | O Terço | 0.99 | 2 | 1.98 | 1 | +| Rock | Tear | Red Hot Chili Peppers | 0.99 | 2 | 1.98 | 1 | +| Rock | Plaster Caster | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | Living Loving Maid (She's Just A Woman) | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Living On Love | David Coverdale | 0.99 | 2 | 1.98 | 1 | +| Rock | How Many More Times | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Dirty Day | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Rocket Queen | Guns N' Roses | 0.99 | 2 | 1.98 | 1 | +| Rock | Flying High Again | Ozzy Osbourne | 0.99 | 2 | 1.98 | 1 | +| Rock | Deuces Are Wild | Aerosmith | 0.99 | 2 | 1.98 | 1 | +| Rock | Dazed and Confused | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Sure Know Something | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | Peace On Earth | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Maybe I'm A Leo | Deep Purple | 0.99 | 2 | 1.98 | 1 | +| Rock | Not The Doctor | Alanis Morissette | 0.99 | 2 | 1.98 | 1 | +| Rock | Mr. 
Cab Driver | Lenny Kravitz | 0.99 | 2 | 1.98 | 1 | +| Rock | Rock And Roll All Nite | Kiss | 0.99 | 2 | 1.98 | 1 | +| Rock | All Along The Watchtower | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock | Mama, I'm Coming Home | Ozzy Osbourne | 0.99 | 2 | 1.98 | 1 | +| Rock | Garden of Eden | Guns N' Roses | 0.99 | 2 | 1.98 | 1 | +| Rock | Somebody To Love | Queen | 0.99 | 2 | 1.98 | 1 | +| Rock | Bron-Y-Aur Stomp | Led Zeppelin | 0.99 | 2 | 1.98 | 1 | +| Rock | Sunday Bloody Sunday | U2 | 0.99 | 2 | 1.98 | 1 | +| Rock And Roll | Long Tall Sally | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Rock And Roll | Money | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Rock And Roll | Carol | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Rock And Roll | Rock 'N' Roll Music | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Rock And Roll | C'Mon Everybody | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Rock And Roll | Good Golly Miss Molly | BackBeat | 0.99 | 1 | 0.99 | 1 | +| Sci Fi & Fantasy | The Hand of God | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Battlestar Galactica, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Gun On Ice Planet Zero, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Take the Celestra | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Battlestar Galactica, Pt. 1 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Lost Warrior | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Long Patrol | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Lost Planet of the Gods, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Lost Planet of the Gods, Pt. 1 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | War of the Gods, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Living Legend, Pt. 
1 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Baltar's Escape | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Greetings from Earth, Pt. 1 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Living Legend, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Gun On Ice Planet Zero, Pt. 1 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Crossroads, Pt. 2 | Battlestar Galactica | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Experiment In Terra | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Greetings from Earth, Pt. 2 | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | The Magnificent Warriors | Battlestar Galactica (Classic) | 1.99 | 1 | 1.99 | 1 | +| Sci Fi & Fantasy | Crossroads, Pt. 1 | Battlestar Galactica | 1.99 | 1 | 1.99 | 1 | +| Science Fiction | The Woman King | Battlestar Galactica | 1.99 | 2 | 3.98 | 1 | +| Soundtrack | Plot 180 | Passengers | 0.99 | 2 | 1.98 | 1 | +| TV Shows | Walkabout | Lost | 1.99 | 2 | 3.98 | 1 | +| TV Shows | Pilot | Aquaman | 1.99 | 2 | 3.98 | 1 | +| TV Shows | Gay Witch Hunt | The Office | 1.99 | 2 | 3.98 | 1 | +| TV Shows | Hot Girl | The Office | 1.99 | 2 | 3.98 | 1 | +| World | No Futuro | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | Demorou! 
| Mônica Marianno | 0.99 | 1 | 0.99 | 1 | +| World | Enquanto O Dia Não Vem | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | A Moça e a Chuva | Mônica Marianno | 0.99 | 1 | 0.99 | 1 | +| World | Papelão | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | É Fogo | Mônica Marianno | 0.99 | 1 | 0.99 | 1 | +| World | Din Din Wo (Little Child) | Habib Koité and Bamada | 0.99 | 1 | 0.99 | 1 | +| World | Voce Inteira | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | O Que Vai Em Meu Coração | Mônica Marianno | 0.99 | 1 | 0.99 | 1 | +| World | Cafezinho | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | Já Foi | Mônica Marianno | 0.99 | 1 | 0.99 | 1 | +| World | Equinocio | João Suplicy | 0.99 | 1 | 0.99 | 1 | +| World | Aos Leões | Mônica Marianno | 0.99 | 1 | 0.99 | 1 | diff --git a/tests/chinook/dataset/task_8/task.txt b/tests/chinook/dataset/task_8/task.txt new file mode 100644 index 00000000..35967740 --- /dev/null +++ b/tests/chinook/dataset/task_8/task.txt @@ -0,0 +1,3 @@ +Find me all tracks and their artist that are both the most expensive in their +genre and have the highest sales volume within that same genre. I need to +identify which premium-priced tracks are still dominating their market segments. 
diff --git a/tests/chinook/dataset/task_9/ground_truth.md b/tests/chinook/dataset/task_9/ground_truth.md new file mode 100644 index 00000000..547b4d07 --- /dev/null +++ b/tests/chinook/dataset/task_9/ground_truth.md @@ -0,0 +1,61 @@ +| customer_id | customer_name | orders_count | total_spent | total_spent_rounded | orders_per_day | days_active | days_since_last_purchase | first_order | last_order | clv_segment | churn_risk | +| ----------- | --------------------- | ------------ | ----------- | ------------------- | ---------------------- | ----------- | ------------------------ | ---------------------- | ---------------------- | ----------- | ----------------- | +| 6 | Helena Holý | 7 | 49.62 | 49.62 | 0.00441361916771752837 | 1586 | -63 | 2021-07-11 00:00:00+00 | 2025-11-13 00:00:00+00 | High | Low churn risk | +| 26 | Richard Cunningham | 7 | 47.62 | 47.62 | 0.00562248995983935743 | 1245 | 159 | 2021-11-07 00:00:00+00 | 2025-04-05 00:00:00+00 | High | High churn risk | +| 57 | Luis Rojas | 7 | 46.62 | 46.62 | 0.00543056633048875097 | 1289 | 332 | 2021-04-04 00:00:00+00 | 2024-10-14 00:00:00+00 | High | High churn risk | +| 45 | Ladislav Kovács | 7 | 45.62 | 45.62 | 0.00543056633048875097 | 1289 | 53 | 2022-01-08 00:00:00+00 | 2025-07-20 00:00:00+00 | High | Medium churn risk | +| 46 | Hugh O'Reilly | 7 | 45.62 | 45.62 | 0.00403458213256484150 | 1735 | -54 | 2021-02-03 00:00:00+00 | 2025-11-04 00:00:00+00 | High | Low churn risk | +| 28 | Julia Barnett | 7 | 43.62 | 43.62 | 0.00543056633048875097 | 1289 | 115 | 2021-11-07 00:00:00+00 | 2025-05-19 00:00:00+00 | High | High churn risk | +| 37 | Fynn Zimmermann | 7 | 43.62 | 43.62 | 0.00438596491228070175 | 1596 | 100 | 2021-01-19 00:00:00+00 | 2025-06-03 00:00:00+00 | High | High churn risk | +| 24 | Frank Ralston | 7 | 43.62 | 43.62 | 0.00543056633048875097 | 1289 | 22 | 2022-02-08 00:00:00+00 | 2025-08-20 00:00:00+00 | High | Low churn risk | +| 25 | Victor Stevens | 7 | 42.62 | 42.62 | 0.00403458213256484150 | 
1735 | -85 | 2021-03-06 00:00:00+00 | 2025-12-05 00:00:00+00 | High | Low churn risk | +| 7 | Astrid Gruber | 7 | 42.62 | 42.62 | 0.00543056633048875097 | 1289 | 84 | 2021-12-08 00:00:00+00 | 2025-06-19 00:00:00+00 | High | Medium churn risk | +| 44 | Terhi Hämäläinen | 7 | 41.62 | 41.62 | 0.00441361916771752837 | 1586 | -94 | 2021-08-11 00:00:00+00 | 2025-12-14 00:00:00+00 | High | Low churn risk | +| 48 | Johannes Van der Berg | 7 | 40.62 | 40.62 | 0.00441361916771752837 | 1586 | -1 | 2021-05-10 00:00:00+00 | 2025-09-12 00:00:00+00 | High | Low churn risk | +| 43 | Isabelle Mercier | 7 | 40.62 | 40.62 | 0.00562248995983935743 | 1245 | 97 | 2022-01-08 00:00:00+00 | 2025-06-06 00:00:00+00 | High | High churn risk | +| 5 | František Wichterlová | 7 | 40.62 | 40.62 | 0.00562248995983935743 | 1245 | 128 | 2021-12-08 00:00:00+00 | 2025-05-06 00:00:00+00 | High | High churn risk | +| 17 | Jack Smith | 7 | 39.62 | 39.62 | 0.00562248995983935743 | 1245 | 407 | 2021-03-04 00:00:00+00 | 2024-07-31 00:00:00+00 | High | High churn risk | +| 34 | João Fernandes | 7 | 39.62 | 39.62 | 0.00562248995983935743 | 1245 | 345 | 2021-05-05 00:00:00+00 | 2024-10-01 00:00:00+00 | High | High churn risk | +| 20 | Dan Miller | 7 | 39.62 | 39.62 | 0.00543056633048875097 | 1289 | -71 | 2022-05-12 00:00:00+00 | 2025-11-21 00:00:00+00 | High | Low churn risk | +| 22 | Heather Leacock | 7 | 39.62 | 39.62 | 0.00562248995983935743 | 1245 | 66 | 2022-02-08 00:00:00+00 | 2025-07-07 00:00:00+00 | High | Medium churn risk | +| 3 | François Tremblay | 7 | 39.62 | 39.62 | 0.00543056633048875097 | 1289 | -9 | 2022-03-11 00:00:00+00 | 2025-09-20 00:00:00+00 | High | Low churn risk | +| 42 | Wyatt Girard | 7 | 39.62 | 39.62 | 0.00403458213256484150 | 1735 | -53 | 2021-02-02 00:00:00+00 | 2025-11-03 00:00:00+00 | High | Low churn risk | +| 1 | Luís Gonçalves | 7 | 39.62 | 39.62 | 0.00562248995983935743 | 1245 | 35 | 2022-03-11 00:00:00+00 | 2025-08-07 00:00:00+00 | Medium | Medium churn risk | +| 4 | Bjørn 
Hansen | 7 | 39.62 | 39.62 | 0.00403458213256484150 | 1735 | -22 | 2021-01-02 00:00:00+00 | 2025-10-03 00:00:00+00 | Medium | Low churn risk | +| 58 | Manoj Pareek | 7 | 38.62 | 38.62 | 0.00543056633048875097 | 1289 | -102 | 2022-06-12 00:00:00+00 | 2025-12-22 00:00:00+00 | Medium | Low churn risk | +| 40 | Dominique Lefebvre | 7 | 38.62 | 38.62 | 0.00543056633048875097 | 1289 | 394 | 2021-02-01 00:00:00+00 | 2024-08-13 00:00:00+00 | Medium | High churn risk | +| 15 | Jennifer Peterson | 7 | 38.62 | 38.62 | 0.00543056633048875097 | 1289 | 270 | 2021-06-05 00:00:00+00 | 2024-12-15 00:00:00+00 | Medium | High churn risk | +| 39 | Camille Bernard | 7 | 38.62 | 38.62 | 0.00562248995983935743 | 1245 | 4 | 2022-04-11 00:00:00+00 | 2025-09-07 00:00:00+00 | Medium | Low churn risk | +| 51 | Joakim Johansson | 7 | 38.62 | 38.62 | 0.00562248995983935743 | 1245 | 283 | 2021-07-06 00:00:00+00 | 2024-12-02 00:00:00+00 | Medium | High churn risk | +| 19 | Tim Goyer | 7 | 38.62 | 38.62 | 0.00543056633048875097 | 1289 | 363 | 2021-03-04 00:00:00+00 | 2024-09-13 00:00:00+00 | Medium | High churn risk | +| 55 | Mark Taylor | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 376 | 2021-04-04 00:00:00+00 | 2024-08-31 00:00:00+00 | Medium | High churn risk | +| 56 | Diego Gutiérrez | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | -58 | 2022-06-12 00:00:00+00 | 2025-11-08 00:00:00+00 | Medium | Low churn risk | +| 52 | Emma Jones | 7 | 37.62 | 37.62 | 0.00441361916771752837 | 1586 | 92 | 2021-02-06 00:00:00+00 | 2025-06-11 00:00:00+00 | Medium | High churn risk | +| 2 | Leonie Köhler | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 425 | 2021-01-01 00:00:00+00 | 2024-07-13 00:00:00+00 | Medium | High churn risk | +| 8 | Daan Peeters | 7 | 37.62 | 37.62 | 0.00403458213256484150 | 1735 | -23 | 2021-01-03 00:00:00+00 | 2025-10-04 00:00:00+00 | Medium | Low churn risk | +| 9 | Kara Nielsen | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 221 | 2021-09-06 00:00:00+00 | 
2025-02-02 00:00:00+00 | Medium | High churn risk | +| 10 | Eduardo Martins | 7 | 37.62 | 37.62 | 0.00441361916771752837 | 1586 | 30 | 2021-04-09 00:00:00+00 | 2025-08-12 00:00:00+00 | Medium | Low churn risk | +| 11 | Alexandre Rocha | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 177 | 2021-09-06 00:00:00+00 | 2025-03-18 00:00:00+00 | Medium | High churn risk | +| 12 | Roberto Almeida | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | -24 | 2021-05-23 00:00:00+00 | 2025-10-05 00:00:00+00 | Medium | Low churn risk | +| 13 | Fernanda Ramos | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 314 | 2021-06-05 00:00:00+00 | 2024-11-01 00:00:00+00 | Medium | High churn risk | +| 14 | Mark Philips | 7 | 37.62 | 37.62 | 0.00441361916771752837 | 1586 | 123 | 2021-01-06 00:00:00+00 | 2025-05-11 00:00:00+00 | Medium | High churn risk | +| 16 | Frank Harris | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | 69 | 2021-02-19 00:00:00+00 | 2025-07-04 00:00:00+00 | Medium | Medium churn risk | +| 18 | Michelle Brooks | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | -27 | 2022-05-12 00:00:00+00 | 2025-10-08 00:00:00+00 | Low | Low churn risk | +| 21 | Kathy Chase | 7 | 37.62 | 37.62 | 0.00403458213256484150 | 1735 | -84 | 2021-03-05 00:00:00+00 | 2025-12-04 00:00:00+00 | Low | Low churn risk | +| 23 | John Gordon | 7 | 37.62 | 37.62 | 0.00391498881431767338 | 1788 | -84 | 2021-01-11 00:00:00+00 | 2025-12-04 00:00:00+00 | Low | Low churn risk | +| 27 | Patrick Gray | 7 | 37.62 | 37.62 | 0.00441361916771752837 | 1586 | -32 | 2021-06-10 00:00:00+00 | 2025-10-13 00:00:00+00 | Low | Low churn risk | +| 29 | Robert Brown | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | -86 | 2021-07-24 00:00:00+00 | 2025-12-06 00:00:00+00 | Low | Low churn risk | +| 30 | Edward Francis | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 252 | 2021-08-06 00:00:00+00 | 2025-01-02 00:00:00+00 | Low | High churn risk | +| 31 | Martha Silk | 7 | 37.62 | 37.62 | 
0.00441361916771752837 | 1586 | 61 | 2021-03-09 00:00:00+00 | 2025-07-12 00:00:00+00 | Low | Medium churn risk | +| 32 | Aaron Mitchell | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 208 | 2021-08-06 00:00:00+00 | 2025-02-15 00:00:00+00 | Low | High churn risk | +| 33 | Ellie Sullivan | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | 7 | 2021-04-22 00:00:00+00 | 2025-09-04 00:00:00+00 | Low | Low churn risk | +| 35 | Madalena Sampaio | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | -89 | 2022-07-13 00:00:00+00 | 2025-12-09 00:00:00+00 | Low | Low churn risk | +| 36 | Hannah Schneider | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 301 | 2021-05-05 00:00:00+00 | 2024-11-14 00:00:00+00 | Low | High churn risk | +| 38 | Niklas Schröder | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 438 | 2021-02-01 00:00:00+00 | 2024-06-30 00:00:00+00 | Low | High churn risk | +| 41 | Marc Dubois | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | -40 | 2022-04-11 00:00:00+00 | 2025-10-21 00:00:00+00 | Low | Low churn risk | +| 47 | Lucas Mancini | 7 | 37.62 | 37.62 | 0.00562248995983935743 | 1245 | 190 | 2021-10-07 00:00:00+00 | 2025-03-05 00:00:00+00 | Low | High churn risk | +| 49 | Stanisław Wójcik | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 146 | 2021-10-07 00:00:00+00 | 2025-04-18 00:00:00+00 | Low | High churn risk | +| 50 | Enrique Muñoz | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | -55 | 2021-06-23 00:00:00+00 | 2025-11-05 00:00:00+00 | Low | Low churn risk | +| 53 | Phil Hughes | 7 | 37.62 | 37.62 | 0.00543056633048875097 | 1289 | 239 | 2021-07-06 00:00:00+00 | 2025-01-15 00:00:00+00 | Low | High churn risk | +| 54 | Steve Murray | 7 | 37.62 | 37.62 | 0.00438596491228070175 | 1596 | 38 | 2021-03-22 00:00:00+00 | 2025-08-04 00:00:00+00 | Low | Medium churn risk | +| 59 | Puja Srivastava | 6 | 36.64 | 36.64 | 0.00521285838401390096 | 1151 | 469 | 2021-04-05 00:00:00+00 | 2024-05-30 00:00:00+00 | Low | High churn risk | diff 
--git a/tests/chinook/dataset/task_9/task.txt b/tests/chinook/dataset/task_9/task.txt new file mode 100644 index 00000000..5fb7c070 --- /dev/null +++ b/tests/chinook/dataset/task_9/task.txt @@ -0,0 +1,3 @@ +Calculate customer lifetime value segments and predict churn risk. Group +customers into segments based on spending patterns and calculate days since last +purchase to identify churn risk. diff --git a/tests/chinook/pydantic/test_pydantic_complex_queries.py b/tests/chinook/pydantic/test_pydantic_complex_queries.py index 439e4b18..583c90df 100644 --- a/tests/chinook/pydantic/test_pydantic_complex_queries.py +++ b/tests/chinook/pydantic/test_pydantic_complex_queries.py @@ -1,7 +1,7 @@ import os from pydantic import BaseModel from pydantic_ai import Agent -from pydantic_ai.models.openai import OpenAIChatModel +from pydantic_ai.models.openai import OpenAIChatModel, OpenAIChatModelSettings import pytest from eval_protocol.models import EvaluateResult, EvaluationRow @@ -12,17 +12,22 @@ from tests.pytest.test_pydantic_agent import PydanticAgentRolloutProcessor LLM_JUDGE_PROMPT = ( - "Your job is to compare the response to the expected answer.\n" - "The response will be a narrative report of the query results.\n" - "If the response contains the same or well summarized information as the expected answer, return 1.0.\n" - "If the response does not contain the same information or is missing information, return 0.0." + "You will be given the user task, the response of an AI assistant, and the expected answer.\n" + "Compare the response to the expected answer. 
The response is a narrative report of the query results.\n" + "Reward for core information being provided correctly and ignore formatting issues.\n" + "Return a score from 0.0 to 1.0:\n" + "- 1.0: response contains all or well summarized information as the expected answer\n" + "- 0.0: response does not contain the same information or is missing all key information\n" + "- Values in between: partial credit for responses that are partially correct or contain some but not all of the expected information." ) def agent_factory(config: RolloutProcessorConfig) -> Agent: model_name = config.completion_params["model"] - provider = config.completion_params["provider"] - model = OpenAIChatModel(model_name, provider=provider) + provider = config.completion_params.get("provider") + reasoning = config.completion_params.get("reasoning") + settings = OpenAIChatModelSettings(openai_reasoning_effort=reasoning) + model = OpenAIChatModel(model_name, provider=provider or "openai", settings=settings) return setup_agent(model) @@ -34,12 +39,23 @@ def agent_factory(config: RolloutProcessorConfig) -> Agent: @evaluation_test( input_rows=[collect_dataset()], completion_params=[ - { - "model": "accounts/fireworks/models/kimi-k2-instruct", - "provider": "fireworks", - }, + # { + # "model": "accounts/fireworks/models/kimi-k2-instruct", + # "provider": "fireworks", + # }, + # { + # "model": "accounts/fireworks/models/deepseek-v3p1", + # "provider": "fireworks", + # }, + # { + # "model": "accounts/fireworks/models/kimi-k2-instruct-0905", + # "provider": "fireworks", + # }, + {"model": "gpt-5"}, + # {"model": "gpt-5", "reasoning": "high"}, ], rollout_processor=PydanticAgentRolloutProcessor(agent_factory), + num_runs=2, ) async def test_pydantic_complex_queries(row: EvaluationRow) -> EvaluationRow: """ @@ -58,10 +74,17 @@ async def test_pydantic_complex_queries(row: EvaluationRow) -> EvaluationRow: ) else: model = OpenAIChatModel( - "accounts/fireworks/models/kimi-k2-instruct", - 
provider="fireworks", + "gpt-5", ) + first_user_message = row.get_first_user_message() + if first_user_message is None: + row.evaluation_result = EvaluateResult( + score=0.0, + reason="No user message found", + ) + return row + class Response(BaseModel): """ A score between 0.0 and 1.0 indicating whether the response is correct. @@ -81,7 +104,7 @@ class Response(BaseModel): output_retries=5, ) result = await comparison_agent.run( - f"Expected answer: {row.ground_truth}\nResponse: {last_assistant_message.content}" + f"User task: {first_user_message.content}\nResponse: {last_assistant_message.content}\nExpected answer: {row.ground_truth}" ) row.evaluation_result = EvaluateResult( score=result.output.score, diff --git a/tests/chinook/pydantic/test_pydantic_complex_queries_responses.py b/tests/chinook/pydantic/test_pydantic_complex_queries_responses.py index d94e98fa..5ed1e377 100644 --- a/tests/chinook/pydantic/test_pydantic_complex_queries_responses.py +++ b/tests/chinook/pydantic/test_pydantic_complex_queries_responses.py @@ -19,21 +19,24 @@ def agent_factory(config: RolloutProcessorConfig) -> Agent: model_name = config.completion_params["model"] - model_settings = OpenAIResponsesModelSettings() - model = OpenAIResponsesModel(model_name) + reasoning = config.completion_params.get("reasoning") + settings = OpenAIResponsesModelSettings( + openai_reasoning_effort=reasoning, + ) + model = OpenAIResponsesModel(model_name, settings=settings) return setup_agent(model) -@pytest.mark.skipif( +@pytest.mark.skipif( # pyright: ignore[reportAttributeAccessIssue] os.environ.get("CI") == "true", reason="This was only run locally to generate traces in Responses API", ) -@pytest.mark.asyncio +@pytest.mark.asyncio # pyright: ignore[reportAttributeAccessIssue] @evaluation_test( input_rows=[collect_dataset()], completion_params=[ { - "model": "gpt-4o", + "model": "gpt-5", }, ], rollout_processor=PydanticAgentRolloutProcessor(agent_factory), diff --git 
a/tests/pytest/test_parameterized_ids.py b/tests/pytest/test_parameterized_ids.py index ff1726d7..b182bfe5 100644 --- a/tests/pytest/test_parameterized_ids.py +++ b/tests/pytest/test_parameterized_ids.py @@ -43,34 +43,59 @@ def test_parameterized_ids(row: EvaluationRow) -> EvaluationRow: # Extract the IDs from the parametrize mark ids = parametrize_mark.kwargs.get("ids") if ids is not None: - # Should have IDs for models but not for temperature-only params - expected_ids = ["model-gpt-oss-120b", "model-gpt-4"] + # Should have IDs for all parameters that have string/numeric values + expected_ids = ["fireworks_ai/accounts/fireworks/models/gpt-oss-120b", "gpt-4", "0.5"] assert list(ids) == expected_ids, f"Expected {expected_ids}, got {list(ids)}" def test_default_id_generator(): - """Test the DefaultParameterIdGenerator with various model names.""" + """Test the DefaultParameterIdGenerator with various parameter combinations.""" generator = DefaultParameterIdGenerator() # Test with full model path combo1 = (None, {"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}, None, None, None) id1 = generator.generate_id(combo1) - assert id1 == "model-gpt-oss-120b" + assert id1 == "fireworks_ai/accounts/fireworks/models/gpt-oss-120b" # Test with simple model name combo2 = (None, {"model": "gpt-4"}, None, None, None) id2 = generator.generate_id(combo2) - assert id2 == "model-gpt-4" + assert id2 == "gpt-4" - # Test with no model - combo3 = (None, {"temperature": 0.5}, None, None, None) + # Test with multiple string parameters + combo3 = (None, {"model": "gpt-4", "stream": "true", "temperature": "0.7"}, None, None, None) id3 = generator.generate_id(combo3) - assert id3 is None + assert id3 == "gpt-4:true:0.7" - # Test with None completion_params - combo4 = (None, None, None, None, None) + # Test with mixed string and numeric parameters + combo4 = (None, {"model": "gpt-4", "temperature": 0.7, "max_tokens": 100}, None, None, None) id4 = generator.generate_id(combo4) - 
assert id4 is None + assert id4 == "100:gpt-4:0.7" # Keys are sorted alphabetically: max_tokens, model, temperature + + # Test with only numeric values + combo5 = (None, {"temperature": 0.5, "max_tokens": 100}, None, None, None) + id5 = generator.generate_id(combo5) + assert id5 == "100:0.5" # Keys are sorted alphabetically: max_tokens, temperature + + # Test with boolean values + combo6 = (None, {"stream": True, "echo": False}, None, None, None) + id6 = generator.generate_id(combo6) + assert id6 == "False:True" # Keys are sorted alphabetically: echo, stream + + # Test with mixed string, numeric, and boolean values + combo7 = (None, {"model": "gpt-4", "temperature": 0.7, "stream": True}, None, None, None) + id7 = generator.generate_id(combo7) + assert id7 == "gpt-4:True:0.7" # Keys are sorted alphabetically: model, stream, temperature + + # Test with no supported values (only non-supported types like lists, dicts) + combo8 = (None, {"messages": [{"role": "user"}], "config": {"key": "value"}}, None, None, None) + id8 = generator.generate_id(combo8) + assert id8 is None + + # Test with None completion_params + combo9 = (None, None, None, None, None) + id9 = generator.generate_id(combo9) + assert id9 is None def test_pytest_parametrize_with_custom_id_generator(): @@ -80,7 +105,7 @@ def test_pytest_parametrize_with_custom_id_generator(): combinations = [ (None, {"model": "gpt-4"}, None, None, None), (None, {"model": "claude-3"}, None, None, None), - (None, {"temperature": 0.5}, None, None, None), # No model + (None, {"temperature": 0.5}, None, None, None), # Only numeric values ] # Test with default generator @@ -95,7 +120,7 @@ def test_pytest_parametrize_with_custom_id_generator(): assert result["argnames"] == ["completion_params"] assert len(list(result["argvalues"])) == 3 - assert result["ids"] == ["model-gpt-4", "model-claude-3"] # None for no model + assert result["ids"] == ["gpt-4", "claude-3", "0.5"] # All have string/numeric values def 
test_id_generator_max_length(): @@ -105,5 +130,5 @@ def test_id_generator_max_length(): # Test with long model name combo = (None, {"model": "very-long-model-name-that-exceeds-max-length"}, None, None, None) id_str = generator.generate_id(combo) - assert id_str == "model-v..." + assert id_str == "very-lo..." assert len(id_str) <= 10 diff --git a/tests/pytest/test_pydantic_ai_metadata_handling.py b/tests/pytest/test_pydantic_ai_metadata_handling.py new file mode 100644 index 00000000..9046d6c2 --- /dev/null +++ b/tests/pytest/test_pydantic_ai_metadata_handling.py @@ -0,0 +1,110 @@ +import pytest +from typing import Any, Dict +from unittest.mock import Mock +from pydantic_ai import Agent +from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel +from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata +from eval_protocol.pytest.default_pydantic_ai_rollout_processor import PydanticAgentRolloutProcessor +from eval_protocol.pytest.types import RolloutProcessorConfig + + +def test_pydantic_ai_metadata_only_stored_for_responses_model(): + """Test that PydanticAI metadata is only stored in extra_body for ResponsesModel, not for ChatModel.""" + + # Create a test row with metadata + row = EvaluationRow( + input_metadata=InputMetadata(row_id="test-row-123"), + execution_metadata=ExecutionMetadata( + invocation_id="test-invocation-456", + rollout_id="test-rollout-789", + run_id="test-run-101", + experiment_id="test-experiment-202", + ), + messages=[], + ) + + # Test with OpenAIChatModel (should NOT store metadata) + chat_model = OpenAIChatModel("gpt-4") + chat_agent = Agent(model=chat_model) + processor = PydanticAgentRolloutProcessor(lambda config: chat_agent) + + settings = processor.construct_model_settings(chat_agent, row) + + # ChatModel should not have metadata in extra_body + extra_body = settings.get("extra_body", {}) + assert isinstance(extra_body, dict), "extra_body should be a dict" + assert "metadata" not in extra_body, 
"ChatModel should not store metadata in extra_body" + + # Test with OpenAIResponsesModel (should store metadata) + responses_model = OpenAIResponsesModel("gpt-5") + responses_agent = Agent(model=responses_model) + processor_responses = PydanticAgentRolloutProcessor(lambda config: responses_agent) + + settings_responses = processor_responses.construct_model_settings(responses_agent, row) + + # ResponsesModel should have metadata in extra_body + extra_body_responses = settings_responses.get("extra_body", {}) + assert isinstance(extra_body_responses, dict), "extra_body should be a dict" + assert "metadata" in extra_body_responses, "ResponsesModel should store metadata in extra_body" + + metadata = extra_body_responses["metadata"] + assert isinstance(metadata, dict), "metadata should be a dict" + assert metadata["row_id"] == "test-row-123" + assert metadata["invocation_id"] == "test-invocation-456" + assert metadata["rollout_id"] == "test-rollout-789" + assert metadata["run_id"] == "test-run-101" + assert metadata["experiment_id"] == "test-experiment-202" + + +def test_pydantic_ai_metadata_handling_with_string_model(): + """Test that PydanticAI string models don't cause issues with metadata handling.""" + + # Create a test row + row = EvaluationRow( + input_metadata=InputMetadata(row_id="test-row-123"), + execution_metadata=ExecutionMetadata( + invocation_id="test-invocation-456", + rollout_id="test-rollout-789", + run_id="test-run-101", + experiment_id="test-experiment-202", + ), + messages=[], + ) + + # Create agent with string model (should not store metadata) + agent = Agent(model="gpt-4") + processor = PydanticAgentRolloutProcessor(lambda config: agent) + + settings = processor.construct_model_settings(agent, row) + + # String model should not have metadata in extra_body + extra_body = settings.get("extra_body", {}) + assert isinstance(extra_body, dict), "extra_body should be a dict" + assert "metadata" not in extra_body, "String model should not store metadata in 
extra_body" + + +def test_pydantic_ai_metadata_handling_with_none_model(): + """Test that PydanticAI None model doesn't cause issues with metadata handling.""" + + # Create a test row + row = EvaluationRow( + input_metadata=InputMetadata(row_id="test-row-123"), + execution_metadata=ExecutionMetadata( + invocation_id="test-invocation-456", + rollout_id="test-rollout-789", + run_id="test-run-101", + experiment_id="test-experiment-202", + ), + messages=[], + ) + + # Create agent with None model (should not store metadata) + agent = Agent(model=None) + processor = PydanticAgentRolloutProcessor(lambda config: agent) + + settings = processor.construct_model_settings(agent, row) + + # None model should not have metadata in extra_body + extra_body = settings.get("extra_body", {}) + assert isinstance(extra_body, dict), "extra_body should be a dict" + assert "metadata" not in extra_body, "None model should not store metadata in extra_body" diff --git a/uv.lock b/uv.lock index f8f310a4..8b603958 100644 --- a/uv.lock +++ b/uv.lock @@ -1283,6 +1283,7 @@ dev = [ { name = "pytest-httpserver" }, { name = "pytest-xdist" }, { name = "ruff" }, + { name = "syrupy" }, { name = "transformers" }, { name = "twine" }, { name = "types-docker" }, @@ -1417,6 +1418,7 @@ requires-dist = [ { name = "selenium", marker = "extra == 'svgbench'", specifier = ">=4.0.0" }, { name = "supabase", marker = "extra == 'supabase'", specifier = ">=2.18.1" }, { name = "swig", marker = "extra == 'box2d'" }, + { name = "syrupy", marker = "extra == 'dev'", specifier = ">=4.0.0" }, { name = "toml", specifier = ">=0.10.0" }, { name = "torch", marker = "extra == 'trl'", specifier = ">=1.9" }, { name = "transformers", marker = "extra == 'adapters'", specifier = ">=4.0.0" }, @@ -6474,6 +6476,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = 
"sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "syrupy" +version = "4.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8c/f8/022d8704a3314f3e96dbd6bbd16ebe119ce30e35f41aabfa92345652fceb/syrupy-4.9.1.tar.gz", hash = "sha256:b7d0fcadad80a7d2f6c4c71917918e8ebe2483e8c703dfc8d49cdbb01081f9a4", size = 52492, upload-time = "2025-03-24T01:36:37.225Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/9d/aef9ec5fd5a4ee2f6a96032c4eda5888c5c7cec65cef6b28c4fc37671d88/syrupy-4.9.1-py3-none-any.whl", hash = "sha256:b94cc12ed0e5e75b448255430af642516842a2374a46936dd2650cfb6dd20eda", size = 52214, upload-time = "2025-03-24T01:36:35.278Z" }, +] + [[package]] name = "temporalio" version = "1.17.0"