From c2b9ba009be8f1ce0ec0d4130d5eb7e22228088e Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 22 Sep 2025 11:21:02 -0700 Subject: [PATCH 1/2] fix the test --- eval_protocol/pytest/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py index ce15cd19..a584cd3f 100644 --- a/eval_protocol/pytest/utils.py +++ b/eval_protocol/pytest/utils.py @@ -420,7 +420,7 @@ def add_cost_metrics(row: EvaluationRow) -> None: row.execution_metadata.cost_metrics = CostMetrics( input_cost=0.0, output_cost=0.0, - total_cost=0.0, + total_cost_dollars=0.0, ) return @@ -461,5 +461,5 @@ def add_cost_metrics(row: EvaluationRow) -> None: row.execution_metadata.cost_metrics = CostMetrics( input_cost=input_cost, output_cost=output_cost, - total_cost=total_cost, + total_cost_dollars=total_cost, ) From 0820639bd9837405577ab7d2bcb0d15468e5dfc2 Mon Sep 17 00:00:00 2001 From: Dylan Huang Date: Mon, 22 Sep 2025 11:34:54 -0700 Subject: [PATCH 2/2] "total_cost_dollar" --- eval_protocol/models.py | 2 +- eval_protocol/pytest/utils.py | 4 ++-- tests/pytest/test_execution_metadata.py | 10 +++++----- vite-app/src/GlobalState.tsx | 2 +- vite-app/src/components/PivotTab.tsx | 2 +- vite-app/src/types/eval-protocol.ts | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 9507deea..da46e096 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -525,7 +525,7 @@ class CostMetrics(BaseModel): output_cost: Optional[float] = Field(None, description="Cost in USD for output tokens.") - total_cost_dollars: Optional[float] = Field(None, description="Total cost in USD for the API call.") + total_cost_dollar: Optional[float] = Field(None, description="Total cost in USD for the API call.") class ExecutionMetadata(BaseModel): diff --git a/eval_protocol/pytest/utils.py b/eval_protocol/pytest/utils.py index a584cd3f..4e9be951 100644 --- a/eval_protocol/pytest/utils.py +++ b/eval_protocol/pytest/utils.py @@ -420,7 +420,7 @@ def add_cost_metrics(row: EvaluationRow) -> None: row.execution_metadata.cost_metrics = CostMetrics( input_cost=0.0, output_cost=0.0, - total_cost_dollars=0.0, + total_cost_dollar=0.0, ) return @@ -461,5 +461,5 @@ def add_cost_metrics(row: EvaluationRow) -> None: row.execution_metadata.cost_metrics = CostMetrics( input_cost=input_cost, output_cost=output_cost, - total_cost_dollars=total_cost, + total_cost_dollar=total_cost, ) diff --git a/tests/pytest/test_execution_metadata.py b/tests/pytest/test_execution_metadata.py index 2013dbe9..ba3b883d 100644 --- a/tests/pytest/test_execution_metadata.py +++ b/tests/pytest/test_execution_metadata.py @@ -25,7 +25,7 @@ def test_single_model_with_provider(self): assert row.execution_metadata.cost_metrics is not None assert row.execution_metadata.cost_metrics.input_cost is not None assert row.execution_metadata.cost_metrics.output_cost is not None - assert row.execution_metadata.cost_metrics.total_cost_dollars is not None + assert row.execution_metadata.cost_metrics.total_cost_dollar is not None @pytest.mark.skip(reason="Revisit when we figure out how to get cost metrics for multi-agent Pydantic.") def test_pydantic_ai_multi_agent_model_dict(self): @@ -56,7 +56,7 @@ def test_pydantic_ai_multi_agent_model_dict(self): assert row.execution_metadata.cost_metrics is not None assert row.execution_metadata.cost_metrics.input_cost is not None assert row.execution_metadata.cost_metrics.output_cost is not None - assert row.execution_metadata.cost_metrics.total_cost_dollars is not None + assert row.execution_metadata.cost_metrics.total_cost_dollar is not None def test_no_usage_stats(self): """Test case with no usage statistics.""" @@ -71,7 +71,7 @@ def test_no_usage_stats(self): assert row.execution_metadata.cost_metrics is not None assert row.execution_metadata.cost_metrics.input_cost == 0.0 assert row.execution_metadata.cost_metrics.output_cost == 0.0 - assert row.execution_metadata.cost_metrics.total_cost_dollars == 0.0 + assert row.execution_metadata.cost_metrics.total_cost_dollar == 0.0 def test_no_completion_params(self): """Test case with empty completion parameters.""" @@ -88,7 +88,7 @@ def test_no_completion_params(self): assert row.execution_metadata.cost_metrics is not None assert row.execution_metadata.cost_metrics.input_cost == 0.0 assert row.execution_metadata.cost_metrics.output_cost == 0.0 - assert row.execution_metadata.cost_metrics.total_cost_dollars == 0.0 + assert row.execution_metadata.cost_metrics.total_cost_dollar == 0.0 def test_zero_tokens(self): """Test case with zero token usage.""" @@ -105,7 +105,7 @@ def test_zero_tokens(self): assert row.execution_metadata.cost_metrics is not None assert row.execution_metadata.cost_metrics.input_cost == 0.0 assert row.execution_metadata.cost_metrics.output_cost == 0.0 - assert row.execution_metadata.cost_metrics.total_cost_dollars == 0.0 + assert row.execution_metadata.cost_metrics.total_cost_dollar == 0.0 def test_provider_mapping_variations(self): """Test different provider mappings.""" diff --git a/vite-app/src/GlobalState.tsx b/vite-app/src/GlobalState.tsx index 0146538e..49c1f0fe 100644 --- a/vite-app/src/GlobalState.tsx +++ b/vite-app/src/GlobalState.tsx @@ -27,7 +27,7 @@ export const DEFAULT_QUALITY_PIVOT_CONFIG: PivotConfig = { export const DEFAULT_COST_PIVOT_CONFIG: PivotConfig = { selectedRowFields: ["$.eval_metadata.name"], selectedColumnFields: ["$.input_metadata.completion_params.model"], - selectedValueField: "$.execution_metadata.cost_metrics.total_cost_dollars", + selectedValueField: "$.execution_metadata.cost_metrics.total_cost_dollar", selectedAggregator: "sum", }; diff --git a/vite-app/src/components/PivotTab.tsx b/vite-app/src/components/PivotTab.tsx index fffc07fb..7c5f83d4 100644 --- a/vite-app/src/components/PivotTab.tsx +++ b/vite-app/src/components/PivotTab.tsx @@ -221,7 +221,7 @@ const PivotTab = observer(() => { variant="secondary" size="sm" > - Cost (total_cost_dollars) + Cost (total_cost_dollar)