eval-protocol
diff --git a/‎vite-app/dist/assets/index-BkNMjR5E.js‎
Lines changed: 136 additions & 0 deletions b/‎vite-app/dist/assets/index-BkNMjR5E.js‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎vite-app/dist/assets/index-BkNMjR5E.js.map‎
Lines changed: 1 addition & 0 deletions b/‎vite-app/dist/assets/index-BkNMjR5E.js.map‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎vite-app/dist/assets/index-CfW2XE3N.js‎
Lines changed: 0 additions & 136 deletions b/‎vite-app/dist/assets/index-CfW2XE3N.js‎
Lines changed: 0 additions & 136 deletions
diff --git a/‎vite-app/dist/assets/index-CfW2XE3N.js.map‎
Lines changed: 0 additions & 1 deletion b/‎vite-app/dist/assets/index-CfW2XE3N.js.map‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎vite-app/dist/assets/index-D6BSbBdY.css‎
Lines changed: 0 additions & 1 deletion b/‎vite-app/dist/assets/index-D6BSbBdY.css‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎vite-app/dist/assets/index-UIwKlxBz.css‎
Lines changed: 1 addition & 0 deletions b/‎vite-app/dist/assets/index-UIwKlxBz.css‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎vite-app/dist/index.html‎
Lines changed: 2 additions & 2 deletions b/‎vite-app/dist/index.html‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎vite-app/src/components/EvaluationRow.tsx‎
Lines changed: 7 additions & 7 deletions b/‎vite-app/src/components/EvaluationRow.tsx‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎vite-app/src/types/eval-protocol.ts‎
Lines changed: 11 additions & 2 deletions b/‎vite-app/src/types/eval-protocol.ts‎
Lines changed: 11 additions & 2 deletions
@@ -5,8 +5,8 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>EP | Log Viewer</title>
     <link rel="icon" href="/assets/favicon-BkAAWQga.png" />
-    <script type="module" crossorigin src="/assets/index-CfW2XE3N.js"></script>
-    <link rel="stylesheet" crossorigin href="/assets/index-D6BSbBdY.css">
+    <script type="module" crossorigin src="/assets/index-BkNMjR5E.js"></script>
+    <link rel="stylesheet" crossorigin href="/assets/index-UIwKlxBz.css">
   </head>
   <body>
     <div id="root"></div>
 
@@ -305,9 +305,9 @@ const GroundTruthSection = observer(
   )
 );
 
-const UsageStatsSection = observer(
-  ({ data }: { data: EvaluationRowType["usage"] }) => (
-    <MetadataSection title="Usage Stats" data={data} />
+const ExecutionMetadataSection = observer(
+  ({ data }: { data: EvaluationRowType["execution_metadata"] }) => (
+    <MetadataSection title="Execution Metadata" data={data} />
   )
 );
 
@@ -348,7 +348,7 @@ const ExpandedContent = observer(
     eval_metadata,
     evaluation_result,
     ground_truth,
-    usage,
+    execution_metadata,
     input_metadata,
     tools,
     rollout_status,
@@ -358,7 +358,7 @@ const ExpandedContent = observer(
     eval_metadata: EvaluationRowType["eval_metadata"];
     evaluation_result: EvaluationRowType["evaluation_result"];
     ground_truth: EvaluationRowType["ground_truth"];
-    usage: EvaluationRowType["usage"];
+    execution_metadata: EvaluationRowType["execution_metadata"];
     input_metadata: EvaluationRowType["input_metadata"];
     tools: EvaluationRowType["tools"];
     rollout_status: EvaluationRowType["rollout_status"];
@@ -375,9 +375,9 @@ const ExpandedContent = observer(
           <EvalMetadataSection data={eval_metadata} />
           <EvaluationResultSection data={evaluation_result} />
           <RolloutStatusSection data={rollout_status} />
+          <ExecutionMetadataSection data={execution_metadata} />
           <IdSection data={row} />
           <GroundTruthSection data={ground_truth} />
-          <UsageStatsSection data={usage} />
           <InputMetadataSection data={input_metadata} />
           <ToolsSection data={tools} />
         </div>
@@ -478,7 +478,7 @@ export const EvaluationRow = observer(
                 eval_metadata={row.eval_metadata}
                 evaluation_result={row.evaluation_result}
                 ground_truth={row.ground_truth}
-                usage={row.usage}
+                execution_metadata={row.execution_metadata}
                 input_metadata={row.input_metadata}
                 tools={row.tools}
                 rollout_status={row.rollout_status}
 
@@ -138,11 +138,21 @@ export const EvalMetadataSchema = z.object({
   passed: z.boolean().optional().describe('Whether the evaluation passed based on the threshold')
 });
 
+export const CostMetricsSchema = z.object({
+  input_cost: z.number().nullable().optional().describe('Cost in USD for input tokens.'),
+  output_cost: z.number().nullable().optional().describe('Cost in USD for output tokens.'),
+  total_cost: z.number().nullable().optional().describe('Total cost in USD for the API call.')
+});
+
 export const ExecutionMetadataSchema = z.object({
   invocation_id: z.string().optional().describe('The ID of the invocation that this row belongs to.'),
   experiment_id: z.string().optional().describe('The ID of the experiment that this row belongs to.'),
   rollout_id: z.string().optional().describe('The ID of the rollout that this row belongs to.'),
   run_id: z.string().optional().describe('The ID of the run that this row belongs to.'),
+  usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
+  cost_metrics: CostMetricsSchema.optional().describe('Cost breakdown for LLM API calls.'),
+  duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for this evaluation row.'),
+  experiment_duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for an entire experiment.')
 });
 
 export const EvaluationRowSchema = z.object({
@@ -151,9 +161,8 @@ export const EvaluationRowSchema = z.object({
   input_metadata: InputMetadataSchema.describe('Metadata related to the input (dataset info, model config, session data, etc.).'),
   rollout_status: StatusSchema.describe('The status of the rollout following AIP-193 standards.'),
   execution_metadata: ExecutionMetadataSchema.optional().describe('Metadata about the execution of the evaluation.'),
-  ground_truth: z.string().optional().describe('Optional ground truth reference for this evaluation.'),
+  ground_truth: z.union([z.string(), z.number(), z.boolean(), z.array(z.any()), z.record(z.string(), z.any())]).nullable().optional().describe('JSON-serializable ground truth reference for this evaluation.'),
   evaluation_result: EvaluateResultSchema.optional().describe('The evaluation result for this row/trajectory.'),
-  usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
   created_at: z.preprocess(
     (val) => typeof val === "string" ? new Date(val) : val,
     z.date()