Skip to content

Commit 95a94c8

Browse files
authored
Execution metadata UI added (#160)
1 parent 2e225b7 commit 95a94c8

File tree

9 files changed

+158
-149
lines changed

9 files changed

+158
-149
lines changed

vite-app/dist/assets/index-BkNMjR5E.js

Lines changed: 136 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-BkNMjR5E.js.map

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/assets/index-CfW2XE3N.js

Lines changed: 0 additions & 136 deletions
This file was deleted.

vite-app/dist/assets/index-CfW2XE3N.js.map

Lines changed: 0 additions & 1 deletion
This file was deleted.

vite-app/dist/assets/index-D6BSbBdY.css

Lines changed: 0 additions & 1 deletion
This file was deleted.

vite-app/dist/assets/index-UIwKlxBz.css

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vite-app/dist/index.html

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@
55
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
66
<title>EP | Log Viewer</title>
77
<link rel="icon" href="/assets/favicon-BkAAWQga.png" />
8-
<script type="module" crossorigin src="/assets/index-CfW2XE3N.js"></script>
9-
<link rel="stylesheet" crossorigin href="/assets/index-D6BSbBdY.css">
8+
<script type="module" crossorigin src="/assets/index-BkNMjR5E.js"></script>
9+
<link rel="stylesheet" crossorigin href="/assets/index-UIwKlxBz.css">
1010
</head>
1111
<body>
1212
<div id="root"></div>

vite-app/src/components/EvaluationRow.tsx

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -305,9 +305,9 @@ const GroundTruthSection = observer(
305305
)
306306
);
307307

308-
const UsageStatsSection = observer(
309-
({ data }: { data: EvaluationRowType["usage"] }) => (
310-
<MetadataSection title="Usage Stats" data={data} />
308+
const ExecutionMetadataSection = observer(
309+
({ data }: { data: EvaluationRowType["execution_metadata"] }) => (
310+
<MetadataSection title="Execution Metadata" data={data} />
311311
)
312312
);
313313

@@ -348,7 +348,7 @@ const ExpandedContent = observer(
348348
eval_metadata,
349349
evaluation_result,
350350
ground_truth,
351-
usage,
351+
execution_metadata,
352352
input_metadata,
353353
tools,
354354
rollout_status,
@@ -358,7 +358,7 @@ const ExpandedContent = observer(
358358
eval_metadata: EvaluationRowType["eval_metadata"];
359359
evaluation_result: EvaluationRowType["evaluation_result"];
360360
ground_truth: EvaluationRowType["ground_truth"];
361-
usage: EvaluationRowType["usage"];
361+
execution_metadata: EvaluationRowType["execution_metadata"];
362362
input_metadata: EvaluationRowType["input_metadata"];
363363
tools: EvaluationRowType["tools"];
364364
rollout_status: EvaluationRowType["rollout_status"];
@@ -375,9 +375,9 @@ const ExpandedContent = observer(
375375
<EvalMetadataSection data={eval_metadata} />
376376
<EvaluationResultSection data={evaluation_result} />
377377
<RolloutStatusSection data={rollout_status} />
378+
<ExecutionMetadataSection data={execution_metadata} />
378379
<IdSection data={row} />
379380
<GroundTruthSection data={ground_truth} />
380-
<UsageStatsSection data={usage} />
381381
<InputMetadataSection data={input_metadata} />
382382
<ToolsSection data={tools} />
383383
</div>
@@ -478,7 +478,7 @@ export const EvaluationRow = observer(
478478
eval_metadata={row.eval_metadata}
479479
evaluation_result={row.evaluation_result}
480480
ground_truth={row.ground_truth}
481-
usage={row.usage}
481+
execution_metadata={row.execution_metadata}
482482
input_metadata={row.input_metadata}
483483
tools={row.tools}
484484
rollout_status={row.rollout_status}

vite-app/src/types/eval-protocol.ts

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -138,11 +138,21 @@ export const EvalMetadataSchema = z.object({
138138
passed: z.boolean().optional().describe('Whether the evaluation passed based on the threshold')
139139
});
140140

141+
export const CostMetricsSchema = z.object({
142+
input_cost: z.number().nullable().optional().describe('Cost in USD for input tokens.'),
143+
output_cost: z.number().nullable().optional().describe('Cost in USD for output tokens.'),
144+
total_cost: z.number().nullable().optional().describe('Total cost in USD for the API call.')
145+
});
146+
141147
export const ExecutionMetadataSchema = z.object({
142148
invocation_id: z.string().optional().describe('The ID of the invocation that this row belongs to.'),
143149
experiment_id: z.string().optional().describe('The ID of the experiment that this row belongs to.'),
144150
rollout_id: z.string().optional().describe('The ID of the rollout that this row belongs to.'),
145151
run_id: z.string().optional().describe('The ID of the run that this row belongs to.'),
152+
usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
153+
cost_metrics: CostMetricsSchema.optional().describe('Cost breakdown for LLM API calls.'),
154+
duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for this evaluation row.'),
155+
experiment_duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for an entire experiment.')
146156
});
147157

148158
export const EvaluationRowSchema = z.object({
@@ -151,9 +161,8 @@ export const EvaluationRowSchema = z.object({
151161
input_metadata: InputMetadataSchema.describe('Metadata related to the input (dataset info, model config, session data, etc.).'),
152162
rollout_status: StatusSchema.describe('The status of the rollout following AIP-193 standards.'),
153163
execution_metadata: ExecutionMetadataSchema.optional().describe('Metadata about the execution of the evaluation.'),
154-
ground_truth: z.string().optional().describe('Optional ground truth reference for this evaluation.'),
164+
ground_truth: z.union([z.string(), z.number(), z.boolean(), z.array(z.any()), z.record(z.string(), z.any())]).nullable().optional().describe('JSON-serializable ground truth reference for this evaluation.'),
155165
evaluation_result: EvaluateResultSchema.optional().describe('The evaluation result for this row/trajectory.'),
156-
usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
157166
created_at: z.preprocess(
158167
(val) => typeof val === "string" ? new Date(val) : val,
159168
z.date()

0 commit comments

Comments
 (0)