Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 136 additions & 0 deletions vite-app/dist/assets/index-BkNMjR5E.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions vite-app/dist/assets/index-BkNMjR5E.js.map

Large diffs are not rendered by default.

136 changes: 0 additions & 136 deletions vite-app/dist/assets/index-CfW2XE3N.js

This file was deleted.

1 change: 0 additions & 1 deletion vite-app/dist/assets/index-CfW2XE3N.js.map

This file was deleted.

1 change: 0 additions & 1 deletion vite-app/dist/assets/index-D6BSbBdY.css

This file was deleted.

1 change: 1 addition & 0 deletions vite-app/dist/assets/index-UIwKlxBz.css

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions vite-app/dist/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>EP | Log Viewer</title>
<link rel="icon" href="/assets/favicon-BkAAWQga.png" />
<script type="module" crossorigin src="/assets/index-CfW2XE3N.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-D6BSbBdY.css">
<script type="module" crossorigin src="/assets/index-BkNMjR5E.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-UIwKlxBz.css">
</head>
<body>
<div id="root"></div>
Expand Down
14 changes: 7 additions & 7 deletions vite-app/src/components/EvaluationRow.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,9 @@ const GroundTruthSection = observer(
)
);

const UsageStatsSection = observer(
({ data }: { data: EvaluationRowType["usage"] }) => (
<MetadataSection title="Usage Stats" data={data} />
const ExecutionMetadataSection = observer(
({ data }: { data: EvaluationRowType["execution_metadata"] }) => (
<MetadataSection title="Execution Metadata" data={data} />
)
);

Expand Down Expand Up @@ -348,7 +348,7 @@ const ExpandedContent = observer(
eval_metadata,
evaluation_result,
ground_truth,
usage,
execution_metadata,
input_metadata,
tools,
rollout_status,
Expand All @@ -358,7 +358,7 @@ const ExpandedContent = observer(
eval_metadata: EvaluationRowType["eval_metadata"];
evaluation_result: EvaluationRowType["evaluation_result"];
ground_truth: EvaluationRowType["ground_truth"];
usage: EvaluationRowType["usage"];
execution_metadata: EvaluationRowType["execution_metadata"];
input_metadata: EvaluationRowType["input_metadata"];
tools: EvaluationRowType["tools"];
rollout_status: EvaluationRowType["rollout_status"];
Expand All @@ -375,9 +375,9 @@ const ExpandedContent = observer(
<EvalMetadataSection data={eval_metadata} />
<EvaluationResultSection data={evaluation_result} />
<RolloutStatusSection data={rollout_status} />
<ExecutionMetadataSection data={execution_metadata} />
<IdSection data={row} />
<GroundTruthSection data={ground_truth} />
<UsageStatsSection data={usage} />
<InputMetadataSection data={input_metadata} />
<ToolsSection data={tools} />
</div>
Expand Down Expand Up @@ -478,7 +478,7 @@ export const EvaluationRow = observer(
eval_metadata={row.eval_metadata}
evaluation_result={row.evaluation_result}
ground_truth={row.ground_truth}
usage={row.usage}
execution_metadata={row.execution_metadata}
input_metadata={row.input_metadata}
tools={row.tools}
rollout_status={row.rollout_status}
Expand Down
13 changes: 11 additions & 2 deletions vite-app/src/types/eval-protocol.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,21 @@ export const EvalMetadataSchema = z.object({
passed: z.boolean().optional().describe('Whether the evaluation passed based on the threshold')
});

/**
 * Zod schema for the cost breakdown (in USD) of an LLM API call.
 *
 * All three fields are numbers that may also be `null` or omitted.
 * No relationship between the fields is validated here (for example,
 * `total_cost` is not checked against `input_cost + output_cost`).
 */
export const CostMetricsSchema = z.object({
input_cost: z.number().nullable().optional().describe('Cost in USD for input tokens.'),
output_cost: z.number().nullable().optional().describe('Cost in USD for output tokens.'),
total_cost: z.number().nullable().optional().describe('Total cost in USD for the API call.')
});

/**
 * Zod schema for metadata about the execution of an evaluation row:
 * the IDs tying the row to its invocation, experiment, rollout and run,
 * plus token usage, cost breakdown and processing durations.
 *
 * Every field is optional. The two duration fields also accept `null`;
 * the four ID fields, `usage` and `cost_metrics` do not.
 *
 * NOTE(review): because the ID fields, `usage` and `cost_metrics` are
 * `.optional()` but not `.nullable()`, an explicit `null` for any of them
 * fails validation. Confirm the producer omits unset values rather than
 * sending `null`.
 */
export const ExecutionMetadataSchema = z.object({
invocation_id: z.string().optional().describe('The ID of the invocation that this row belongs to.'),
experiment_id: z.string().optional().describe('The ID of the experiment that this row belongs to.'),
rollout_id: z.string().optional().describe('The ID of the rollout that this row belongs to.'),
run_id: z.string().optional().describe('The ID of the run that this row belongs to.'),
usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
cost_metrics: CostMetricsSchema.optional().describe('Cost breakdown for LLM API calls.'),
duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for this evaluation row.'),
experiment_duration_seconds: z.number().nullable().optional().describe('Processing duration in seconds for an entire experiment.')
});

export const EvaluationRowSchema = z.object({
Expand All @@ -151,9 +161,8 @@ export const EvaluationRowSchema = z.object({
input_metadata: InputMetadataSchema.describe('Metadata related to the input (dataset info, model config, session data, etc.).'),
rollout_status: StatusSchema.describe('The status of the rollout following AIP-193 standards.'),
execution_metadata: ExecutionMetadataSchema.optional().describe('Metadata about the execution of the evaluation.'),
ground_truth: z.string().optional().describe('Optional ground truth reference for this evaluation.'),
ground_truth: z.union([z.string(), z.number(), z.boolean(), z.array(z.any()), z.record(z.string(), z.any())]).nullable().optional().describe('JSON-serializable ground truth reference for this evaluation.'),
evaluation_result: EvaluateResultSchema.optional().describe('The evaluation result for this row/trajectory.'),
usage: CompletionUsageSchema.optional().describe('Token usage statistics from LLM calls during execution.'),
created_at: z.preprocess(
(val) => typeof val === "string" ? new Date(val) : val,
z.date()
Expand Down
Loading