From 639db9458e491aa24d5cffd52ff5c9fcfe9c1e50 Mon Sep 17 00:00:00 2001 From: David Gageot Date: Wed, 18 Mar 2026 14:20:44 +0100 Subject: [PATCH] fix: use event timestamps for user messages in SessionFromEvents SessionFromEvents was creating user messages with time.Now(), which runs after the container has finished. This gave user messages a timestamp after the last assistant message, causing Session.Duration() to return ~0 and breaking the longest-first eval sorting. Now user messages use the timestamp from the "user_message" event in the container output stream. Falls back to the first "agent_choice" timestamp when no "user_message" event is present. Assisted-By: docker-agent --- pkg/evaluation/save.go | 49 +++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/pkg/evaluation/save.go b/pkg/evaluation/save.go index 35b1fbae8..b93b147c1 100644 --- a/pkg/evaluation/save.go +++ b/pkg/evaluation/save.go @@ -63,20 +63,29 @@ func SessionFromEvents(events []map[string]any, title string, questions []string // Add user questions as initial messages. // For multi-turn evals, these are interleaved with agent responses - // as they appear in the event stream. The first question is added - // upfront; subsequent questions are inserted when a stream_stopped - // event indicates the agent finished processing the previous turn. + // as they appear in the event stream. Each question is added when + // a "user_message" event is encountered (which carries the correct + // timestamp). A "stream_stopped" event ends the turn and re-arms the + // logic so the next turn's question is added the same way. + // If no "user_message" event is found before the first agent response, + // the question is added with the timestamp of that first response. 
questionIdx := 0 - addNextQuestion := func() { + userMessageAdded := false + addNextQuestion := func(timestamp string) { if questionIdx < len(questions) { - sess.AddMessage(session.UserMessage(questions[questionIdx])) + msg := &session.Message{ + Message: chat.Message{ + Role: chat.MessageRoleUser, + Content: questions[questionIdx], + CreatedAt: timestamp, + }, + } + sess.AddMessage(msg) questionIdx++ + userMessageAdded = true } } - // Add the first question - addNextQuestion() - // Track current assistant message being built var currentContent strings.Builder var currentReasoningContent strings.Builder @@ -122,7 +131,19 @@ func SessionFromEvents(events []map[string]any, title string, questions []string eventTimestamp := parseEventTimestamp(event) switch eventType { + case "user_message": + // Use the event timestamp for the user message instead of time.Now() + if !userMessageAdded { + addNextQuestion(eventTimestamp) + } + case "agent_choice": + // Ensure a user message has been added before the first agent response. + // This handles event streams that lack a "user_message" event. + if !userMessageAdded { + addNextQuestion(eventTimestamp) + } + // Accumulate agent response content if content, ok := event["content"].(string); ok { currentContent.WriteString(content) @@ -237,14 +258,22 @@ func SessionFromEvents(events []map[string]any, title string, questions []string // Flush final assistant message flushAssistantMessage() - // In multi-turn evals, add the next user question after each turn - addNextQuestion() + // In multi-turn evals, add the next user question after each turn. + // Reset the flag so the next user_message event (or agent_choice + // fallback) will add the question for the next turn. + userMessageAdded = false } } // Flush any remaining content flushAssistantMessage() + // Add any remaining questions that weren't added via user_message or + // agent_choice events (e.g. when the event stream is empty). 
+ for questionIdx < len(questions) { + addNextQuestion(time.Now().Format(time.RFC3339)) + } + return sess }