Skip to content

Commit 0dc82c7

Browse files
committed
unique traces
1 parent ae7211a commit 0dc82c7

File tree

3 files changed

+8
-4
lines changed

3 files changed

+8
-4
lines changed

eval_protocol/adapters/langfuse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def get_evaluation_rows(
6868
from_timestamp: Optional[datetime] = None,
6969
to_timestamp: Optional[datetime] = None,
7070
include_tool_calls: bool = True,
71-
page_size: int = 30,
71+
page_size: int = 30, # TODO: remove probably
7272
sleep_between_gets: float = 0.1,
7373
max_retries: int = 3,
7474
) -> List[EvaluationRow]:

eval_protocol/quickstart/llm_judge.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@
3636
)
3737
],
3838
completion_params=[
39-
# {
40-
# "model": "fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
41-
# },
4239
{"model": "gpt-4.1"},
4340
{
4441
"max_tokens": 131000,

eval_protocol/quickstart/utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def split_multi_turn_rows(data: list[EvaluationRow]) -> list[EvaluationRow]:
112112
List of expanded EvaluationRow objects, one for each assistant message
113113
"""
114114
expanded_rows = []
115+
seen_traces: set[str] = set()
115116

116117
for row in data:
117118
messages = row.messages
@@ -128,6 +129,12 @@ def split_multi_turn_rows(data: list[EvaluationRow]) -> list[EvaluationRow]:
128129
messages_before_assistant = messages[:pos]
129130
assistant_message = messages[pos]
130131

132+
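# Every request is traced separately, so the same conversation prefix can show up in several rows; key each candidate on the serialized messages preceding the assistant turn and skip prefixes we have already emitted.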
133+
curr_trace = "\n".join(serialize_message(m) for m in messages_before_assistant)
134+
if curr_trace in seen_traces:
135+
continue
136+
seen_traces.add(curr_trace)
137+
131138
ground_truth_message = serialize_message(assistant_message)
132139

133140
expanded_rows.append(

0 commit comments

Comments
 (0)