Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 24 additions & 14 deletions eval_protocol/adapters/langfuse.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,22 +379,32 @@ def _create_input_metadata(self, trace: Any, observations: List[Any]) -> InputMe
Returns:
InputMetadata object
"""
# Extract completion parameters from observations
# Extract completion parameters from trace input first, then observations
completion_params = {}

# Look for model parameters in observations
for obs in observations:
if hasattr(obs, "model") and obs.model:
completion_params["model"] = obs.model
if hasattr(obs, "model_parameters") and obs.model_parameters:
params = obs.model_parameters
if "temperature" in params:
completion_params["temperature"] = params["temperature"]
if "max_tokens" in params:
completion_params["max_tokens"] = params["max_tokens"]
if "top_p" in params:
completion_params["top_p"] = params["top_p"]
break
# First, check the trace input for completion_params supplied under its "kwargs" entry by the evaluation test
if hasattr(trace, "input") and trace.input:
if isinstance(trace.input, dict):
kwargs = trace.input.get("kwargs", {})
if "completion_params" in kwargs:
trace_completion_params = kwargs["completion_params"]
if trace_completion_params and isinstance(trace_completion_params, dict):
completion_params.update(trace_completion_params)

# Fallback: look for model parameters in the observations if none were found in the trace input
if not completion_params:
for obs in observations:
if hasattr(obs, "model") and obs.model:
completion_params["model"] = obs.model
if hasattr(obs, "model_parameters") and obs.model_parameters:
params = obs.model_parameters
if "temperature" in params:
completion_params["temperature"] = params["temperature"]
if "max_tokens" in params:
completion_params["max_tokens"] = params["max_tokens"]
if "top_p" in params:
completion_params["top_p"] = params["top_p"]
break

# Create dataset info from trace metadata
dataset_info = {
Expand Down
8 changes: 5 additions & 3 deletions eval_protocol/pytest/evaluation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,11 @@ def _log_eval_error(status: Status, rows: list[EvaluationRow] | None, passed: bo
passed=None,
)
for row in data:
row.input_metadata.completion_params = (
completion_params if completion_params is not None else {}
)
# Only set completion_params when the row does not already carry any (missing or empty)
if not row.input_metadata.completion_params:
row.input_metadata.completion_params = (
completion_params if completion_params is not None else {}
)
# Add mode to session_data
if row.input_metadata.session_data is None:
row.input_metadata.session_data = {}
Expand Down
18 changes: 18 additions & 0 deletions tests/chinook/langfuse/generate_traces.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ async def test_complex_query_0(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row


Expand Down Expand Up @@ -92,6 +95,9 @@ async def test_complex_query_1(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row


Expand Down Expand Up @@ -121,6 +127,9 @@ async def test_complex_query_2(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row


Expand Down Expand Up @@ -150,6 +159,9 @@ async def test_complex_query_3(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row


Expand Down Expand Up @@ -179,6 +191,9 @@ async def test_complex_query_4(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row


Expand Down Expand Up @@ -208,4 +223,7 @@ async def test_complex_query_5(row: EvaluationRow) -> EvaluationRow:
"""
Complex queries - PydanticAI automatically creates rich Langfuse traces.
"""
if langfuse_client:
langfuse_client.update_current_trace(tags=["chinook_sql"])

return row
6 changes: 4 additions & 2 deletions tests/chinook/langfuse/test_langfuse_chinook.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ class Response(BaseModel):
reason: str


def fetch_langfuse_traces_as_evaluation_rows(hours_back: int = 168) -> List[EvaluationRow]:
def fetch_langfuse_traces_as_evaluation_rows(
hours_back: int = 168, tags: List[str] = ["chinook_sql"]
) -> List[EvaluationRow]:
try:
from eval_protocol.adapters.langfuse import create_langfuse_adapter

Expand All @@ -59,7 +61,7 @@ def fetch_langfuse_traces_as_evaluation_rows(hours_back: int = 168) -> List[Eval
from_timestamp = now - timedelta(hours=hours_back)

return adapter.get_evaluation_rows(
limit=20, from_timestamp=from_timestamp, to_timestamp=now, include_tool_calls=True
limit=20, from_timestamp=from_timestamp, to_timestamp=now, include_tool_calls=True, tags=tags
)

except Exception as e:
Expand Down
Loading