From 19f8284957df8b48d02ff4d88e89c9d3af948d69 Mon Sep 17 00:00:00 2001 From: vismaytiwari Date: Sun, 21 Jun 2026 21:40:18 +0530 Subject: [PATCH] fix(langchain): mark handled tool errors as errors --- langfuse/langchain/CallbackHandler.py | 24 ++++++++++++++---- tests/unit/test_langchain.py | 35 ++++++++++++++++++++++++++- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py index 2989ef216..2f6efe84f 100644 --- a/langfuse/langchain/CallbackHandler.py +++ b/langfuse/langchain/CallbackHandler.py @@ -1091,7 +1091,7 @@ def on_retriever_end( def on_tool_end( self, - output: str, + output: Any, *, run_id: UUID, parent_run_id: Optional[UUID] = None, @@ -1105,10 +1105,24 @@ def on_tool_end( if observation is not None: if parent_run_id is None: self._clear_root_run_resume_key(run_id) - observation.update( - output=output, - input=kwargs.get("inputs"), - ).end() + + update_kwargs: Dict[str, Any] = { + "output": output, + "input": kwargs.get("inputs"), + } + + if ( + isinstance(output, ToolMessage) + and getattr(output, "status", None) == "error" + ): + update_kwargs["level"] = "ERROR" + update_kwargs["status_message"] = ( + output.content + if isinstance(output.content, str) + else str(output.content) + ) + + observation.update(**update_kwargs).end() except Exception as e: langfuse_logger.exception(e) diff --git a/tests/unit/test_langchain.py b/tests/unit/test_langchain.py index fa7934ba7..8d3b3517b 100644 --- a/tests/unit/test_langchain.py +++ b/tests/unit/test_langchain.py @@ -5,7 +5,7 @@ import pytest from langchain.messages import HumanMessage -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, ToolMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.outputs import ChatGeneration, ChatResult, Generation, LLMResult from langchain_core.prompts import ChatPromptTemplate @@ -791,6 +791,39 @@ class DummyControlFlowError(RuntimeError): assert not _has_run_state(handler, retriever_run_id) +def test_handled_tool_error_marks_observation_error( + langfuse_memory_client, get_span, json_attr +): + handler = CallbackHandler() + run_id = uuid4() + + handler.on_tool_start( + {"name": "failing_tool"}, + '{"query": "x"}', + run_id=run_id, + ) + handler.on_tool_end( + ToolMessage( + content="handled failure", + tool_call_id="call_1", + status="error", + ), + run_id=run_id, + ) + + langfuse_memory_client.flush() + span = get_span("failing_tool") + + assert span.attributes[LangfuseOtelSpanAttributes.OBSERVATION_LEVEL] == "ERROR" + assert ( + span.attributes[LangfuseOtelSpanAttributes.OBSERVATION_STATUS_MESSAGE] + == "handled failure" + ) + assert json_attr(span, LangfuseOtelSpanAttributes.OBSERVATION_OUTPUT)["status"] == ( + "error" + ) + + def test_pending_resume_contexts_are_capped(langfuse_memory_client, monkeypatch): class DummyControlFlowError(RuntimeError): pass