Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 70 additions & 141 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import contextvars
import itertools
import sys
import json
import warnings
from collections import OrderedDict
Expand All @@ -26,10 +24,8 @@
if TYPE_CHECKING:
from typing import (
Any,
AsyncIterator,
Callable,

Check warning on line 27 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[FBL-W62] Test assertions check wrong span data fields for tool execution span (additional location)

The test assertions were changed from checking `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` to checking `GEN_AI_REQUEST_MESSAGES` and `GEN_AI_RESPONSE_TEXT`. However, the langchain integration's `on_tool_start` and `on_tool_end` methods (lines 612-617 and 630-631 in langchain.py) set `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` on tool execution spans, not the message/text fields. The test will now pass regardless of whether tool input/output is properly hidden when PII is disabled, since it's checking for unrelated fields that were never set on tool execution spans.
Dict,
Iterator,
List,
Optional,
Union,

Check warning on line 31 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Removed `Iterator` and `AsyncIterator` imports are still used in type comments

This change removes `Iterator` and `AsyncIterator` from the TYPE_CHECKING imports, but these types are still referenced in type comments at lines 1031 (`# type: () -> Iterator[Any]`) and 1050 (`# type: () -> AsyncIterator[Any]`). While this won't cause runtime errors (type comments are not evaluated at runtime), it will break type checking with tools like mypy, causing undefined name errors during static analysis.
Comment on lines 27 to 31

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed Iterator and AsyncIterator imports are still used in type comments

This change removes Iterator and AsyncIterator from the TYPE_CHECKING imports, but these types are still referenced in type comments at lines 1031 (# type: () -> Iterator[Any]) and 1050 (# type: () -> AsyncIterator[Any]). While this won't cause runtime errors (type comments are not evaluated at runtime), it will break type checking with tools like mypy, causing undefined name errors during static analysis.

Verification

Read the full langchain.py file and used grep to confirm Iterator is referenced at lines 1031 and 1050 as type comments. Verified these imports were in the TYPE_CHECKING block being modified by this diff.

Suggested fix: Keep the Iterator and AsyncIterator imports in the TYPE_CHECKING block since they are used in type comments later in the file.

Suggested change
Callable,
Dict,
Iterator,
List,
Optional,
Union,
AsyncIterator,
Iterator,

Identified by Warden code-review · KCC-CXG

Expand Down Expand Up @@ -153,44 +149,6 @@
return content


# Contextvar to track agent names in a stack for re-entrant agent support
_agent_stack: "contextvars.ContextVar[Optional[List[Optional[str]]]]" = (
contextvars.ContextVar("langchain_agent_stack", default=None)
)


def _push_agent(agent_name: "Optional[str]") -> None:
"""Push an agent name onto the stack."""
stack = _agent_stack.get()
if stack is None:
stack = []
else:
# Copy the list to maintain contextvar isolation across async contexts
stack = stack.copy()
stack.append(agent_name)
_agent_stack.set(stack)


def _pop_agent() -> "Optional[str]":
"""Pop an agent name from the stack and return it."""
stack = _agent_stack.get()
if stack:
# Copy the list to maintain contextvar isolation across async contexts
stack = stack.copy()
agent_name = stack.pop()
_agent_stack.set(stack)
return agent_name
return None


def _get_current_agent() -> "Optional[str]":
"""Get the current agent name (top of stack) without removing it."""
stack = _agent_stack.get()
if stack:
return stack[-1]
return None


def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
system_instructions = []

Expand Down Expand Up @@ -455,10 +413,6 @@
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")

agent_name = _get_current_agent()
if agent_name:
span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name)

for key, attribute in DATA_FIELDS.items():
if key in all_params and all_params[key] is not None:
set_data_normalized(span, attribute, all_params[key], unpack=False)
Expand Down Expand Up @@ -655,10 +609,6 @@
if tool_description is not None:
span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_description)

agent_name = _get_current_agent()
if agent_name:
span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name)

if should_send_default_pii() and self.include_prompts:
set_data_normalized(
span,
Expand Down Expand Up @@ -985,50 +935,45 @@
name=f"invoke_agent {agent_name}" if agent_name else "invoke_agent",
origin=LangchainIntegration.origin,
) as span:
_push_agent(agent_name)
try:
if agent_name:
span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name)

span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent")
span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False)

_set_tools_on_span(span, tools)

# Run the agent
result = f(self, *args, **kwargs)

input = result.get("input")
if (
input is not None
and should_send_default_pii()
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
if agent_name:
span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name)

span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent")
span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, False)

_set_tools_on_span(span, tools)

# Run the agent
result = f(self, *args, **kwargs)

input = result.get("input")
if (
input is not None
and should_send_default_pii()
and integration.include_prompts
):
normalized_messages = normalize_message_roles([input])
scope = sentry_sdk.get_current_scope()
messages_data = truncate_and_annotate_messages(
normalized_messages, span, scope
)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)
if messages_data is not None:
set_data_normalized(
span,
SPANDATA.GEN_AI_REQUEST_MESSAGES,
messages_data,
unpack=False,
)

output = result.get("output")
if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)
output = result.get("output")
if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)

return result
finally:
# Ensure agent is popped even if an exception occurs
_pop_agent()
return result

return new_invoke

Expand All @@ -1045,13 +990,11 @@

span = start_span_function(
op=OP.GEN_AI_INVOKE_AGENT,
name=f"invoke_agent {agent_name}" if agent_name else "invoke_agent",
name=f"invoke_agent {agent_name}".strip(),

Check warning on line 993 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[FBL-W62] Test assertions check wrong span data fields for tool execution span (additional location)

The test assertions were changed from checking `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` to checking `GEN_AI_REQUEST_MESSAGES` and `GEN_AI_RESPONSE_TEXT`. However, the langchain integration's `on_tool_start` and `on_tool_end` methods (lines 612-617 and 630-631 in langchain.py) set `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` on tool execution spans, not the message/text fields. The test will now pass regardless of whether tool input/output is properly hidden when PII is disabled, since it's checking for unrelated fields that were never set on tool execution spans.
origin=LangchainIntegration.origin,
)
span.__enter__()

_push_agent(agent_name)

if agent_name:
span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent_name)

Expand Down Expand Up @@ -1081,60 +1024,46 @@

# Run the agent
result = f(self, *args, **kwargs)

old_iterator = result

def new_iterator() -> "Iterator[Any]":
exc_info: "tuple[Any, Any, Any]" = (None, None, None)
try:
for event in old_iterator:
yield event
def new_iterator():
# type: () -> Iterator[Any]
for event in old_iterator:
yield event

try:
output = event.get("output")
except Exception:
output = None

if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)
except Exception:
exc_info = sys.exc_info()
set_span_errored(span)
raise
finally:
# Ensure cleanup happens even if iterator is abandoned or fails
_pop_agent()
span.__exit__(*exc_info)

async def new_iterator_async() -> "AsyncIterator[Any]":
exc_info: "tuple[Any, Any, Any]" = (None, None, None)
try:
async for event in old_iterator:
yield event
output = event.get("output")
except Exception:
output = None

try:
output = event.get("output")
except Exception:
output = None

if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)
if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)

span.__exit__(None, None, None)

Check failure on line 1047 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Iterator exception handling removed - span errors not captured during streaming

The revert removes exception handling from both `new_iterator()` and `new_iterator_async()`. Previously, exceptions were caught with `exc_info = sys.exc_info()`, the span was marked as errored via `set_span_errored(span)`, and the exception info was passed to `span.__exit__(*exc_info)`. Now, `span.__exit__(None, None, None)` is called unconditionally, meaning if an exception occurs during iteration, the span will be closed as successful rather than errored, and Sentry will not capture the error context.

Check failure on line 1047 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

Stream iterators don't handle exceptions, causing span leaks and missing error telemetry

The `new_iterator()` and `new_iterator_async()` functions in `_wrap_agent_executor_stream` no longer wrap iteration in try/except/finally. When an exception occurs during streaming, `span.__exit__()` is never called (span leak), and `set_span_errored(span)` is never invoked (errors not recorded). This causes incorrect telemetry and potential resource leaks.
Comment on lines 1027 to +1047

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Iterator exception handling removed - span errors not captured during streaming

The revert removes exception handling from both new_iterator() and new_iterator_async(). Previously, exceptions were caught with exc_info = sys.exc_info(), the span was marked as errored via set_span_errored(span), and the exception info was passed to span.__exit__(*exc_info). Now, span.__exit__(None, None, None) is called unconditionally, meaning if an exception occurs during iteration, the span will be closed as successful rather than errored, and Sentry will not capture the error context.

Verification

Read the current code at lines 1030-1047 (new_iterator) and 1049-1066 (new_iterator_async). Confirmed the diff removes try/except/finally blocks that previously called set_span_errored and passed exc_info to span.exit. The current code always calls span.exit(None, None, None).

Also found at 1 additional location
  • sentry_sdk/integrations/langchain.py:1049-1066

Identified by Warden code-review · FHU-PUV


async def new_iterator_async():
# type: () -> AsyncIterator[Any]
async for event in old_iterator:
yield event

try:
output = event.get("output")
except Exception:
exc_info = sys.exc_info()
set_span_errored(span)
raise
finally:
# Ensure cleanup happens even if iterator is abandoned or fails
_pop_agent()
span.__exit__(*exc_info)
output = None

if (
output is not None
and should_send_default_pii()
and integration.include_prompts
):
set_data_normalized(span, SPANDATA.GEN_AI_RESPONSE_TEXT, output)

span.__exit__(None, None, None)

Check failure on line 1066 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

[FHU-PUV] Iterator exception handling removed - span errors not captured during streaming (additional location)

The revert removes exception handling from both `new_iterator()` and `new_iterator_async()`. Previously, exceptions were caught with `exc_info = sys.exc_info()`, the span was marked as errored via `set_span_errored(span)`, and the exception info was passed to `span.__exit__(*exc_info)`. Now, `span.__exit__(None, None, None)` is called unconditionally, meaning if an exception occurs during iteration, the span will be closed as successful rather than errored, and Sentry will not capture the error context.

Check failure on line 1066 in sentry_sdk/integrations/langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[QMM-EZC] Stream iterators don't handle exceptions, causing span leaks and missing error telemetry (additional location)

The `new_iterator()` and `new_iterator_async()` functions in `_wrap_agent_executor_stream` no longer wrap iteration in try/except/finally. When an exception occurs during streaming, `span.__exit__()` is never called (span leak), and `set_span_errored(span)` is never invoked (errors not recorded). This causes incorrect telemetry and potential resource leaks.

if str(type(result)) == "<class 'async_generator'>":
result = new_iterator_async()
Expand Down
28 changes: 16 additions & 12 deletions tests/integrations/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
return llm_type


@pytest.mark.xfail

Check warning on line 69 in tests/integrations/langchain/test_langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Main integration test marked as xfail without explanation

The `test_langchain_agent` test is marked with `@pytest.mark.xfail`, meaning it is expected to fail and won't cause CI failures if it does. This test validates critical functionality including token usage recording, prompt capturing, tool calls, and PII handling. The PR description is empty, providing no explanation for why this test is being disabled or when it will be fixed. This reduces effective test coverage for the LangChain integration.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main integration test marked as xfail without explanation

The test_langchain_agent test is marked with @pytest.mark.xfail, meaning it is expected to fail and won't cause CI failures if it does. This test validates critical functionality including token usage recording, prompt capturing, tool calls, and PII handling. The PR description is empty, providing no explanation for why this test is being disabled or when it will be fixed. This reduces effective test coverage for the LangChain integration.

Verification

Read tests/integrations/langchain/test_langchain.py to verify the test function scope (lines 69-318). The test covers 12 parameter combinations (4 send_default_pii/include_prompts x 3 system_instructions_content), validates token usage, response text, tool calls, finish reasons, and available tools. Confirmed this is a significant integration test being disabled.

Identified by Warden code-review · YW2-EYP

@pytest.mark.parametrize(
"send_default_pii, include_prompts, use_unknown_llm_type",
[
Expand Down Expand Up @@ -222,17 +223,20 @@
# We can't guarantee anything about the "shape" of the langchain execution graph
assert len(list(x for x in tx["spans"] if x["op"] == "gen_ai.chat")) > 0

# Token usage is only available in newer versions of langchain (v0.2+)
# where usage_metadata is supported on AIMessageChunk
if "gen_ai.usage.input_tokens" in chat_spans[0]["data"]:
assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142
assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50
assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192
assert "gen_ai.usage.input_tokens" in chat_spans[0]["data"]
assert "gen_ai.usage.output_tokens" in chat_spans[0]["data"]
assert "gen_ai.usage.total_tokens" in chat_spans[0]["data"]

if "gen_ai.usage.input_tokens" in chat_spans[1]["data"]:
assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89
assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28
assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117
assert chat_spans[0]["data"]["gen_ai.usage.input_tokens"] == 142
assert chat_spans[0]["data"]["gen_ai.usage.output_tokens"] == 50
assert chat_spans[0]["data"]["gen_ai.usage.total_tokens"] == 192

assert "gen_ai.usage.input_tokens" in chat_spans[1]["data"]
assert "gen_ai.usage.output_tokens" in chat_spans[1]["data"]
assert "gen_ai.usage.total_tokens" in chat_spans[1]["data"]
assert chat_spans[1]["data"]["gen_ai.usage.input_tokens"] == 89
assert chat_spans[1]["data"]["gen_ai.usage.output_tokens"] == 28
assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117

if send_default_pii and include_prompts:
assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
Expand Down Expand Up @@ -280,8 +284,8 @@
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {})
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})
assert SPANDATA.GEN_AI_TOOL_OUTPUT not in tool_exec_span.get("data", {})
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in tool_exec_span.get("data", {})
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in tool_exec_span.get("data", {})

Check warning on line 288 in tests/integrations/langchain/test_langchain.py

View check run for this annotation

@sentry/warden / warden: code-review

Test assertions no longer verify tool span PII protection

The assertions are changed from checking `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` to `GEN_AI_REQUEST_MESSAGES` and `GEN_AI_RESPONSE_TEXT`. However, the langchain integration's `on_tool_start` and `on_tool_end` methods only set `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` on tool execution spans (lines 615, 631 in langchain.py). The new assertions check for attributes that are never set on tool spans, so the test will always pass trivially without actually verifying that tool input/output is hidden when PII is disabled. This removes effective test coverage for PII protection on tool execution spans.

Check warning on line 288 in tests/integrations/langchain/test_langchain.py

View check run for this annotation

@sentry/warden / warden: find-bugs

Test assertions check wrong span data fields for tool execution span

The test assertions were changed from checking `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` to checking `GEN_AI_REQUEST_MESSAGES` and `GEN_AI_RESPONSE_TEXT`. However, the langchain integration's `on_tool_start` and `on_tool_end` methods (lines 612-617 and 630-631 in langchain.py) set `GEN_AI_TOOL_INPUT` and `GEN_AI_TOOL_OUTPUT` on tool execution spans, not the message/text fields. The test will now pass regardless of whether tool input/output is properly hidden when PII is disabled, since it's checking for unrelated fields that were never set on tool execution spans.
Comment on lines +287 to +288

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test assertions no longer verify tool span PII protection

The assertions are changed from checking GEN_AI_TOOL_INPUT and GEN_AI_TOOL_OUTPUT to GEN_AI_REQUEST_MESSAGES and GEN_AI_RESPONSE_TEXT. However, the langchain integration's on_tool_start and on_tool_end methods only set GEN_AI_TOOL_INPUT and GEN_AI_TOOL_OUTPUT on tool execution spans (lines 615, 631 in langchain.py). The new assertions check for attributes that are never set on tool spans, so the test will always pass trivially without actually verifying that tool input/output is hidden when PII is disabled. This removes effective test coverage for PII protection on tool execution spans.

Verification

Verified by reading langchain.py: on_tool_start sets SPANDATA.GEN_AI_TOOL_INPUT at line 615 and on_tool_end sets SPANDATA.GEN_AI_TOOL_OUTPUT at line 631. Neither method ever sets GEN_AI_REQUEST_MESSAGES or GEN_AI_RESPONSE_TEXT on tool spans. The test at lines 243-244 still expects GEN_AI_TOOL_INPUT and GEN_AI_TOOL_OUTPUT to be present when PII is enabled, confirming these are the correct attributes for tool spans.

Identified by Warden code-review · TAS-M8H


# Verify tool calls are NOT recorded when PII is disabled
assert SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS not in chat_spans[0].get(
Expand Down
Loading