From eebf58ed18e1f3bf6d4115423a907776f8d644ad Mon Sep 17 00:00:00 2001
From: cirilla-zmh
Date: Wed, 3 Dec 2025 17:17:37 +0800
Subject: [PATCH 01/12] Add support for emitting inference events and enrich message types

Change-Id: I8fd0b896fc103a986f78c7351ce627611e545a62
Co-developed-by: Cursor
---
 util/opentelemetry-util-genai/CHANGELOG.md    |   1 +
 util/opentelemetry-util-genai/README.rst      |  11 +-
 .../src/opentelemetry/util/genai/handler.py   |  24 +-
 .../opentelemetry/util/genai/span_utils.py    | 193 ++++++++++---
 .../src/opentelemetry/util/genai/types.py     |  54 +++-
 .../tests/test_utils.py                       | 260 +++++++++++++++++-
 6 files changed, 486 insertions(+), 57 deletions(-)

diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md
index ca2c2d0a0e..45a2d662de 100644
--- a/util/opentelemetry-util-genai/CHANGELOG.md
+++ b/util/opentelemetry-util-genai/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+- Add support for emitting inference events and enrich message types. ([]())
 - Minor change to check LRU cache in Completion Hook before acquiring semaphore/thread ([#3907](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3907)).
 - Add environment variable for genai upload hook queue size ([#3943](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943))
 
diff --git a/util/opentelemetry-util-genai/README.rst b/util/opentelemetry-util-genai/README.rst
index a06b3a0fd0..c69530278a 100644
--- a/util/opentelemetry-util-genai/README.rst
+++ b/util/opentelemetry-util-genai/README.rst
@@ -9,7 +9,11 @@ while providing standardization for generating both types of otel, "spans and me
 This package relies on environment variables to configure capturing of message content.
 By default, message content will not be captured.
 Set the environment variable `OTEL_SEMCONV_STABILITY_OPT_IN` to `gen_ai_latest_experimental` to enable experimental features.
-And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to `SPAN_ONLY` or `SPAN_AND_EVENT` to capture message content in spans.
+And set the environment variable `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` to one of:
+- `NO_CONTENT`: Do not capture message content (default).
+- `SPAN_ONLY`: Capture message content in spans only.
+- `EVENT_ONLY`: Capture message content in events only.
+- `SPAN_AND_EVENT`: Capture message content in both spans and events.
 
 This package provides these span attributes:
 
@@ -23,6 +27,11 @@
 - `gen_ai.usage.output_tokens`: Int(7)
 - `gen_ai.input.messages`: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]')
 - `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]')
+- `gen_ai.system.instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided)
+
+When `EVENT_ONLY` or `SPAN_AND_EVENT` mode is enabled and a LoggerProvider is configured,
+the package also emits `gen_ai.client.inference.operation.details` events with structured
+message content (as dictionaries instead of JSON strings).
 
Installation diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index bc2f2fa350..5271840246 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -66,6 +66,10 @@ from opentelemetry import context as otel_context from opentelemetry.metrics import MeterProvider, get_meter +from opentelemetry._logs import ( + LoggerProvider, + get_logger, +) from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -80,7 +84,8 @@ from opentelemetry.util.genai.metrics import InvocationMetricsRecorder from opentelemetry.util.genai.span_utils import ( _apply_error_attributes, - _apply_finish_attributes, + _apply_llm_finish_attributes, + _maybe_emit_llm_event, ) from opentelemetry.util.genai.types import Error, LLMInvocation from opentelemetry.util.genai.version import __version__ @@ -96,6 +101,7 @@ def __init__( self, tracer_provider: TracerProvider | None = None, meter_provider: MeterProvider | None = None, + logger_provider: LoggerProvider | None = None, ): self._tracer = get_tracer( __name__, @@ -106,6 +112,12 @@ def __init__( self._metrics_recorder: InvocationMetricsRecorder | None = None meter = get_meter(__name__, meter_provider=meter_provider) self._metrics_recorder = InvocationMetricsRecorder(meter) + self._logger = get_logger( + __name__, + __version__, + logger_provider, + schema_url=Schemas.V1_37_0.value, + ) def _record_llm_metrics( self, @@ -148,8 +160,9 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab return invocation span = invocation.span - _apply_finish_attributes(span, invocation) + _apply_llm_finish_attributes(span, invocation) self._record_llm_metrics(invocation, span) + _maybe_emit_llm_event(self._logger, invocation) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -164,10 +177,11 @@ def fail_llm( # pylint: disable=no-self-use return invocation span = invocation.span - _apply_finish_attributes(invocation.span, invocation) - _apply_error_attributes(span, error) + _apply_llm_finish_attributes(invocation.span, invocation) + _apply_error_attributes(invocation.span, error) error_type = getattr(error.type, "__qualname__", None) self._record_llm_metrics(invocation, span, error_type=error_type) + _maybe_emit_llm_event(self._logger, invocation, error) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -201,6 +215,7 @@ def llm( def get_telemetry_handler( tracer_provider: TracerProvider | None = None, meter_provider: MeterProvider | None = None, + logger_provider: LoggerProvider | None = None, ) -> TelemetryHandler: """ Returns a singleton TelemetryHandler instance. 
@@ -212,6 +227,7 @@ def get_telemetry_handler( handler = TelemetryHandler( tracer_provider=tracer_provider, meter_provider=meter_provider, + logger_provider=logger_provider ) setattr(get_telemetry_handler, "_default_handler", handler) return handler diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index b9b8777ec2..03c0d5349b 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -17,6 +17,7 @@ from dataclasses import asdict from typing import Any +from opentelemetry._logs import Logger, LogRecord from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -31,6 +32,7 @@ Error, InputMessage, LLMInvocation, + MessagePart, OutputMessage, ) from opentelemetry.util.genai.utils import ( @@ -41,63 +43,159 @@ ) -def _apply_common_span_attributes( - span: Span, invocation: LLMInvocation -) -> None: - """Apply attributes shared by finish() and error() and compute metrics. +def _get_llm_common_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get common LLM attributes shared by finish() and error() paths. - Returns (genai_attributes) for use with metrics. + Returns a dictionary of attributes. """ - span.update_name( - f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}".strip() - ) - span.set_attribute( - GenAI.GEN_AI_OPERATION_NAME, GenAI.GenAiOperationNameValues.CHAT.value + attributes: dict[str, Any] = {} + attributes[GenAI.GEN_AI_OPERATION_NAME] = ( + GenAI.GenAiOperationNameValues.CHAT.value ) if invocation.request_model: - span.set_attribute( - GenAI.GEN_AI_REQUEST_MODEL, invocation.request_model - ) + attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model if invocation.provider is not None: # TODO: clean provider name to match GenAiProviderNameValues? - span.set_attribute(GenAI.GEN_AI_PROVIDER_NAME, invocation.provider) + attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider + return attributes + - _apply_response_attributes(span, invocation) +def _get_llm_span_name(invocation: LLMInvocation) -> str: + """Get the span name for an LLM invocation.""" + return f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}".strip() -def _maybe_set_span_messages( - span: Span, +def _get_llm_messages_attributes_for_span( input_messages: list[InputMessage], output_messages: list[OutputMessage], -) -> None: + system_instruction: list[MessagePart] | None = None, +) -> dict[str, Any]: + """Get message attributes formatted for span (JSON string format). + + Returns empty dict if not in experimental mode or content capturing is disabled. 
+ """ + attributes: dict[str, Any] = {} if not is_experimental_mode() or get_content_capturing_mode() not in ( ContentCapturingMode.SPAN_ONLY, ContentCapturingMode.SPAN_AND_EVENT, ): - return + return attributes if input_messages: - span.set_attribute( - GenAI.GEN_AI_INPUT_MESSAGES, - gen_ai_json_dumps([asdict(message) for message in input_messages]), + attributes[GenAI.GEN_AI_INPUT_MESSAGES] = gen_ai_json_dumps( + [asdict(message) for message in input_messages] ) if output_messages: - span.set_attribute( - GenAI.GEN_AI_OUTPUT_MESSAGES, - gen_ai_json_dumps( - [asdict(message) for message in output_messages] - ), + attributes[GenAI.GEN_AI_OUTPUT_MESSAGES] = gen_ai_json_dumps( + [asdict(message) for message in output_messages] + ) + if system_instruction: + attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = gen_ai_json_dumps( + [asdict(part) for part in system_instruction] ) + return attributes -def _apply_finish_attributes(span: Span, invocation: LLMInvocation) -> None: +def _get_llm_messages_attributes_for_event( + input_messages: list[InputMessage], + output_messages: list[OutputMessage], + system_instruction: list[MessagePart] | None = None, +) -> dict[str, Any]: + """Get message attributes formatted for event (structured format). + + Returns empty dict if not in experimental mode or content capturing is disabled. + """ + attributes: dict[str, Any] = {} + if not is_experimental_mode() or get_content_capturing_mode() not in ( + ContentCapturingMode.EVENT_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ): + return attributes + if input_messages: + attributes[GenAI.GEN_AI_INPUT_MESSAGES] = [ + asdict(message) for message in input_messages + ] + if output_messages: + attributes[GenAI.GEN_AI_OUTPUT_MESSAGES] = [ + asdict(message) for message in output_messages + ] + if system_instruction: + attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = [ + asdict(part) for part in system_instruction + ] + return attributes + + +def _maybe_emit_llm_event( + logger: Logger | None, + invocation: LLMInvocation, + error: Error | None = None, +) -> None: + """Emit a gen_ai.client.inference.operation.details event to the logger. + + This function creates a LogRecord event following the semantic convention + for gen_ai.client.inference.operation.details as specified in the GenAI + event semantic conventions. 
+ """ + if not is_experimental_mode() or get_content_capturing_mode() not in ( + ContentCapturingMode.EVENT_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ): + return + + if logger is None: + return + + # Build event attributes by reusing the attribute getter functions + attributes: dict[str, Any] = {} + attributes.update(_get_llm_common_attributes(invocation)) + attributes.update(_get_llm_request_attributes(invocation)) + attributes.update(_get_llm_response_attributes(invocation)) + attributes.update( + _get_llm_messages_attributes_for_event( + invocation.input_messages, + invocation.output_messages, + invocation.system_instruction, + ) + ) + + # Add error.type if operation ended in error + if error is not None: + attributes[ErrorAttributes.ERROR_TYPE] = error.type.__qualname__ + + # Create and emit the event + event = LogRecord( + event_name="gen_ai.client.inference.operation.details", + attributes=attributes, + ) + logger.emit(event) + + +def _apply_llm_finish_attributes( + span: Span, invocation: LLMInvocation +) -> None: """Apply attributes/messages common to finish() paths.""" - _apply_common_span_attributes(span, invocation) - _maybe_set_span_messages( - span, invocation.input_messages, invocation.output_messages + # Update span name + span.update_name(_get_llm_span_name(invocation)) + + # Build all attributes by reusing the attribute getter functions + attributes: dict[str, Any] = {} + attributes.update(_get_llm_common_attributes(invocation)) + attributes.update(_get_llm_request_attributes(invocation)) + attributes.update(_get_llm_response_attributes(invocation)) + attributes.update( + _get_llm_messages_attributes_for_span( + invocation.input_messages, + invocation.output_messages, + invocation.system_instruction, + ) ) - _apply_request_attributes(span, invocation) - _apply_response_attributes(span, invocation) - span.set_attributes(invocation.attributes) + attributes.update(invocation.attributes) + + # Set all attributes on the span + if attributes: + span.set_attributes(attributes) def _apply_error_attributes(span: Span, error: Error) -> None: @@ -107,8 +205,10 @@ def _apply_error_attributes(span: Span, error: Error) -> None: span.set_attribute(ErrorAttributes.ERROR_TYPE, error.type.__qualname__) -def _apply_request_attributes(span: Span, invocation: LLMInvocation) -> None: - """Attach GenAI request semantic convention attributes to the span.""" +def _get_llm_request_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get GenAI request semantic convention attributes.""" attributes: dict[str, Any] = {} if invocation.temperature is not None: attributes[GenAI.GEN_AI_REQUEST_TEMPERATURE] = invocation.temperature @@ -130,12 +230,13 @@ def _apply_request_attributes(span: Span, invocation: LLMInvocation) -> None: ) if invocation.seed is not None: attributes[GenAI.GEN_AI_REQUEST_SEED] = invocation.seed - if attributes: - span.set_attributes(attributes) + return attributes -def _apply_response_attributes(span: Span, invocation: LLMInvocation) -> None: - """Attach GenAI response semantic convention attributes to the span.""" +def _get_llm_response_attributes( + invocation: LLMInvocation, +) -> dict[str, Any]: + """Get GenAI response semantic convention attributes.""" attributes: dict[str, Any] = {} finish_reasons: list[str] | None @@ -169,13 +270,15 @@ def _apply_response_attributes(span: Span, invocation: LLMInvocation) -> None: if invocation.output_tokens is not None: attributes[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS] = invocation.output_tokens - if attributes: - 
span.set_attributes(attributes) + return attributes __all__ = [ - "_apply_finish_attributes", + "_apply_llm_finish_attributes", "_apply_error_attributes", - "_apply_request_attributes", - "_apply_response_attributes", + "_get_llm_common_attributes", + "_get_llm_request_attributes", + "_get_llm_response_attributes", + "_get_llm_span_name", + "_maybe_emit_llm_event", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 4fbb059e73..203abb1c5d 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -53,18 +53,53 @@ class ToolCallResponse: type: Literal["tool_call_response"] = "tool_call_response" -FinishReason = Literal[ - "content_filter", "error", "length", "stop", "tool_calls" -] - - @dataclass() class Text: content: str type: Literal["text"] = "text" -MessagePart = Union[Text, ToolCall, ToolCallResponse, Any] +@dataclass() +class Reasoning: + content: str + type: Literal["reasoning"] = "reasoning" + + +Modality = Literal["image", "video", "audio"] + + +@dataclass() +class Blob: + mime_type: str | None + modality: Union[Modality, str] + content: bytes + type: Literal["blob"] = "blob" + + +@dataclass() +class File: + mime_type: str | None + modality: Union[Modality, str] + file_id: str + type: Literal["file"] = "file" + + +@dataclass() +class Uri: + mime_type: str | None + modality: Union[Modality, str] + uri: str + type: Literal["uri"] = "uri" + + +MessagePart = Union[ + Text, ToolCall, ToolCallResponse, Blob, File, Uri, Reasoning, Any +] + + +FinishReason = Literal[ + "content_filter", "error", "length", "stop", "tool_calls" +] @dataclass() @@ -88,6 +123,10 @@ def _new_output_messages() -> list[OutputMessage]: return [] +def _new_system_instruction() -> list[MessagePart]: + return [] + + def _new_str_any_dict() -> dict[str, Any]: return {} @@ -109,6 +148,9 @@ class LLMInvocation: output_messages: list[OutputMessage] = field( default_factory=_new_output_messages ) + system_instruction: list[MessagePart] = field( + default_factory=_new_system_instruction + ) provider: str | None = None response_model_name: str | None = None response_id: str | None = None diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index aecb16c541..4e65c3b618 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -28,6 +28,23 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) + +# Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename +# Changed in opentelemetry-sdk@0.60b0 +try: + from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module + InMemoryLogRecordExporter, + SimpleLogRecordProcessor, + ) +except ImportError: + # Fallback to old name for compatibility with older SDK versions + from opentelemetry.sdk._logs.export import ( + InMemoryLogExporter as InMemoryLogRecordExporter, + ) + from opentelemetry.sdk._logs.export import ( + SimpleLogRecordProcessor, + ) +from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -42,8 +59,10 @@ from opentelemetry.util.genai.handler import get_telemetry_handler from opentelemetry.util.genai.types import ( ContentCapturingMode, + Error, InputMessage, LLMInvocation, + 
MessagePart, OutputMessage, Text, ) @@ -84,6 +103,12 @@ def _create_output_message( ) +def _create_system_instruction( + content: str = "You are a helpful assistant.", +) -> list[MessagePart]: + return [Text(content=content)] + + def _get_single_span(span_exporter: InMemorySpanExporter) -> ReadableSpan: spans = span_exporter.get_finished_spans() assert len(spans) == 1 @@ -183,13 +208,19 @@ def setUp(self): tracer_provider.add_span_processor( SimpleSpanProcessor(self.span_exporter) ) + self.log_exporter = InMemoryLogRecordExporter() + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + SimpleLogRecordProcessor(self.log_exporter) + ) self.telemetry_handler = get_telemetry_handler( - tracer_provider=tracer_provider + tracer_provider=tracer_provider, logger_provider=logger_provider ) def tearDown(self): # Clear spans and reset the singleton telemetry handler so each test starts clean self.span_exporter.clear() + self.log_exporter.clear() if hasattr(get_telemetry_handler, "_default_handler"): delattr(get_telemetry_handler, "_default_handler") @@ -200,11 +231,13 @@ def tearDown(self): def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use message = _create_input_message("hello world") chat_generation = _create_output_message("hello back") + system_instruction = _create_system_instruction() with self.telemetry_handler.llm() as invocation: for attr, value in { "request_model": "test-model", "input_messages": [message], + "system_instruction": system_instruction, "provider": "test-provider", "attributes": {"custom_attr": "value"}, "temperature": 0.5, @@ -256,6 +289,15 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use self.assertEqual(invocation.attributes.get("custom_attr"), "value") self.assertEqual(invocation.attributes.get("extra"), "info") + # Verify system instruction is present in span as JSON string + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) + span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) + self.assertIsInstance(span_system, list) + self.assertEqual( + span_system[0]["content"], "You are a helpful assistant." 
+ ) + self.assertEqual(span_system[0]["type"], "text") + @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", @@ -467,3 +509,219 @@ class BoomError(RuntimeError): GenAI.GEN_AI_USAGE_OUTPUT_TOKENS: 22, }, ) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + ) + def test_emits_llm_event(self): + message = _create_input_message("test query") + chat_generation = _create_output_message("test response") + system_instruction = _create_system_instruction() + + invocation = LLMInvocation( + request_model="event-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + temperature=0.7, + max_tokens=100, + response_model_name="response-model", + response_id="event-response-id", + input_tokens=10, + output_tokens=20, + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check that event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_data = logs[0] + log_record = log_data.log_record + + # Verify event name + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + + # Verify event attributes + attrs = log_record.attributes + self.assertIsNotNone(attrs) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "event-model") + self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "test-provider") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_TEMPERATURE], 0.7) + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MAX_TOKENS], 100) + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "response-model") + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "event-response-id") + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 10) + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS], 20) + + # Verify messages are in structured format (not JSON string) + # OpenTelemetry may convert lists to tuples, so we normalize + input_messages = attrs[GenAI.GEN_AI_INPUT_MESSAGES] + input_messages_list = ( + list(input_messages) + if isinstance(input_messages, tuple) + else input_messages + ) + self.assertEqual(len(input_messages_list), 1) + input_msg = ( + dict(input_messages_list[0]) + if isinstance(input_messages_list[0], tuple) + else input_messages_list[0] + ) + self.assertEqual(input_msg["role"], "Human") + parts = ( + list(input_msg["parts"]) + if isinstance(input_msg["parts"], tuple) + else input_msg["parts"] + ) + self.assertEqual(parts[0]["content"], "test query") + + output_messages = attrs[GenAI.GEN_AI_OUTPUT_MESSAGES] + output_messages_list = ( + list(output_messages) + if isinstance(output_messages, tuple) + else output_messages + ) + self.assertEqual(len(output_messages_list), 1) + output_msg = ( + dict(output_messages_list[0]) + if isinstance(output_messages_list[0], tuple) + else output_messages_list[0] + ) + self.assertEqual(output_msg["role"], "AI") + output_parts = ( + list(output_msg["parts"]) + if isinstance(output_msg["parts"], tuple) + else output_msg["parts"] + ) + self.assertEqual(output_parts[0]["content"], "test response") + self.assertEqual(output_msg["finish_reason"], "stop") + + # Verify system instruction is present in event in structured format + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, attrs) + system_instructions = attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] + system_instructions_list = ( + 
list(system_instructions) + if isinstance(system_instructions, tuple) + else system_instructions + ) + self.assertEqual(len(system_instructions_list), 1) + sys_instr = ( + dict(system_instructions_list[0]) + if isinstance(system_instructions_list[0], tuple) + else system_instructions_list[0] + ) + self.assertEqual(sys_instr["content"], "You are a helpful assistant.") + self.assertEqual(sys_instr["type"], "text") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_AND_EVENT", + ) + def test_emits_llm_event_and_span(self): + message = _create_input_message("combined test") + chat_generation = _create_output_message("combined response") + system_instruction = _create_system_instruction("System prompt here") + + invocation = LLMInvocation( + request_model="combined-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check span was created + span = _get_single_span(self.span_exporter) + span_attrs = _get_span_attributes(span) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes) + # Verify system instruction in both span and event + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) + span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) + self.assertEqual(span_system[0]["content"], "System prompt here") + event_attrs = log_record.attributes + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, event_attrs) + event_system = event_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] + event_system_list = ( + list(event_system) + if isinstance(event_system, tuple) + else event_system + ) + event_sys_instr = ( + dict(event_system_list[0]) + if isinstance(event_system_list[0], tuple) + else event_system_list[0] + ) + self.assertEqual(event_sys_instr["content"], "System prompt here") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + ) + def test_emits_llm_event_with_error(self): + class TestError(RuntimeError): + pass + + message = _create_input_message("error test") + invocation = LLMInvocation( + request_model="error-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + error = Error(message="Test error occurred", type=TestError) + self.telemetry_handler.fail_llm(invocation, error) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + attrs = log_record.attributes + + # Verify error attribute is present + self.assertEqual( + attrs[ErrorAttributes.ERROR_TYPE], TestError.__qualname__ + ) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="NO_CONTENT", + ) + def test_does_not_emit_llm_event_when_no_content(self): + message = _create_input_message("no content test") + chat_generation = _create_output_message("no content response") + + invocation = LLMInvocation( + 
request_model="no-content-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check no event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 0) From cd4b29d2a1cf55399a75e523e2e0c83999beb051 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 17:23:35 +0800 Subject: [PATCH 02/12] Add change log Change-Id: I5c4c93613e3e1084245b7298955a08cbc7c9708d Co-developed-by: Cursor --- util/opentelemetry-util-genai/CHANGELOG.md | 2 +- util/opentelemetry-util-genai/tests/test_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 45a2d662de..0ad42baf5e 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased -- Add support for emitting inference events and enrich message types. ([]()) +- Add support for emitting inference events and enrich message types. ([#3994](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994)) - Minor change to check LRU cache in Completion Hook before acquiring semaphore/thread ([#3907](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3907)). - Add environment variable for genai upload hook queue size ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3943](#3943)) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 4e65c3b618..1aa3a4ba57 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -32,7 +32,7 @@ # Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename # Changed in opentelemetry-sdk@0.60b0 try: - from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module + from opentelemetry.sdk._logs.export import (https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994 # pylint: disable=no-name-in-module InMemoryLogRecordExporter, SimpleLogRecordProcessor, ) From b34297838a80be0cbfb39e3777b1e6d5a3c7e6a9 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 17:25:55 +0800 Subject: [PATCH 03/12] Fix unit tests Change-Id: If34cfce0e7eb130db6a1e8e30a5f4be7c215285f Co-developed-by: Cursor --- util/opentelemetry-util-genai/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 1aa3a4ba57..4e65c3b618 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -32,7 +32,7 @@ # Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename # Changed in opentelemetry-sdk@0.60b0 try: - from opentelemetry.sdk._logs.export import (https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994 # pylint: disable=no-name-in-module + from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module InMemoryLogRecordExporter, SimpleLogRecordProcessor, ) From 7f7b3cf0305c92efba19b1e02b366d98dea777ff Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 18:00:33 +0800 Subject: [PATCH 04/12] Fix 
linting failure Change-Id: I847f75259e01729db88129a44b241afb0ea2aca4 Co-developed-by: Cursor --- .../tests/test_utils.py | 78 ++++++------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index 4e65c3b618..d7104d4311 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -165,6 +165,16 @@ def _assert_text_message( assert message.get("finish_reason") == finish_reason +def _normalize_to_list(value: Any) -> list[Any]: + """Normalize tuple or list to list for OpenTelemetry compatibility.""" + return list(value) if isinstance(value, tuple) else value + + +def _normalize_to_dict(value: Any) -> dict[str, Any]: + """Normalize tuple or dict to dict for OpenTelemetry compatibility.""" + return dict(value) if isinstance(value, tuple) else value + + class TestVersion(unittest.TestCase): @patch_env_vars( stability_mode="gen_ai_latest_experimental", @@ -515,14 +525,10 @@ class BoomError(RuntimeError): content_capturing="EVENT_ONLY", ) def test_emits_llm_event(self): - message = _create_input_message("test query") - chat_generation = _create_output_message("test response") - system_instruction = _create_system_instruction() - invocation = LLMInvocation( request_model="event-model", - input_messages=[message], - system_instruction=system_instruction, + input_messages=[_create_input_message("test query")], + system_instruction=_create_system_instruction(), provider="test-provider", temperature=0.7, max_tokens=100, @@ -533,14 +539,13 @@ def test_emits_llm_event(self): ) self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [chat_generation] + invocation.output_messages = [_create_output_message("test response")] self.telemetry_handler.stop_llm(invocation) # Check that event was emitted logs = self.log_exporter.get_finished_logs() self.assertEqual(len(logs), 1) - log_data = logs[0] - log_record = log_data.log_record + log_record = logs[0].log_record # Verify event name self.assertEqual( @@ -562,60 +567,27 @@ def test_emits_llm_event(self): # Verify messages are in structured format (not JSON string) # OpenTelemetry may convert lists to tuples, so we normalize - input_messages = attrs[GenAI.GEN_AI_INPUT_MESSAGES] - input_messages_list = ( - list(input_messages) - if isinstance(input_messages, tuple) - else input_messages - ) - self.assertEqual(len(input_messages_list), 1) - input_msg = ( - dict(input_messages_list[0]) - if isinstance(input_messages_list[0], tuple) - else input_messages_list[0] + input_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_INPUT_MESSAGES])[0] ) self.assertEqual(input_msg["role"], "Human") - parts = ( - list(input_msg["parts"]) - if isinstance(input_msg["parts"], tuple) - else input_msg["parts"] + self.assertEqual( + _normalize_to_list(input_msg["parts"])[0]["content"], "test query" ) - self.assertEqual(parts[0]["content"], "test query") - output_messages = attrs[GenAI.GEN_AI_OUTPUT_MESSAGES] - output_messages_list = ( - list(output_messages) - if isinstance(output_messages, tuple) - else output_messages - ) - self.assertEqual(len(output_messages_list), 1) - output_msg = ( - dict(output_messages_list[0]) - if isinstance(output_messages_list[0], tuple) - else output_messages_list[0] + output_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_OUTPUT_MESSAGES])[0] ) self.assertEqual(output_msg["role"], "AI") - output_parts = ( - 
list(output_msg["parts"]) - if isinstance(output_msg["parts"], tuple) - else output_msg["parts"] + self.assertEqual( + _normalize_to_list(output_msg["parts"])[0]["content"], + "test response", ) - self.assertEqual(output_parts[0]["content"], "test response") self.assertEqual(output_msg["finish_reason"], "stop") # Verify system instruction is present in event in structured format - self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, attrs) - system_instructions = attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] - system_instructions_list = ( - list(system_instructions) - if isinstance(system_instructions, tuple) - else system_instructions - ) - self.assertEqual(len(system_instructions_list), 1) - sys_instr = ( - dict(system_instructions_list[0]) - if isinstance(system_instructions_list[0], tuple) - else system_instructions_list[0] + sys_instr = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS])[0] ) self.assertEqual(sys_instr["content"], "You are a helpful assistant.") self.assertEqual(sys_instr["type"], "text") From 1d312c52fd46933e69692ac6e67ef910579b3ae2 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Wed, 3 Dec 2025 18:18:45 +0800 Subject: [PATCH 05/12] Fix readme Change-Id: I818a042d275d3c8e3348647d73e34560e7d92f54 Co-developed-by: Cursor --- util/opentelemetry-util-genai/README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/opentelemetry-util-genai/README.rst b/util/opentelemetry-util-genai/README.rst index c69530278a..50c869c517 100644 --- a/util/opentelemetry-util-genai/README.rst +++ b/util/opentelemetry-util-genai/README.rst @@ -27,7 +27,7 @@ This package provides these span attributes: - `gen_ai.usage.output_tokens`: Int(7) - `gen_ai.input.messages`: Str('[{"role": "Human", "parts": [{"content": "hello world", "type": "text"}]}]') - `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]') -- `gen_ai.system.instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) +- `gen_ai.system_instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) When `EVENT_ONLY` or `SPAN_AND_EVENT` mode is enabled and a LoggerProvider is configured, the package also emits `gen_ai.client.inference.operation.details` events with structured From d32ec3e5d16db8eb16dc494cd0e4d189e7466dc6 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Thu, 4 Dec 2025 13:55:36 +0800 Subject: [PATCH 06/12] Format codes Change-Id: I40b8e01bbe4fa9c182e99085a7c71d4536042247 Co-developed-by: Cursor --- .../src/opentelemetry/util/genai/handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index 5271840246..a49535a301 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -65,11 +65,11 @@ from typing import Iterator from opentelemetry import context as otel_context -from opentelemetry.metrics import MeterProvider, get_meter from opentelemetry._logs import ( LoggerProvider, get_logger, ) +from opentelemetry.metrics import MeterProvider, get_meter from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -227,7 +227,7 @@ def get_telemetry_handler( handler = TelemetryHandler( 
tracer_provider=tracer_provider, meter_provider=meter_provider, - logger_provider=logger_provider + logger_provider=logger_provider, ) setattr(get_telemetry_handler, "_default_handler", handler) return handler From e0d35b29cb9e0e29474cc2a63a02c0327be6387c Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Thu, 4 Dec 2025 14:45:07 +0800 Subject: [PATCH 07/12] Fix missing trace context in events Change-Id: Ie07c495002143fb2f0cf88033206290eb85386ad Co-developed-by: Cursor --- .../src/opentelemetry/util/genai/handler.py | 4 ++-- .../opentelemetry/util/genai/span_utils.py | 5 +++++ .../tests/test_utils.py | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index a49535a301..0725fe0900 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -162,7 +162,7 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab span = invocation.span _apply_llm_finish_attributes(span, invocation) self._record_llm_metrics(invocation, span) - _maybe_emit_llm_event(self._logger, invocation) + _maybe_emit_llm_event(self._logger, span, invocation) # Detach context and end span otel_context.detach(invocation.context_token) span.end() @@ -181,7 +181,7 @@ def fail_llm( # pylint: disable=no-self-use _apply_error_attributes(invocation.span, error) error_type = getattr(error.type, "__qualname__", None) self._record_llm_metrics(invocation, span, error_type=error_type) - _maybe_emit_llm_event(self._logger, invocation, error) + _maybe_emit_llm_event(self._logger, span, invocation, error) # Detach context and end span otel_context.detach(invocation.context_token) span.end() diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 03c0d5349b..02e8c3bb28 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -18,6 +18,7 @@ from typing import Any from opentelemetry._logs import Logger, LogRecord +from opentelemetry.context import get_current from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -27,6 +28,7 @@ from opentelemetry.trace import ( Span, ) +from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.genai.types import ( Error, @@ -129,6 +131,7 @@ def _get_llm_messages_attributes_for_event( def _maybe_emit_llm_event( logger: Logger | None, + span: Span, invocation: LLMInvocation, error: Error | None = None, ) -> None: @@ -165,9 +168,11 @@ def _maybe_emit_llm_event( attributes[ErrorAttributes.ERROR_TYPE] = error.type.__qualname__ # Create and emit the event + context = set_span_in_context(span, get_current()) event = LogRecord( event_name="gen_ai.client.inference.operation.details", attributes=attributes, + context=context, ) logger.emit(event) diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index d7104d4311..a6767ecb01 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -592,6 +592,14 @@ def test_emits_llm_event(self): 
self.assertEqual(sys_instr["content"], "You are a helpful assistant.") self.assertEqual(sys_instr["type"], "text") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) + @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_AND_EVENT", @@ -643,6 +651,13 @@ def test_emits_llm_event_and_span(self): else event_system_list[0] ) self.assertEqual(event_sys_instr["content"], "System prompt here") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) @patch_env_vars( stability_mode="gen_ai_latest_experimental", @@ -675,6 +690,13 @@ class TestError(RuntimeError): ) self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) @patch_env_vars( stability_mode="gen_ai_latest_experimental", From c2d623297300f355460e36d20e4d63af9b2dde11 Mon Sep 17 00:00:00 2001 From: cirilla-zmh Date: Thu, 11 Dec 2025 12:59:55 +0800 Subject: [PATCH 08/12] feedback Change-Id: Ida0c2305d950d978c31eb04a80e21e947fabdfba Co-developed-by: Cursor --- .../util/genai/environment_variables.py | 9 ++ .../opentelemetry/util/genai/span_utils.py | 12 +- .../src/opentelemetry/util/genai/types.py | 42 +++++ .../src/opentelemetry/util/genai/utils.py | 23 +++ .../tests/test_utils.py | 152 +++++++++++++++--- 5 files changed, 205 insertions(+), 33 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py index 4a8dde216f..a1f5848372 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/environment_variables.py @@ -16,6 +16,15 @@ "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT" ) +OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT = "OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT" +""" +.. envvar:: OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT + +Controls whether to emit gen_ai.client.inference.operation.details events. +Must be one of ``true`` or ``false`` (case-insensitive). +Defaults to ``false``. 
+""" + OTEL_INSTRUMENTATION_GENAI_COMPLETION_HOOK = ( "OTEL_INSTRUMENTATION_GENAI_COMPLETION_HOOK" ) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 02e8c3bb28..9e2c35ebff 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -42,6 +42,7 @@ gen_ai_json_dumps, get_content_capturing_mode, is_experimental_mode, + should_emit_event, ) @@ -140,14 +141,11 @@ def _maybe_emit_llm_event( This function creates a LogRecord event following the semantic convention for gen_ai.client.inference.operation.details as specified in the GenAI event semantic conventions. - """ - if not is_experimental_mode() or get_content_capturing_mode() not in ( - ContentCapturingMode.EVENT_ONLY, - ContentCapturingMode.SPAN_AND_EVENT, - ): - return - if logger is None: + For more details, see the semantic convention documentation: + https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-events.md#event-eventgen_aiclientinferenceoperationdetails + """ + if not is_experimental_mode() or not should_emit_event() or logger is None: return # Build event attributes by reusing the attribute getter functions diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 203abb1c5d..2e8e5e85b8 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -40,6 +40,12 @@ class ContentCapturingMode(Enum): @dataclass() class ToolCall: + """Represents a tool call requested by the model + + This model is specified as part of semconv in `GenAI messages Python models - ToolCallRequestPart + `__. + """ + arguments: Any name: str id: str | None @@ -48,6 +54,12 @@ class ToolCall: @dataclass() class ToolCallResponse: + """Represents a tool call result sent to the model or a built-in tool call outcome and details + + This model is specified as part of semconv in `GenAI messages Python models - ToolCallResponsePart + `__. + """ + response: Any id: str | None type: Literal["tool_call_response"] = "tool_call_response" @@ -55,12 +67,24 @@ class ToolCallResponse: @dataclass() class Text: + """Represents text content sent to or received from the model + + This model is specified as part of semconv in `GenAI messages Python models - TextPart + `__. + """ + content: str type: Literal["text"] = "text" @dataclass() class Reasoning: + """Represents reasoning/thinking content received from the model + + This model is specified as part of semconv in `GenAI messages Python models - ReasoningPart + `__. + """ + content: str type: Literal["reasoning"] = "reasoning" @@ -70,6 +94,12 @@ class Reasoning: @dataclass() class Blob: + """Represents blob binary data sent inline to the model + + This model is specified as part of semconv in `GenAI messages Python models - BlobPart + `__. + """ + mime_type: str | None modality: Union[Modality, str] content: bytes @@ -78,6 +108,12 @@ class Blob: @dataclass() class File: + """Represents an external referenced file sent to the model by file id + + This model is specified as part of semconv in `GenAI messages Python models - FilePart + `__. 
+ """ + mime_type: str | None modality: Union[Modality, str] file_id: str @@ -86,6 +122,12 @@ class File: @dataclass() class Uri: + """Represents an external referenced file sent to the model by URI + + This model is specified as part of semconv in `GenAI messages Python models - UriPart + `__. + """ + mime_type: str | None modality: Union[Modality, str] uri: str diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py index e9dd43cea6..dc47508d6f 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/utils.py @@ -26,6 +26,7 @@ ) from opentelemetry.util.genai.environment_variables import ( OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, + OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT, ) from opentelemetry.util.genai.types import ContentCapturingMode @@ -64,6 +65,28 @@ def get_content_capturing_mode() -> ContentCapturingMode: return ContentCapturingMode.NO_CONTENT +def should_emit_event() -> bool: + """Check if event emission is enabled. + + Returns True if event emission is enabled, False otherwise. + Defaults to False if the environment variable is not set. + """ + envvar = os.environ.get(OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT) + if not envvar: + return False + envvar_lower = envvar.lower() + if envvar_lower == "true": + return True + if envvar_lower == "false": + return False + logger.warning( + "%s is not a valid option for `%s` environment variable. Must be one of true or false (case-insensitive). Defaulting to `false`.", + envvar, + OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT, + ) + return False + + class _GenAiJsonEncoder(json.JSONEncoder): def default(self, o: Any) -> Any: if isinstance(o, bytes): diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index a6767ecb01..969f17cbb4 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -23,28 +23,16 @@ OTEL_SEMCONV_STABILITY_OPT_IN, _OpenTelemetrySemanticConventionStability, ) +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import ( + InMemoryLogRecordExporter, + SimpleLogRecordProcessor, +) from opentelemetry.sdk.trace import ReadableSpan, TracerProvider from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) - -# Backward compatibility for InMemoryLogExporter -> InMemoryLogRecordExporter rename -# Changed in opentelemetry-sdk@0.60b0 -try: - from opentelemetry.sdk._logs.export import ( # pylint: disable=no-name-in-module - InMemoryLogRecordExporter, - SimpleLogRecordProcessor, - ) -except ImportError: - # Fallback to old name for compatibility with older SDK versions - from opentelemetry.sdk._logs.export import ( - InMemoryLogExporter as InMemoryLogRecordExporter, - ) - from opentelemetry.sdk._logs.export import ( - SimpleLogRecordProcessor, - ) -from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) @@ -55,6 +43,7 @@ from opentelemetry.trace.status import StatusCode from opentelemetry.util.genai.environment_variables import ( OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, + OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT, ) from opentelemetry.util.genai.handler import get_telemetry_handler from 
opentelemetry.util.genai.types import ( @@ -66,16 +55,20 @@ OutputMessage, Text, ) -from opentelemetry.util.genai.utils import get_content_capturing_mode +from opentelemetry.util.genai.utils import ( + get_content_capturing_mode, + should_emit_event, +) -def patch_env_vars(stability_mode, content_capturing): +def patch_env_vars(stability_mode, content_capturing, emit_event): def decorator(test_case): @patch.dict( os.environ, { OTEL_SEMCONV_STABILITY_OPT_IN: stability_mode, OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: content_capturing, + OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT: emit_event, }, ) def wrapper(*args, **kwargs): @@ -179,17 +172,24 @@ class TestVersion(unittest.TestCase): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", + emit_event="", ) def test_get_content_capturing_mode_parses_valid_envvar(self): # pylint: disable=no-self-use assert get_content_capturing_mode() == ContentCapturingMode.SPAN_ONLY @patch_env_vars( - stability_mode="gen_ai_latest_experimental", content_capturing="" + stability_mode="gen_ai_latest_experimental", + content_capturing="", + emit_event="", ) def test_empty_content_capturing_envvar(self): # pylint: disable=no-self-use assert get_content_capturing_mode() == ContentCapturingMode.NO_CONTENT - @patch_env_vars(stability_mode="default", content_capturing="True") + @patch_env_vars( + stability_mode="default", + content_capturing="True", + emit_event="", + ) def test_get_content_capturing_mode_raises_exception_when_semconv_stability_default( self, ): # pylint: disable=no-self-use @@ -199,6 +199,7 @@ def test_get_content_capturing_mode_raises_exception_when_semconv_stability_defa @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="INVALID_VALUE", + emit_event="", ) def test_get_content_capturing_mode_raises_exception_on_invalid_envvar( self, @@ -211,6 +212,75 @@ def test_get_content_capturing_mode_raises_exception_on_invalid_envvar( self.assertIn("INVALID_VALUE is not a valid option for ", cm.output[0]) +class TestShouldEmitEvent(unittest.TestCase): + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="true", + ) + def test_should_emit_event_returns_true_when_set_to_true( + self, + ): # pylint: disable=no-self-use + assert should_emit_event() is True + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="True", + ) + def test_should_emit_event_case_insensitive_true( + self, + ): # pylint: disable=no-self-use + assert should_emit_event() is True + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="false", + ) + def test_should_emit_event_returns_false_when_set_to_false( + self, + ): # pylint: disable=no-self-use + assert should_emit_event() is False + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="False", + ) + def test_should_emit_event_case_insensitive_false( + self, + ): # pylint: disable=no-self-use + assert should_emit_event() is False + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="", + ) + def test_should_emit_event_by_defaults( + self, + ): # pylint: disable=no-self-use + assert should_emit_event() is False + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="INVALID_VALUE", + ) + def 
test_should_emit_event_with_invalid_value( + self, + ): # pylint: disable=no-self-use + with self.assertLogs(level="WARNING") as cm: + result = should_emit_event() + assert result is False, f"Expected False but got {result}" + self.assertEqual(len(cm.output), 1) + self.assertIn("INVALID_VALUE is not a valid option for", cm.output[0]) + self.assertIn( + "Must be one of true or false (case-insensitive)", cm.output[0] + ) + + class TestTelemetryHandler(unittest.TestCase): def setUp(self): self.span_exporter = InMemorySpanExporter() @@ -237,6 +307,7 @@ def tearDown(self): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", + emit_event="", ) def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use message = _create_input_message("hello world") @@ -311,6 +382,7 @@ def test_llm_start_and_stop_creates_span(self): # pylint: disable=no-self-use @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", + emit_event="", ) def test_llm_manual_start_and_stop_creates_span(self): message = _create_input_message("hi") @@ -436,6 +508,7 @@ def test_llm_span_uses_expected_schema_url(self): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", + emit_event="", ) def test_parent_child_span_relationship(self): message = _create_input_message("hi") @@ -523,6 +596,7 @@ class BoomError(RuntimeError): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="EVENT_ONLY", + emit_event="true", ) def test_emits_llm_event(self): invocation = LLMInvocation( @@ -603,6 +677,7 @@ def test_emits_llm_event(self): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_AND_EVENT", + emit_event="true", ) def test_emits_llm_event_and_span(self): message = _create_input_message("combined test") @@ -662,6 +737,7 @@ def test_emits_llm_event_and_span(self): @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="EVENT_ONLY", + emit_event="true", ) def test_emits_llm_event_with_error(self): class TestError(RuntimeError): @@ -700,14 +776,15 @@ class TestError(RuntimeError): @patch_env_vars( stability_mode="gen_ai_latest_experimental", - content_capturing="NO_CONTENT", + content_capturing="EVENT_ONLY", + emit_event="false", ) - def test_does_not_emit_llm_event_when_no_content(self): - message = _create_input_message("no content test") - chat_generation = _create_output_message("no content response") + def test_does_not_emit_llm_event_when_emit_event_false(self): + message = _create_input_message("emit false test") + chat_generation = _create_output_message("emit false response") invocation = LLMInvocation( - request_model="no-content-model", + request_model="emit-false-model", input_messages=[message], provider="test-provider", ) @@ -719,3 +796,26 @@ def test_does_not_emit_llm_event_when_no_content(self): # Check no event was emitted logs = self.log_exporter.get_finished_logs() self.assertEqual(len(logs), 0) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="", + ) + def test_does_not_emit_llm_event_by_default(self): + """Test that event is not emitted by default when OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" + invocation = LLMInvocation( + request_model="default-model", + input_messages=[_create_input_message("default test")], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [ + 
_create_output_message("default response")
+        ]
+        self.telemetry_handler.stop_llm(invocation)
+
+        # Check that no event was emitted (default behavior is false)
+        logs = self.log_exporter.get_finished_logs()
+        self.assertEqual(len(logs), 0)

From b4f0887786f023c6fe23bb48a6121032eba9ef38 Mon Sep 17 00:00:00 2001
From: cirilla-zmh
Date: Thu, 11 Dec 2025 14:03:19 +0800
Subject: [PATCH 09/12] Fix type check

Change-Id: I1da48b52b76042a9efd124057681f579cc93fb6e
Co-developed-by: Cursor
---
 .../src/opentelemetry/util/genai/_upload/completion_hook.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_upload/completion_hook.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_upload/completion_hook.py
index d2b18fd6be..4f0b98e6fd 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_upload/completion_hook.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_upload/completion_hook.py
@@ -172,7 +172,7 @@ def _calculate_ref_path(
     if is_system_instructions_hashable(system_instruction):
         # Get a hash of the text.
         system_instruction_hash = hashlib.sha256(
-            "\n".join(x.content for x in system_instruction).encode(  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue, reportUnknownArgumentType]
+            "\n".join(x.content for x in system_instruction).encode(  # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue, reportUnknownArgumentType, reportCallIssue, reportArgumentType]
                 "utf-8"
             ),
             usedforsecurity=False,

From 71572c6063a70fd9646394bae7617abe714dc551 Mon Sep 17 00:00:00 2001
From: cirilla-zmh
Date: Fri, 12 Dec 2025 14:23:16 +0800
Subject: [PATCH 10/12] Fix the span name of LLM invocations

Change-Id: I1b9f40e5576e699b1f61fa3d7e7790ee4b1448a5
Co-developed-by: Cursor
---
 .../src/opentelemetry/util/genai/handler.py | 5 +----
 .../src/opentelemetry/util/genai/types.py   | 6 ++++++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py
index 0725fe0900..54e626deaa 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py
@@ -70,9 +70,6 @@
     get_logger,
 )
 from opentelemetry.metrics import MeterProvider, get_meter
-from opentelemetry.semconv._incubating.attributes import (
-    gen_ai_attributes as GenAI,
-)
 from opentelemetry.semconv.schemas import Schemas
 from opentelemetry.trace import (
     Span,
@@ -141,7 +138,7 @@ def start_llm(
         """Start an LLM invocation and create a pending span entry."""
         # Create a span and attach it as current; keep the token to detach later
         span = self._tracer.start_span(
-            name=f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}",
+            name=f"{invocation.operation_name} {invocation.request_model}",
            kind=SpanKind.CLIENT,
         )
         # Record a monotonic start timestamp (seconds) for duration
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
index 2e8e5e85b8..0b92031a35 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
@@ -24,6 +24,10 @@
 from opentelemetry.context import Context
 from opentelemetry.trace import Span
 
+from opentelemetry.semconv._incubating.attributes import (
+    gen_ai_attributes as 
GenAI,
+)
+
 ContextToken: TypeAlias = Token[Context]
 
 
@@ -182,6 +186,8 @@ class LLMInvocation:
     """
 
     request_model: str
+    # Chat by default
+    operation_name: str = GenAI.GenAiOperationNameValues.CHAT.value
     context_token: ContextToken | None = None
     span: Span | None = None
     input_messages: list[InputMessage] = field(

From 20b740c4c624d61b9f255055dd8f0362b2b8f430 Mon Sep 17 00:00:00 2001
From: cirilla-zmh
Date: Fri, 12 Dec 2025 14:41:57 +0800
Subject: [PATCH 11/12] Fix span name

Change-Id: I7adf6fc9cf0bcbed3927862ff87d441bb2a1f78b
Co-developed-by: Cursor
---
 .../src/opentelemetry/util/genai/span_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
index 9e2c35ebff..c5d8356a24 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
@@ -67,7 +67,7 @@
 
 def _get_llm_span_name(invocation: LLMInvocation) -> str:
     """Get the span name for an LLM invocation."""
-    return f"{GenAI.GenAiOperationNameValues.CHAT.value} {invocation.request_model}".strip()
+    return f"{invocation.operation_name} {invocation.request_model}".strip()

From 17347095b98e817889354b35c767d2ef2baaec9d Mon Sep 17 00:00:00 2001
From: cirilla-zmh
Date: Sun, 14 Dec 2025 12:09:53 +0800
Subject: [PATCH 12/12] Fix operation name of LLM span

Change-Id: I4a6f48d0f66b8ad00a6ce4be8dcf4a46aba68e33
Co-developed-by: Cursor
---
 .../src/opentelemetry/util/genai/span_utils.py | 4 +---
 .../src/opentelemetry/util/genai/types.py      | 3 +--
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
index c5d8356a24..d52be22eed 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py
@@ -54,9 +54,7 @@ def _get_llm_common_attributes(
     Returns a dictionary of attributes.
     """
     attributes: dict[str, Any] = {}
-    attributes[GenAI.GEN_AI_OPERATION_NAME] = (
-        GenAI.GenAiOperationNameValues.CHAT.value
-    )
+    attributes[GenAI.GEN_AI_OPERATION_NAME] = invocation.operation_name
     if invocation.request_model:
         attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model
     if invocation.provider is not None:
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
index 0b92031a35..e8b4148f47 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py
@@ -22,11 +22,10 @@
 from typing_extensions import TypeAlias
 
 from opentelemetry.context import Context
-from opentelemetry.trace import Span
-
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAI,
 )
+from opentelemetry.trace import Span
 
 ContextToken: TypeAlias = Token[Context]
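
Taken together, patches 10-12 derive both the span name and the gen_ai.operation.name
attribute from LLMInvocation.operation_name, which still defaults to "chat". A minimal
end-to-end sketch of the resulting surface follows. It is illustrative only: the
in-memory exporter wiring and the "generate_content" operation name are assumptions,
not part of these patches, and event emission remains gated by the environment
variables exercised in the tests above.

import os

from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import SimpleLogRecordProcessor
from opentelemetry.sdk._logs.export.in_memory_log_exporter import (
    InMemoryLogExporter,
)
from opentelemetry.util.genai.handler import get_telemetry_handler
from opentelemetry.util.genai.types import LLMInvocation

# Opt in before the handler is created; get_telemetry_handler caches a
# singleton, so the environment must be configured first.
os.environ["OTEL_SEMCONV_STABILITY_OPT_IN"] = "gen_ai_latest_experimental"
os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "EVENT_ONLY"
os.environ["OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT"] = "true"

# Route emitted inference-detail events to an in-memory exporter so they
# can be inspected after the invocation completes.
exporter = InMemoryLogExporter()
provider = LoggerProvider()
provider.add_log_record_processor(SimpleLogRecordProcessor(exporter))

handler = get_telemetry_handler(logger_provider=provider)

# operation_name defaults to "chat"; overriding it (here with the
# hypothetical value "generate_content") renames the span to
# "generate_content my-model" and sets gen_ai.operation.name to match.
invocation = LLMInvocation(
    request_model="my-model", operation_name="generate_content"
)
handler.start_llm(invocation)
handler.stop_llm(invocation)

# One inference event is expected if emission is enabled end to end.
print(len(exporter.get_finished_logs()))

Because the span name and the operation-name attribute now come from the same field,
an instrumentation wrapping a non-chat endpoint only needs to set operation_name once
on the invocation instead of post-processing the span.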