From cbf6970d2920573705c9cdf56aa489729729af02 Mon Sep 17 00:00:00 2001 From: Dylan Russell Date: Thu, 4 Dec 2025 21:17:00 +0000 Subject: [PATCH 1/3] Add some sem convs to non-experimental instrumentations --- .../google_genai/generate_content.py | 48 ++++++++----------- .../instrumentation/vertexai/utils.py | 33 +++++++------ .../tests/test_chat_completions.py | 1 + 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index 82dda55d17..43519453ca 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -172,7 +172,6 @@ def _to_dict(value: object): def _create_request_attributes( config: Optional[GenerateContentConfigOrDict], - is_experimental_mode: bool, allow_list: AllowList, ) -> dict[str, Any]: if not config: @@ -207,7 +206,7 @@ def _create_request_attributes( }, ) response_mime_type = config.get("response_mime_type") - if response_mime_type and is_experimental_mode: + if response_mime_type: if response_mime_type == "text/plain": attributes[gen_ai_attributes.GEN_AI_OUTPUT_TYPE] = "text" elif response_mime_type == "application/json": @@ -716,13 +715,8 @@ def instrumented_generate_content( completion_hook, generate_content_config_key_allowlist=generate_content_config_key_allowlist, ) - is_experimental_mode = ( - helper.sem_conv_opt_in_mode - == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL - ) request_attributes = _create_request_attributes( config, - is_experimental_mode, helper._generate_content_config_key_allowlist, ) with helper.start_span_as_current_span( @@ -739,7 +733,10 @@ def instrumented_generate_content( config=helper.wrapped_config(config), **kwargs, ) - if is_experimental_mode: + if ( + helper.sem_conv_opt_in_mode + == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + ): helper._update_response(response) if response.candidates: candidates += response.candidates @@ -791,13 +788,8 @@ def instrumented_generate_content_stream( completion_hook, generate_content_config_key_allowlist=generate_content_config_key_allowlist, ) - is_experimental_mode = ( - helper.sem_conv_opt_in_mode - == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL - ) request_attributes = _create_request_attributes( config, - is_experimental_mode, helper._generate_content_config_key_allowlist, ) with helper.start_span_as_current_span( @@ -814,7 +806,10 @@ def instrumented_generate_content_stream( config=helper.wrapped_config(config), **kwargs, ): - if is_experimental_mode: + if ( + helper.sem_conv_opt_in_mode + == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + ): helper._update_response(response) if response.candidates: candidates += response.candidates @@ -865,13 +860,8 @@ async def instrumented_generate_content( completion_hook, generate_content_config_key_allowlist=generate_content_config_key_allowlist, ) - is_experimental_mode = ( - helper.sem_conv_opt_in_mode - == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL - ) request_attributes = _create_request_attributes( config, - is_experimental_mode, helper._generate_content_config_key_allowlist, ) candidates: list[Candidate] = [] @@ -889,7 +879,10 @@ async def instrumented_generate_content( 
config=helper.wrapped_config(config), **kwargs, ) - if is_experimental_mode: + if ( + helper.sem_conv_opt_in_mode + == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + ): helper._update_response(response) if response.candidates: candidates += response.candidates @@ -940,13 +933,8 @@ async def instrumented_generate_content_stream( completion_hook, generate_content_config_key_allowlist=generate_content_config_key_allowlist, ) - is_experimental_mode = ( - helper.sem_conv_opt_in_mode - == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL - ) request_attributes = _create_request_attributes( config, - is_experimental_mode, helper._generate_content_config_key_allowlist, ) with helper.start_span_as_current_span( @@ -955,7 +943,10 @@ async def instrumented_generate_content_stream( end_on_exit=False, ) as span: span.set_attributes(request_attributes) - if not is_experimental_mode: + if ( + not helper.sem_conv_opt_in_mode + == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + ): helper.process_request(contents, config, span) try: response_async_generator = await wrapped_func( @@ -986,7 +977,10 @@ async def _response_async_generator_wrapper(): with trace.use_span(span, end_on_exit=True): try: async for response in response_async_generator: - if is_experimental_mode: + if ( + helper.sem_conv_opt_in_mode + == _StabilityMode.GEN_AI_LATEST_EXPERIMENTAL + ): helper._update_response(response) if response.candidates: candidates += response.candidates diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py index da590851f4..3221cdf9bc 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/src/opentelemetry/instrumentation/vertexai/utils.py @@ -167,24 +167,23 @@ def get_genai_request_attributes( # pylint: disable=too-many-branches attributes[GenAIAttributes.GEN_AI_REQUEST_STOP_SEQUENCES] = ( generation_config.stop_sequences ) - if use_latest_semconvs: - if "seed" in generation_config: - attributes[GenAIAttributes.GEN_AI_REQUEST_SEED] = ( - generation_config.seed - ) - if "candidate_count" in generation_config: - attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = ( - generation_config.candidate_count + if "seed" in generation_config: + attributes[GenAIAttributes.GEN_AI_REQUEST_SEED] = ( + generation_config.seed + ) + if "candidate_count" in generation_config: + attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = ( + generation_config.candidate_count + ) + if "response_mime_type" in generation_config: + if generation_config.response_mime_type == "text/plain": + attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = "text" + elif generation_config.response_mime_type == "application/json": + attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = "json" + else: + attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = ( + generation_config.response_mime_type ) - if "response_mime_type" in generation_config: - if generation_config.response_mime_type == "text/plain": - attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = "text" - elif generation_config.response_mime_type == "application/json": - attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = "json" - else: - attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = ( - generation_config.response_mime_type - ) return attributes diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/test_chat_completions.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/test_chat_completions.py index 9e05a64a4c..05e729e1f4 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/test_chat_completions.py +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/tests/test_chat_completions.py @@ -314,6 +314,7 @@ def test_generate_content_extra_params( "gen_ai.operation.name": "chat", "gen_ai.request.frequency_penalty": 1.0, "gen_ai.request.max_tokens": 5, + "gen_ai.request.seed": 12345, "gen_ai.request.model": "gemini-2.5-pro", "gen_ai.request.presence_penalty": -1.5, "gen_ai.request.stop_sequences": ("\n\n\n",), From 9cdf081b5721a4eb1449d624ea0d4cab361f1dbf Mon Sep 17 00:00:00 2001 From: Dylan Russell Date: Fri, 5 Dec 2025 20:42:43 +0000 Subject: [PATCH 2/3] Fix sys instruct bug --- .../CHANGELOG.md | 1 + .../google_genai/generate_content.py | 46 ++++++++-------- .../generate_content/nonstreaming_base.py | 53 ++++++++++++++++++- .../CHANGELOG.md | 1 + 4 files changed, 78 insertions(+), 23 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index 7f0e70877a..3981f7dd77 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ensure log event is written and completion hook is called even when model call results in exception. Put new log event (` gen_ai.client.inference.operation.details`) behind the flag `OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`. Ensure same sem conv attributes are on the log and span. Fix an issue where the instrumentation would crash when a pydantic.BaseModel class was passed as the response schema ([#3905](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3905)). +- Add the `GEN_AI_OUTPUT_TYPE` sem conv request attributes to events/spans generated in the non-experimental instrumentation. This was added pre sem conv 1.36 so it should be in the non-experimental instrumentation. Fix a bug in how system instructions were recorded in the `gen_ai.system.message` log event. It will now always be recorded as `{"content" : "text of system instructions"}`. 
## Version 0.4b0 (2025-10-16) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index 43519453ca..f65b80087d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -170,6 +170,10 @@ def _to_dict(value: object): return json.loads(json.dumps(value)) +def system_instruction_to_text(value: object) -> str: + return "" + + def _create_request_attributes( config: Optional[GenerateContentConfigOrDict], allow_list: AllowList, @@ -504,31 +508,29 @@ def _maybe_log_completion_details( def _maybe_log_system_instruction( self, config: Optional[GenerateContentConfigOrDict] = None ): - system_instruction = None - if config is not None: - if isinstance(config, dict): - system_instruction = config.get("system_instruction") - else: - system_instruction = config.system_instruction + content_union = _config_to_system_instruction(config) + if not content_union: + return + content = transformers.t_contents(content_union)[0] + if not content.parts: + return + # System instruction is required to be text. An error will be returned by the API if it isn't. + system_instruction = " ".join( + part.text for part in content.parts if part.text + ) if not system_instruction: return - attributes = { - gen_ai_attributes.GEN_AI_SYSTEM: self._genai_system, - } - # TODO: determine if "role" should be reported here or not. It is unclear - # since the caller does not supply a "role" and since this comes through - # a property named "system_instruction" which would seem to align with - # the default "role" that is allowed to be omitted by default. 
- # - # See also: "TODOS.md" - body = {} - if self._content_recording_enabled: - body["content"] = _to_dict(system_instruction) - else: - body["content"] = _CONTENT_ELIDED self._otel_wrapper.log_system_prompt( - attributes=attributes, - body=body, + attributes={ + gen_ai_attributes.GEN_AI_SYSTEM: self._genai_system, + }, + body={ + "content": ( + system_instruction + if self._content_recording_enabled + else _CONTENT_ELIDED + ) + }, ) def _maybe_log_user_prompt( diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py index 897a8716b6..c669f99dd3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/tests/generate_content/nonstreaming_base.py @@ -17,7 +17,7 @@ from unittest.mock import patch import pytest -from google.genai.types import GenerateContentConfig +from google.genai.types import GenerateContentConfig, Part from pydantic import BaseModel, Field from opentelemetry.instrumentation._semconv import ( @@ -180,6 +180,57 @@ def test_records_system_prompt_as_log(self): self.assertEqual(event_record.attributes["gen_ai.system"], "gemini") self.assertEqual(event_record.body["content"], "foo") + @patch.dict( + "os.environ", + {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "true"}, + ) + def test_system_prompt_passed_as_list_of_text(self): + config = GenerateContentConfig( + system_instruction=["help", "me please."] + ) + self.configure_valid_response() + self.generate_content( + model="gemini-2.0-flash", contents="Some input", config=config + ) + self.otel.assert_has_event_named("gen_ai.system.message") + event_record = self.otel.get_event_named("gen_ai.system.message") + self.assertEqual(event_record.body["content"], "help me please.") + + @patch.dict( + "os.environ", + {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "true"}, + ) + def test_system_prompt_passed_as_list_of_text_parts(self): + config = GenerateContentConfig( + system_instruction=[ + Part.from_text(text="help"), + Part.from_text(text="me please."), + ] + ) + self.configure_valid_response() + self.generate_content( + model="gemini-2.0-flash", contents="Some input", config=config + ) + self.otel.assert_has_event_named("gen_ai.system.message") + event_record = self.otel.get_event_named("gen_ai.system.message") + self.assertEqual(event_record.body["content"], "help me please.") + + @patch.dict( + "os.environ", + {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "true"}, + ) + def test_system_prompt_passed_is_invalid(self): + config = GenerateContentConfig( + system_instruction=[ + Part.from_uri(file_uri="test.jpg"), + ] + ) + self.configure_valid_response() + self.generate_content( + model="gemini-2.0-flash", contents="Some input", config=config + ) + self.otel.assert_does_not_have_event_named("gen_ai.system.message") + @patch.dict( "os.environ", {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": "false"}, diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md index 65dfcf15b3..eb08e13d16 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres 
to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased - Fix overwritten log attributes in vertexai instrumentation ([#3925](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3925)) +- Add the `GEN_AI_OUTPUT_TYPE`, `GEN_AI_REQUEST_CHOICE_COUNT` and `GEN_AI_REQUEST_SEED` sem conv request attributes to events/spans generated in the non-experimental instrumentation. This was added pre sem conv 1.36 so it should be in the non-experimental instrumentation. ## Version 2.1b0 (2025-10-16) From f1f5a62cc08cc93416d7ab5f5d34db4580c4de18 Mon Sep 17 00:00:00 2001 From: Dylan Russell Date: Fri, 5 Dec 2025 21:20:37 +0000 Subject: [PATCH 3/3] Respond to comments.. --- .../opentelemetry-instrumentation-google-genai/CHANGELOG.md | 2 +- .../instrumentation/google_genai/generate_content.py | 4 ---- .../opentelemetry-instrumentation-vertexai/CHANGELOG.md | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md index 3981f7dd77..e0274fe6d9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ensure log event is written and completion hook is called even when model call results in exception. Put new log event (` gen_ai.client.inference.operation.details`) behind the flag `OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental`. Ensure same sem conv attributes are on the log and span. Fix an issue where the instrumentation would crash when a pydantic.BaseModel class was passed as the response schema ([#3905](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3905)). -- Add the `GEN_AI_OUTPUT_TYPE` sem conv request attributes to events/spans generated in the non-experimental instrumentation. This was added pre sem conv 1.36 so it should be in the non-experimental instrumentation. Fix a bug in how system instructions were recorded in the `gen_ai.system.message` log event. It will now always be recorded as `{"content" : "text of system instructions"}`. +- Add the `GEN_AI_OUTPUT_TYPE` sem conv request attributes to events/spans generated in the stable instrumentation. This was added pre sem conv 1.36 so it should be in the stable instrumentation. Fix a bug in how system instructions were recorded in the `gen_ai.system.message` log event. It will now always be recorded as `{"content" : "text of system instructions"}`. See ([#4011](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4011)). 
## Version 0.4b0 (2025-10-16) diff --git a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py index f65b80087d..6d6e02ca5e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py +++ b/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/generate_content.py @@ -170,10 +170,6 @@ def _to_dict(value: object): return json.loads(json.dumps(value)) -def system_instruction_to_text(value: object) -> str: - return "" - - def _create_request_attributes( config: Optional[GenerateContentConfigOrDict], allow_list: AllowList, diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md index eb08e13d16..4082e9f52d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased - Fix overwritten log attributes in vertexai instrumentation ([#3925](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3925)) -- Add the `GEN_AI_OUTPUT_TYPE`, `GEN_AI_REQUEST_CHOICE_COUNT` and `GEN_AI_REQUEST_SEED` sem conv request attributes to events/spans generated in the non-experimental instrumentation. This was added pre sem conv 1.36 so it should be in the non-experimental instrumentation. +- Add the `GEN_AI_OUTPUT_TYPE`, `GEN_AI_REQUEST_CHOICE_COUNT` and `GEN_AI_REQUEST_SEED` sem conv request attributes to events/spans generated in the stable instrumentation. This was added pre sem conv 1.36 so it should be in the stable instrumentation. See ([#4011](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4011)). ## Version 2.1b0 (2025-10-16)
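
---

Reviewer note (not part of the patch): a minimal, standalone sketch of the two behaviours this series describes — the `response_mime_type` → `gen_ai.output.type` mapping that is now emitted in the stable (non-experimental) code path, and the flattening of a `system_instruction` (accepted by the SDK as a string, a list of strings, or a list of `Part`s) into the single text string recorded under `{"content": ...}` in the `gen_ai.system.message` event. The names `output_type_from_mime`, `FakePart`, and `flatten_system_instruction` are illustrative stand-ins, not the instrumentation's internal API.

```python
from typing import Iterable, Optional, Union


def output_type_from_mime(response_mime_type: Optional[str]) -> Optional[str]:
    # Well-known MIME types collapse to the spec-defined "text"/"json"
    # values; anything else is passed through unchanged, mirroring the
    # mapping applied in the patched request-attribute helpers.
    if not response_mime_type:
        return None
    if response_mime_type == "text/plain":
        return "text"
    if response_mime_type == "application/json":
        return "json"
    return response_mime_type


class FakePart:
    # Illustrative stand-in for a google.genai Part; only the text field
    # matters here, since the API rejects non-text system-instruction parts.
    def __init__(self, text: Optional[str] = None):
        self.text = text


def flatten_system_instruction(
    parts: Iterable[Union[str, FakePart]],
) -> Optional[str]:
    # Join the text of all parts with single spaces; return None when
    # nothing textual remains, in which case no system-prompt event is logged.
    texts = []
    for part in parts:
        text = part if isinstance(part, str) else part.text
        if text:
            texts.append(text)
    return " ".join(texts) or None


if __name__ == "__main__":
    assert output_type_from_mime("application/json") == "json"
    assert output_type_from_mime("text/csv") == "text/csv"
    assert flatten_system_instruction(["help", "me please."]) == "help me please."
    assert flatten_system_instruction([FakePart(text=None)]) is None
```

The sample assertions intentionally match the expectations in the new `test_system_prompt_passed_as_list_of_text` / `_parts` / `_is_invalid` tests above: list inputs are joined into one string, and an instruction with no text parts produces no `gen_ai.system.message` event.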