diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py index 7c805d6995..f392bb7830 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py @@ -41,7 +41,10 @@ should_emit_events, should_send_prompts, ) -from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.instrumentation.utils import ( + _SUPPRESS_INSTRUMENTATION_KEY, + suppress_http_instrumentation, +) from opentelemetry.metrics import Counter, Histogram from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE from opentelemetry.semconv._incubating.attributes import ( @@ -99,7 +102,9 @@ def chat_wrapper( run_async(_handle_request(span, kwargs, instance)) try: start_time = time.time() - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) end_time = time.time() except Exception as e: # pylint: disable=broad-except end_time = time.time() @@ -198,7 +203,9 @@ async def achat_wrapper( try: start_time = time.time() - response = await wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = await wrapped(*args, **kwargs) end_time = time.time() except Exception as e: # pylint: disable=broad-except end_time = time.time() diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py index b4461cd841..9fc7b9a008 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py @@ -29,7 +29,10 @@ should_emit_events, should_send_prompts, ) -from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.instrumentation.utils import ( + _SUPPRESS_INSTRUMENTATION_KEY, + suppress_http_instrumentation, +) from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, ) @@ -67,7 +70,9 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs): _handle_request(span, kwargs, instance) try: - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) except Exception as e: span.set_attribute(ERROR_TYPE, e.__class__.__name__) span.record_exception(e) @@ -103,7 +108,9 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs): _handle_request(span, kwargs, instance) try: - response = await wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = await wrapped(*args, **kwargs) except Exception as e: span.set_attribute(ERROR_TYPE, e.__class__.__name__) span.record_exception(e) diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py index 7899095f8a..a2c4ed8edd 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py @@ -30,7 +30,10 @@ should_send_prompts, start_as_current_span_async, ) -from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.instrumentation.utils import ( + _SUPPRESS_INSTRUMENTATION_KEY, + suppress_http_instrumentation, +) from opentelemetry.metrics import Counter, Histogram from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE from opentelemetry.semconv._incubating.attributes import ( @@ -80,7 +83,9 @@ def embeddings_wrapper( try: # record time for duration start_time = time.time() - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) end_time = time.time() except Exception as e: # pylint: disable=broad-except end_time = time.time() @@ -145,7 +150,9 @@ async def aembeddings_wrapper( try: # record time for duration start_time = time.time() - response = await wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = await wrapped(*args, **kwargs) end_time = time.time() except Exception as e: # pylint: disable=broad-except end_time = time.time() diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py index eddeac50da..d631995ffb 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py @@ -10,7 +10,10 @@ from opentelemetry.instrumentation.openai.utils import ( _with_image_gen_metric_wrapper, ) -from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.instrumentation.utils import ( + _SUPPRESS_INSTRUMENTATION_KEY, + suppress_http_instrumentation, +) from opentelemetry.metrics import Counter, Histogram from opentelemetry.semconv_ai import SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY @@ -32,7 +35,9 @@ def image_gen_metrics_wrapper( try: # record time for duration start_time = time.time() - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) end_time = time.time() except Exception as e: # pylint: disable=broad-except end_time = time.time() diff --git a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py index 88aaca8034..df4845dbef 100644 --- a/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py +++ b/packages/opentelemetry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py @@ -9,7 +9,10 @@ from openai._legacy_response import LegacyAPIResponse from openai._response import APIResponse, AsyncAPIResponse from opentelemetry import context as context_api -from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY +from opentelemetry.instrumentation.utils import ( + _SUPPRESS_INSTRUMENTATION_KEY, + suppress_http_instrumentation, +) from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAIAttributes, openai_attributes as OpenAIAttributes, @@ -491,7 +494,9 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa non_sentinel_kwargs = _sanitize_sentinel_values(kwargs) try: - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) if isinstance(response, Stream): # Capture current trace context to maintain trace continuity ctx = context_api.get_current() @@ -662,7 +667,9 @@ async def async_responses_get_or_create_wrapper( non_sentinel_kwargs = _sanitize_sentinel_values(kwargs) try: - response = await wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = await wrapped(*args, **kwargs) if isinstance(response, (Stream, AsyncStream)): # Capture current trace context to maintain trace continuity ctx = context_api.get_current() @@ -825,7 +832,9 @@ def responses_cancel_wrapper(tracer: Tracer, wrapped, instance, args, kwargs): non_sentinel_kwargs = _sanitize_sentinel_values(kwargs) - response = wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = wrapped(*args, **kwargs) if isinstance(response, Stream): return response parsed_response = parse_response(response) @@ -857,7 +866,9 @@ async def async_responses_cancel_wrapper( non_sentinel_kwargs = _sanitize_sentinel_values(kwargs) - response = await wrapped(*args, **kwargs) + # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845). + with suppress_http_instrumentation(): + response = await wrapped(*args, **kwargs) if isinstance(response, (Stream, AsyncStream)): return response parsed_response = await async_parse_response(response) diff --git a/packages/opentelemetry-instrumentation-openai/pyproject.toml b/packages/opentelemetry-instrumentation-openai/pyproject.toml index 7fae9b0296..1c0a020428 100644 --- a/packages/opentelemetry-instrumentation-openai/pyproject.toml +++ b/packages/opentelemetry-instrumentation-openai/pyproject.toml @@ -33,6 +33,7 @@ dev = [ ] test = [ "openai[datalib]==1.99.7", + "opentelemetry-instrumentation-httpx>=0.63b1", "opentelemetry-sdk>=1.38.0,<2", "pytest-asyncio>=0.23.7,<0.24.0", "pytest-recording>=0.13.1,<0.14.0", diff --git a/packages/opentelemetry-instrumentation-openai/tests/traces/test_suppress_http_instrumentation.py b/packages/opentelemetry-instrumentation-openai/tests/traces/test_suppress_http_instrumentation.py new file mode 100644 index 0000000000..12336fb4b7 --- /dev/null +++ b/packages/opentelemetry-instrumentation-openai/tests/traces/test_suppress_http_instrumentation.py @@ -0,0 +1,144 @@ +"""Regression tests for https://github.com/traceloop/openllmetry/issues/2845 + +When the user instruments the underlying HTTP client (e.g. httpx) in addition +to OpenAI, every OpenAI call used to be traced twice: once by this +instrumentation (``openai.chat``) and once by the HTTP instrumentation (a raw +``POST`` span). The OpenAI instrumentation now suppresses HTTP-client +instrumentation around the request, so only the ``openai.chat`` span is emitted. + +These tests hit a local HTTP server over a real socket so the httpx +instrumentation actually runs (VCR replay would bypass the transport it wraps). +""" + +import json +import threading +from http.server import BaseHTTPRequestHandler, HTTPServer + +import pytest +from openai import OpenAI +from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor + +_CHAT_RESPONSE = { + "id": "chatcmpl-test", + "object": "chat.completion", + "created": 1700000000, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": {"role": "assistant", "content": "hello"}, + "finish_reason": "stop", + } + ], + "usage": {"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6}, +} + +_STREAM_CHUNKS = [ + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 1700000000, + "model": "gpt-4o-mini", + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": "hello"}, + "finish_reason": None} + ], + }, + { + "id": "chatcmpl-test", + "object": "chat.completion.chunk", + "created": 1700000000, + "model": "gpt-4o-mini", + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + }, +] + + +class _OpenAIStubHandler(BaseHTTPRequestHandler): + def do_POST(self): + length = int(self.headers.get("Content-Length", 0)) + payload = json.loads(self.rfile.read(length) or b"{}") + if payload.get("stream"): + self.send_response(200) + self.send_header("Content-Type", "text/event-stream") + self.end_headers() + for chunk in _STREAM_CHUNKS: + self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode()) + self.wfile.write(b"data: [DONE]\n\n") + return + body = json.dumps(_CHAT_RESPONSE).encode() + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, *args): # silence the default stderr logging + pass + + +@pytest.fixture +def openai_stub_server(): + server = HTTPServer(("127.0.0.1", 0), _OpenAIStubHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + host, port = server.server_address + try: + yield f"http://{host}:{port}/v1" + finally: + server.shutdown() + thread.join() + server.server_close() + + +@pytest.fixture +def instrument_httpx(tracer_provider): + HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider) + yield + HTTPXClientInstrumentor().uninstrument() + + +def _http_spans(span_exporter): + return [ + span + for span in span_exporter.get_finished_spans() + if span.attributes.get("http.request.method") + or span.attributes.get("http.method") + ] + + +def test_chat_does_not_emit_duplicate_httpx_span( + instrument_legacy, instrument_httpx, span_exporter, openai_stub_server +): + # No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching + # the repo's existing local-server client fixtures (e.g. vllm_openai_client). + client = OpenAI(base_url=openai_stub_server) + + client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "say hello only"}], + ) + + span_names = [span.name for span in span_exporter.get_finished_spans()] + assert span_names == ["openai.chat"] + assert _http_spans(span_exporter) == [] + + +def test_chat_streaming_does_not_emit_duplicate_httpx_span( + instrument_legacy, instrument_httpx, span_exporter, openai_stub_server +): + # No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching + # the repo's existing local-server client fixtures (e.g. vllm_openai_client). + client = OpenAI(base_url=openai_stub_server) + + stream = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "say hello only"}], + stream=True, + ) + for _ in stream: + pass + + span_names = [span.name for span in span_exporter.get_finished_spans()] + assert span_names == ["openai.chat"] + assert _http_spans(span_exporter) == [] diff --git a/packages/opentelemetry-instrumentation-openai/uv.lock b/packages/opentelemetry-instrumentation-openai/uv.lock index 40a60b6a95..b1f64b307e 100644 --- a/packages/opentelemetry-instrumentation-openai/uv.lock +++ b/packages/opentelemetry-instrumentation-openai/uv.lock @@ -531,6 +531,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/35/a1/9314e621c143e4d82a5bf7a43c2ff7a745d31023506336857607c8c543cc/opentelemetry_instrumentation-0.63b1-py3-none-any.whl", hash = "sha256:f1986716d52cc316ea5f60189098726a9071d8ecc0eee96c9ed110be08bade9c", size = 35577, upload-time = "2026-05-21T16:34:56.818Z" }, ] +[[package]] +name = "opentelemetry-instrumentation-httpx" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/27/c2b4335bca030e893acbe5ff2b4f434868773bf94508be7e6bf5af981b24/opentelemetry_instrumentation_httpx-0.63b1.tar.gz", hash = "sha256:f41ec82f25c3abcdada621052db3e5fd648e3b43d55eec4b9c0c5d3ecb7b4ff4", size = 23557, upload-time = "2026-05-21T16:36:34.583Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/b8/f536780996195c3b9f2354998554671e05a7a262df8c043f63fe9e5a6f0b/opentelemetry_instrumentation_httpx-0.63b1-py3-none-any.whl", hash = "sha256:14df6e99d81be9a8cd238f6639b6fa52404c4d3ce219058fcb5dc8c0f2211f86", size = 16336, upload-time = "2026-05-21T16:35:32.221Z" }, +] + [[package]] name = "opentelemetry-instrumentation-openai" version = "0.61.0" @@ -554,6 +570,7 @@ dev = [ ] test = [ { name = "openai", extra = ["datalib"] }, + { name = "opentelemetry-instrumentation-httpx" }, { name = "opentelemetry-sdk" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -580,6 +597,7 @@ dev = [ ] test = [ { name = "openai", extras = ["datalib"], specifier = "==1.99.7" }, + { name = "opentelemetry-instrumentation-httpx", specifier = ">=0.63b1" }, { name = "opentelemetry-sdk", specifier = ">=1.38.0,<2" }, { name = "pytest", specifier = ">=8.2.2,<9" }, { name = "pytest-asyncio", specifier = ">=0.23.7,<0.24.0" }, @@ -629,6 +647,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/22/41fb05f1dc5fda2c468e05a41814c20859016c85117b66c8a257cae814f6/opentelemetry_semantic_conventions_ai-0.5.1-py3-none-any.whl", hash = "sha256:25aeb22bd261543b4898a73824026d96770e5351209c7d07a0b1314762b1f6e4", size = 11250, upload-time = "2026-03-26T14:20:37.108Z" }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.63b1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/d8/7bf5e4cec0578ac3c28c18eb7b88f34279139cbc8c568d6aa02b9c5ae53e/opentelemetry_util_http-0.63b1.tar.gz", hash = "sha256:ba1268f00922ee522dba2ae38458060f99486e7385a8056985901ca9685adfff", size = 11102, upload-time = "2026-05-21T16:36:56.675Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/f1/34e047e8f6a3c67e5220acf1af7b9f62868c25d77791bca74457bd2180a6/opentelemetry_util_http-0.63b1-py3-none-any.whl", hash = "sha256:6284194028c59cd439f8acfe388145069a6127f11dc077e1344a2094adacc3f8", size = 8205, upload-time = "2026-05-21T16:36:09.736Z" }, +] + [[package]] name = "packaging" version = "25.0"