traceloop · leorivastech · Jun 16, 2026
diff --git a/...metry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py b/...metry-instrumentation-openai/opentelemetry/instrumentation/openai/shared/chat_wrappers.py
@@ -41,7 +41,10 @@
     should_emit_events,
     should_send_prompts,
 )
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.utils import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    suppress_http_instrumentation,
+)
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
 from opentelemetry.semconv._incubating.attributes import (
@@ -99,7 +102,9 @@ def chat_wrapper(
         run_async(_handle_request(span, kwargs, instance))
         try:
             start_time = time.time()
-            response = wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = wrapped(*args, **kwargs)
             end_time = time.time()
         except Exception as e:  # pylint: disable=broad-except
             end_time = time.time()
@@ -198,7 +203,9 @@ async def achat_wrapper(
 
         try:
             start_time = time.time()
-            response = await wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = await wrapped(*args, **kwargs)
             end_time = time.time()
         except Exception as e:  # pylint: disable=broad-except
             end_time = time.time()

diff --git a/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py b/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/completion_wrappers.py
@@ -29,7 +29,10 @@
     should_emit_events,
     should_send_prompts,
 )
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.utils import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    suppress_http_instrumentation,
+)
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
 )
@@ -67,7 +70,9 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs):
         _handle_request(span, kwargs, instance)
 
         try:
-            response = wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = wrapped(*args, **kwargs)
         except Exception as e:
             span.set_attribute(ERROR_TYPE, e.__class__.__name__)
             span.record_exception(e)
@@ -103,7 +108,9 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
         _handle_request(span, kwargs, instance)
 
         try:
-            response = await wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = await wrapped(*args, **kwargs)
         except Exception as e:
             span.set_attribute(ERROR_TYPE, e.__class__.__name__)
             span.record_exception(e)

diff --git a/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py b/...instrumentation-openai/opentelemetry/instrumentation/openai/shared/embeddings_wrappers.py
@@ -30,7 +30,10 @@
     should_send_prompts,
     start_as_current_span_async,
 )
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.utils import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    suppress_http_instrumentation,
+)
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
 from opentelemetry.semconv._incubating.attributes import (
@@ -80,7 +83,9 @@ def embeddings_wrapper(
         try:
             # record time for duration
             start_time = time.time()
-            response = wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = wrapped(*args, **kwargs)
             end_time = time.time()
         except Exception as e:  # pylint: disable=broad-except
             end_time = time.time()
@@ -145,7 +150,9 @@ async def aembeddings_wrapper(
         try:
             # record time for duration
             start_time = time.time()
-            response = await wrapped(*args, **kwargs)
+            # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+            with suppress_http_instrumentation():
+                response = await wrapped(*args, **kwargs)
             end_time = time.time()
         except Exception as e:  # pylint: disable=broad-except
             end_time = time.time()

diff --git a/...-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py b/...-instrumentation-openai/opentelemetry/instrumentation/openai/shared/image_gen_wrappers.py
@@ -10,7 +10,10 @@
 from opentelemetry.instrumentation.openai.utils import (
     _with_image_gen_metric_wrapper,
 )
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.utils import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    suppress_http_instrumentation,
+)
 from opentelemetry.metrics import Counter, Histogram
 from opentelemetry.semconv_ai import SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
 
@@ -32,7 +35,9 @@ def image_gen_metrics_wrapper(
     try:
         # record time for duration
         start_time = time.time()
-        response = wrapped(*args, **kwargs)
+        # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+        with suppress_http_instrumentation():
+            response = wrapped(*args, **kwargs)
         end_time = time.time()
     except Exception as e:  # pylint: disable=broad-except
         end_time = time.time()

diff --git a/...etry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py b/...etry-instrumentation-openai/opentelemetry/instrumentation/openai/v1/responses_wrappers.py
@@ -9,7 +9,10 @@
 from openai._legacy_response import LegacyAPIResponse
 from openai._response import APIResponse, AsyncAPIResponse
 from opentelemetry import context as context_api
-from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
+from opentelemetry.instrumentation.utils import (
+    _SUPPRESS_INSTRUMENTATION_KEY,
+    suppress_http_instrumentation,
+)
 from opentelemetry.semconv._incubating.attributes import (
     gen_ai_attributes as GenAIAttributes,
     openai_attributes as OpenAIAttributes,
@@ -491,7 +494,9 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
     non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)
 
     try:
-        response = wrapped(*args, **kwargs)
+        # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+        with suppress_http_instrumentation():
+            response = wrapped(*args, **kwargs)
         if isinstance(response, Stream):
             # Capture current trace context to maintain trace continuity
             ctx = context_api.get_current()
@@ -662,7 +667,9 @@ async def async_responses_get_or_create_wrapper(
     non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)
 
     try:
-        response = await wrapped(*args, **kwargs)
+        # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+        with suppress_http_instrumentation():
+            response = await wrapped(*args, **kwargs)
         if isinstance(response, (Stream, AsyncStream)):
             # Capture current trace context to maintain trace continuity
             ctx = context_api.get_current()
@@ -825,7 +832,9 @@ def responses_cancel_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):
 
     non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)
 
-    response = wrapped(*args, **kwargs)
+    # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+    with suppress_http_instrumentation():
+        response = wrapped(*args, **kwargs)
     if isinstance(response, Stream):
         return response
     parsed_response = parse_response(response)
@@ -857,7 +866,9 @@ async def async_responses_cancel_wrapper(
 
     non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)
 
-    response = await wrapped(*args, **kwargs)
+    # Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
+    with suppress_http_instrumentation():
+        response = await wrapped(*args, **kwargs)
     if isinstance(response, (Stream, AsyncStream)):
         return response
     parsed_response = await async_parse_response(response)

diff --git a/packages/opentelemetry-instrumentation-openai/pyproject.toml b/packages/opentelemetry-instrumentation-openai/pyproject.toml
@@ -33,6 +33,7 @@ dev = [
 ]
 test = [
   "openai[datalib]==1.99.7",
+  "opentelemetry-instrumentation-httpx>=0.63b1",
   "opentelemetry-sdk>=1.38.0,<2",
   "pytest-asyncio>=0.23.7,<0.24.0",
   "pytest-recording>=0.13.1,<0.14.0",

diff --git a/...s/opentelemetry-instrumentation-openai/tests/traces/test_suppress_http_instrumentation.py b/...s/opentelemetry-instrumentation-openai/tests/traces/test_suppress_http_instrumentation.py
@@ -0,0 +1,144 @@
+"""Regression tests for https://github.com/traceloop/openllmetry/issues/2845
+
+When the user instruments the underlying HTTP client (e.g. httpx) in addition
+to OpenAI, every OpenAI call used to be traced twice: once by this
+instrumentation (``openai.chat``) and once by the HTTP instrumentation (a raw
+``POST`` span). The OpenAI instrumentation now suppresses HTTP-client
+instrumentation around the request, so only the ``openai.chat`` span is emitted.
+
+These tests hit a local HTTP server over a real socket so the httpx
+instrumentation actually runs (VCR replay would bypass the transport it wraps).
+"""
+
+import json
+import threading
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+import pytest
+from openai import OpenAI
+from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
+
+_CHAT_RESPONSE = {
+    "id": "chatcmpl-test",
+    "object": "chat.completion",
+    "created": 1700000000,
+    "model": "gpt-4o-mini",
+    "choices": [
+        {
+            "index": 0,
+            "message": {"role": "assistant", "content": "hello"},
+            "finish_reason": "stop",
+        }
+    ],
+    "usage": {"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6},
+}
+
+_STREAM_CHUNKS = [
+    {
+        "id": "chatcmpl-test",
+        "object": "chat.completion.chunk",
+        "created": 1700000000,
+        "model": "gpt-4o-mini",
+        "choices": [
+            {"index": 0, "delta": {"role": "assistant", "content": "hello"},
+             "finish_reason": None}
+        ],
+    },
+    {
+        "id": "chatcmpl-test",
+        "object": "chat.completion.chunk",
+        "created": 1700000000,
+        "model": "gpt-4o-mini",
+        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+    },
+]
+
+
+class _OpenAIStubHandler(BaseHTTPRequestHandler):
+    def do_POST(self):
+        length = int(self.headers.get("Content-Length", 0))
+        payload = json.loads(self.rfile.read(length) or b"{}")
+        if payload.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            for chunk in _STREAM_CHUNKS:
+                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode())
+            self.wfile.write(b"data: [DONE]\n\n")
+            return
+        body = json.dumps(_CHAT_RESPONSE).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, *args):  # silence the default stderr logging
+        pass
+
+
+@pytest.fixture
+def openai_stub_server():
+    server = HTTPServer(("127.0.0.1", 0), _OpenAIStubHandler)
+    thread = threading.Thread(target=server.serve_forever, daemon=True)
+    thread.start()
+    host, port = server.server_address
+    try:
+        yield f"http://{host}:{port}/v1"
+    finally:
+        server.shutdown()
+        thread.join()
+        server.server_close()
+
+
+@pytest.fixture
+def instrument_httpx(tracer_provider):
+    HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider)
+    yield
+    HTTPXClientInstrumentor().uninstrument()
+
+
+def _http_spans(span_exporter):
+    return [
+        span
+        for span in span_exporter.get_finished_spans()
+        if span.attributes.get("http.request.method")
+        or span.attributes.get("http.method")
+    ]
+
+
+def test_chat_does_not_emit_duplicate_httpx_span(
+    instrument_legacy, instrument_httpx, span_exporter, openai_stub_server
+):
+    # No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching
+    # the repo's existing local-server client fixtures (e.g. vllm_openai_client).
+    client = OpenAI(base_url=openai_stub_server)
+
+    client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "say hello only"}],
+    )
+
+    span_names = [span.name for span in span_exporter.get_finished_spans()]
+    assert span_names == ["openai.chat"]
+    assert _http_spans(span_exporter) == []
+
+
+def test_chat_streaming_does_not_emit_duplicate_httpx_span(
+    instrument_legacy, instrument_httpx, span_exporter, openai_stub_server
+):
+    # No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching
+    # the repo's existing local-server client fixtures (e.g. vllm_openai_client).
+    client = OpenAI(base_url=openai_stub_server)
+
+    stream = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "say hello only"}],
+        stream=True,
+    )
+    for _ in stream:
+        pass
+
+    span_names = [span.name for span in span_exporter.get_finished_spans()]
+    assert span_names == ["openai.chat"]
+    assert _http_spans(span_exporter) == []
diff --git a/packages/opentelemetry-instrumentation-openai/uv.lock b/packages/opentelemetry-instrumentation-openai/uv.lock