Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@
should_emit_events,
should_send_prompts,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.utils import (
_SUPPRESS_INSTRUMENTATION_KEY,
suppress_http_instrumentation,
)
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.semconv._incubating.attributes import (
Expand Down Expand Up @@ -99,7 +102,9 @@ def chat_wrapper(
run_async(_handle_request(span, kwargs, instance))
try:
start_time = time.time()
response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
Expand Down Expand Up @@ -198,7 +203,9 @@ async def achat_wrapper(

try:
start_time = time.time()
response = await wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = await wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
should_emit_events,
should_send_prompts,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.utils import (
_SUPPRESS_INSTRUMENTATION_KEY,
suppress_http_instrumentation,
)
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
Expand Down Expand Up @@ -67,7 +70,9 @@ def completion_wrapper(tracer, wrapped, instance, args, kwargs):
_handle_request(span, kwargs, instance)

try:
response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
Expand Down Expand Up @@ -103,7 +108,9 @@ async def acompletion_wrapper(tracer, wrapped, instance, args, kwargs):
_handle_request(span, kwargs, instance)

try:
response = await wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = await wrapped(*args, **kwargs)
except Exception as e:
span.set_attribute(ERROR_TYPE, e.__class__.__name__)
span.record_exception(e)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@
should_send_prompts,
start_as_current_span_async,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.utils import (
_SUPPRESS_INSTRUMENTATION_KEY,
suppress_http_instrumentation,
)
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
from opentelemetry.semconv._incubating.attributes import (
Expand Down Expand Up @@ -80,7 +83,9 @@ def embeddings_wrapper(
try:
# record time for duration
start_time = time.time()
response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
Expand Down Expand Up @@ -145,7 +150,9 @@ async def aembeddings_wrapper(
try:
# record time for duration
start_time = time.time()
response = await wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = await wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
from opentelemetry.instrumentation.openai.utils import (
_with_image_gen_metric_wrapper,
)
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.utils import (
_SUPPRESS_INSTRUMENTATION_KEY,
suppress_http_instrumentation,
)
from opentelemetry.metrics import Counter, Histogram
from opentelemetry.semconv_ai import SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY

Expand All @@ -32,7 +35,9 @@ def image_gen_metrics_wrapper(
try:
# record time for duration
start_time = time.time()
response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
end_time = time.time()
except Exception as e: # pylint: disable=broad-except
end_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
from openai._legacy_response import LegacyAPIResponse
from openai._response import APIResponse, AsyncAPIResponse
from opentelemetry import context as context_api
from opentelemetry.instrumentation.utils import _SUPPRESS_INSTRUMENTATION_KEY
from opentelemetry.instrumentation.utils import (
_SUPPRESS_INSTRUMENTATION_KEY,
suppress_http_instrumentation,
)
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
openai_attributes as OpenAIAttributes,
Expand Down Expand Up @@ -491,7 +494,9 @@ def responses_get_or_create_wrapper(tracer: Tracer, wrapped, instance, args, kwa
non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)

try:
response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
if isinstance(response, Stream):
# Capture current trace context to maintain trace continuity
ctx = context_api.get_current()
Expand Down Expand Up @@ -662,7 +667,9 @@ async def async_responses_get_or_create_wrapper(
non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)

try:
response = await wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = await wrapped(*args, **kwargs)
if isinstance(response, (Stream, AsyncStream)):
# Capture current trace context to maintain trace continuity
ctx = context_api.get_current()
Expand Down Expand Up @@ -825,7 +832,9 @@ def responses_cancel_wrapper(tracer: Tracer, wrapped, instance, args, kwargs):

non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)

response = wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = wrapped(*args, **kwargs)
if isinstance(response, Stream):
return response
parsed_response = parse_response(response)
Expand Down Expand Up @@ -857,7 +866,9 @@ async def async_responses_cancel_wrapper(

non_sentinel_kwargs = _sanitize_sentinel_values(kwargs)

response = await wrapped(*args, **kwargs)
# Suppress the HTTP-client span (e.g. httpx) so the request isn't double-traced (#2845).
with suppress_http_instrumentation():
response = await wrapped(*args, **kwargs)
if isinstance(response, (Stream, AsyncStream)):
return response
parsed_response = await async_parse_response(response)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ dev = [
]
test = [
"openai[datalib]==1.99.7",
"opentelemetry-instrumentation-httpx>=0.63b1",
"opentelemetry-sdk>=1.38.0,<2",
"pytest-asyncio>=0.23.7,<0.24.0",
"pytest-recording>=0.13.1,<0.14.0",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
"""Regression tests for https://github.com/traceloop/openllmetry/issues/2845

When the user instruments the underlying HTTP client (e.g. httpx) in addition
to OpenAI, every OpenAI call used to be traced twice: once by this
instrumentation (``openai.chat``) and once by the HTTP instrumentation (a raw
``POST`` span). The OpenAI instrumentation now suppresses HTTP-client
instrumentation around the request, so only the ``openai.chat`` span is emitted.

These tests hit a local HTTP server over a real socket so the httpx
instrumentation actually runs (VCR replay would bypass the transport it wraps).
"""

import json
import threading
from http.server import BaseHTTPRequestHandler, HTTPServer

import pytest
from openai import OpenAI
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

_CHAT_RESPONSE = {
"id": "chatcmpl-test",
"object": "chat.completion",
"created": 1700000000,
"model": "gpt-4o-mini",
"choices": [
{
"index": 0,
"message": {"role": "assistant", "content": "hello"},
"finish_reason": "stop",
}
],
"usage": {"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6},
}

_STREAM_CHUNKS = [
{
"id": "chatcmpl-test",
"object": "chat.completion.chunk",
"created": 1700000000,
"model": "gpt-4o-mini",
"choices": [
{"index": 0, "delta": {"role": "assistant", "content": "hello"},
"finish_reason": None}
],
},
{
"id": "chatcmpl-test",
"object": "chat.completion.chunk",
"created": 1700000000,
"model": "gpt-4o-mini",
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
},
]


class _OpenAIStubHandler(BaseHTTPRequestHandler):
def do_POST(self):
length = int(self.headers.get("Content-Length", 0))
payload = json.loads(self.rfile.read(length) or b"{}")
if payload.get("stream"):
self.send_response(200)
self.send_header("Content-Type", "text/event-stream")
self.end_headers()
for chunk in _STREAM_CHUNKS:
self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode())
self.wfile.write(b"data: [DONE]\n\n")
return
body = json.dumps(_CHAT_RESPONSE).encode()
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)

def log_message(self, *args): # silence the default stderr logging
pass


@pytest.fixture
def openai_stub_server():
server = HTTPServer(("127.0.0.1", 0), _OpenAIStubHandler)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
host, port = server.server_address
try:
yield f"http://{host}:{port}/v1"
finally:
server.shutdown()
thread.join()
Comment thread
coderabbitai[bot] marked this conversation as resolved.
server.server_close()


@pytest.fixture
def instrument_httpx(tracer_provider):
HTTPXClientInstrumentor().instrument(tracer_provider=tracer_provider)
yield
HTTPXClientInstrumentor().uninstrument()


def _http_spans(span_exporter):
return [
span
for span in span_exporter.get_finished_spans()
if span.attributes.get("http.request.method")
or span.attributes.get("http.method")
]


def test_chat_does_not_emit_duplicate_httpx_span(
instrument_legacy, instrument_httpx, span_exporter, openai_stub_server
):
# No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching
# the repo's existing local-server client fixtures (e.g. vllm_openai_client).
client = OpenAI(base_url=openai_stub_server)

client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "say hello only"}],
)

span_names = [span.name for span in span_exporter.get_finished_spans()]
assert span_names == ["openai.chat"]
assert _http_spans(span_exporter) == []


def test_chat_streaming_does_not_emit_duplicate_httpx_span(
instrument_legacy, instrument_httpx, span_exporter, openai_stub_server
):
# No api_key: the autouse `environment` fixture sets OPENAI_API_KEY, matching
# the repo's existing local-server client fixtures (e.g. vllm_openai_client).
client = OpenAI(base_url=openai_stub_server)

stream = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "say hello only"}],
stream=True,
)
for _ in stream:
pass

span_names = [span.name for span in span_exporter.get_finished_spans()]
assert span_names == ["openai.chat"]
assert _http_spans(span_exporter) == []
27 changes: 27 additions & 0 deletions packages/opentelemetry-instrumentation-openai/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.