Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions helm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,14 @@ This table documents all available configuration values for the Production Stack
| `routerSpec.readinessProbe.failureThreshold` | integer |`3`| Failure threshold for router's readiness probe |
| `routerSpec.readinessProbe.httpGet.path` | string |`"/health"`| Endpoint that the router's readiness probe will be testing |

#### Router OpenTelemetry Configuration

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `routerSpec.otel.endpoint` | string | `""` | OTLP endpoint for tracing (e.g., "otel-collector:4317"). Tracing is enabled when this is set. |
| `routerSpec.otel.serviceName` | string | `"vllm-router"` | Service name for OpenTelemetry traces |
| `routerSpec.otel.secure` | boolean | `false` | Use secure (TLS) connection for OTLP exporter |

#### Router Ingress Configuration

| Field | Type | Default | Description |
Expand Down
9 changes: 9 additions & 0 deletions helm/templates/deployment-router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,15 @@ spec:
- "--lmcache-controller-port"
- "{{ .Values.routerSpec.lmcacheControllerPort }}"
{{- end }}
{{- if .Values.routerSpec.otel.endpoint }}
- "--otel-endpoint"
- "{{ .Values.routerSpec.otel.endpoint }}"
- "--otel-service-name"
- "{{ .Values.routerSpec.otel.serviceName | default "vllm-router" }}"
{{- if .Values.routerSpec.otel.secure }}
- "--otel-secure"
{{- end }}
{{- end }}
{{- if .Values.routerSpec.resources }}
resources:
{{- if .Values.routerSpec.resources.requests }}
Expand Down
77 changes: 77 additions & 0 deletions helm/tests/routerOtel_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
suite: test router OpenTelemetry configuration
templates:
- deployment-router.yaml
tests:
- it: should not include otel args when endpoint is not set
set:
routerSpec:
enableRouter: true
otel:
endpoint: ""
asserts:
- template: deployment-router.yaml
notContains:
path: spec.template.spec.containers[0].args
content: "--otel-endpoint"

- it: should include otel args when endpoint is set
set:
routerSpec:
enableRouter: true
otel:
endpoint: "otel-collector:4317"
serviceName: "vllm-router"
secure: false
asserts:
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "--otel-endpoint"
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "otel-collector:4317"
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "--otel-service-name"
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "vllm-router"
- template: deployment-router.yaml
notContains:
path: spec.template.spec.containers[0].args
content: "--otel-secure"

- it: should use custom service name when specified
set:
routerSpec:
enableRouter: true
otel:
endpoint: "jaeger:4317"
serviceName: "my-custom-router"
secure: false
asserts:
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "my-custom-router"

- it: should include otel-secure flag when secure is true
set:
routerSpec:
enableRouter: true
otel:
endpoint: "otel-collector:4317"
serviceName: "vllm-router"
secure: true
asserts:
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "--otel-endpoint"
- template: deployment-router.yaml
contains:
path: spec.template.spec.containers[0].args
content: "--otel-secure"
20 changes: 20 additions & 0 deletions helm/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,26 @@
"additionalProperties": {
"type": "string"
}
},
"otel": {
"type": "object",
"description": "OpenTelemetry tracing configuration for the router",
"properties": {
"endpoint": {
"type": "string",
"description": "OTLP endpoint for tracing (e.g., 'otel-collector:4317'). Tracing is enabled when this is set."
},
"serviceName": {
"type": "string",
"description": "Service name for OpenTelemetry traces",
"default": "vllm-router"
},
"secure": {
"type": "boolean",
"description": "Use secure (TLS) connection for OTLP exporter",
"default": false
}
}
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,16 @@ routerSpec:
# -- Window size in seconds to calculate the request statistics
requestStatsWindow: 60

# -- OpenTelemetry tracing configuration
# When otel.endpoint is set, tracing is automatically enabled
otel:
# -- OTLP endpoint for tracing (e.g., "localhost:4317" or "otel-collector:4317")
endpoint: ""
# -- Service name for traces (default: "vllm-router")
serviceName: "vllm-router"
# -- Use secure (TLS) connection for OTLP exporter (default: false, i.e., insecure)
secure: false

# -- deployment strategy
strategy: {}

Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ dependencies = [
"xxhash==3.5.0",
"psutil==7.0.0",
"pyyaml>=6.0.2",
"opentelemetry-api>=1.28.0",
"opentelemetry-sdk>=1.28.0",
"opentelemetry-exporter-otlp>=1.28.0",
]

[project.scripts]
Expand Down
81 changes: 81 additions & 0 deletions src/tests/test_otel_tracing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import pytest
from opentelemetry.trace import SpanKind

import vllm_router.experimental.otel.tracing as tracing_module
from vllm_router.experimental.otel.tracing import (
end_span,
extract_context,
initialize_tracing,
inject_context,
is_tracing_enabled,
shutdown_tracing,
start_span,
)


@pytest.fixture(autouse=True)
def reset_tracing_state():
    """Start every test with the tracing module's globals cleared.

    Before the test runs, the module-level tracer and provider are set to
    ``None`` and the enabled flag to ``False``. After the test, tracing is
    shut down only if the test left it enabled.
    """
    pristine_state = (
        ("_tracer", None),
        ("_provider", None),
        ("_tracing_enabled", False),
    )
    for attr_name, attr_value in pristine_state:
        setattr(tracing_module, attr_name, attr_value)

    yield

    # Teardown: nothing to do unless the test turned tracing on.
    if not tracing_module._tracing_enabled:
        return
    shutdown_tracing()


class TestTracingIntegration:
    """Integration-style checks for the router's tracing helpers."""

    def test_full_request_flow(self):
        """Trace one request: extract, server span, client span, inject, end."""
        initialize_tracing(service_name="vllm-router", otlp_endpoint="localhost:4317")

        # Context carried by the inbound request's `traceparent` header.
        upstream_ctx = extract_context(
            {"traceparent": "00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01"}
        )

        # Router-side span, parented on the extracted context.
        server_attrs = {
            "http.method": "POST",
            "vllm.model": "Qwen/Qwen2.5-7B-Instruct",
        }
        router_span, router_ctx = start_span(
            name="router /v1/chat/completions",
            parent_context=upstream_ctx,
            kind=SpanKind.SERVER,
            attributes=server_attrs,
        )

        # Span for the outbound backend call, parented on the router span's context.
        client_attrs = {"http.url": "http://backend:8000/v1/chat/completions"}
        backend_span, backend_ctx = start_span(
            name="backend_request",
            parent_context=router_ctx,
            kind=SpanKind.CLIENT,
            attributes=client_attrs,
        )

        # Propagate the backend span's context onto the outgoing request headers.
        forwarded = {}
        inject_context(forwarded, backend_ctx)
        assert "traceparent" in forwarded

        # Close the innermost span first, then its parent.
        for span in (backend_span, router_span):
            end_span(span, status_code=200)

    def test_tracing_disabled_flow(self):
        """Helpers must be callable without error while tracing is off."""
        assert is_tracing_enabled() is False

        # Neither call may raise when tracing was never initialized.
        carrier = {}
        inject_context(carrier)
        end_span(None)


if __name__ == "__main__":
    # Run this file's tests verbosely when it is executed as a script.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)
35 changes: 35 additions & 0 deletions src/vllm_router/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@
except ImportError:
semantic_cache_available = False

# OpenTelemetry tracing integration is optional: record whether the helpers
# could be imported so later code can check `otel_available` before using them.
try:
    from vllm_router.experimental.otel import (
        initialize_tracing,
        is_tracing_enabled,
        shutdown_tracing,
    )
except ImportError:
    otel_available = False
else:
    otel_available = True

logger = logging.getLogger("uvicorn")


Expand Down Expand Up @@ -121,6 +133,11 @@ async def lifespan(app: FastAPI):
logger.info("Closing routing logic instances")
cleanup_routing_logic()

# Shutdown OpenTelemetry tracing if enabled
if otel_available and app.state.otel_enabled:
logger.info("Shutting down OpenTelemetry tracing")
shutdown_tracing()


def initialize_all(app: FastAPI, args):
"""
Expand All @@ -142,6 +159,23 @@ def initialize_all(app: FastAPI, args):
profile_session_sample_rate=args.sentry_profile_session_sample_rate,
)

if otel_available and args.otel_endpoint:
initialize_tracing(
service_name=args.otel_service_name,
otlp_endpoint=args.otel_endpoint,
insecure=not args.otel_secure,
)
app.state.otel_enabled = is_tracing_enabled()
if app.state.otel_enabled:
logger.info(
f"OpenTelemetry tracing enabled, exporting to {args.otel_endpoint}"
)
elif args.otel_endpoint and not otel_available:
logger.warning(
"OpenTelemetry endpoint specified but OpenTelemetry packages not installed. "
"Install with: pip install opentelemetry-api opentelemetry-sdk opentelemetry-exporter-otlp"
)

if args.service_discovery == "static":
initialize_service_discovery(
ServiceDiscoveryType.STATIC,
Expand Down Expand Up @@ -292,6 +326,7 @@ def initialize_all(app: FastAPI, args):
app.include_router(metrics_router)
app.state.aiohttp_client_wrapper = AiohttpClientWrapper()
app.state.semantic_cache_available = semantic_cache_available
app.state.otel_enabled = False


def main():
Expand Down
37 changes: 37 additions & 0 deletions src/vllm_router/experimental/otel/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2024-2025 The vLLM Production Stack Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""OpenTelemetry tracing module for vLLM Router."""

from vllm_router.experimental.otel.tracing import (
end_span,
extract_context,
get_tracer,
initialize_tracing,
inject_context,
is_tracing_enabled,
shutdown_tracing,
start_span,
)

__all__ = [
"initialize_tracing",
"shutdown_tracing",
"get_tracer",
"is_tracing_enabled",
"extract_context",
"inject_context",
"start_span",
"end_span",
]
Loading