feat: Use instrumentation setup from framework and add metrics

g3force · g3force · commit b53e88438f78 · 2026-03-06T09:01:41.000+01:00
diff --git a/msaf/README.md b/msaf/README.md
@@ -8,13 +8,14 @@ This package provides utilities to convert a [Microsoft Agent Framework](https:/
 
 ```python
 from agent_framework import Agent
+from agenticlayer.msaf import create_metrics_middleware, create_openai_client
 from agenticlayer.msaf.agent_to_a2a import to_a2a
-from agenticlayer.msaf.client import create_openai_client
 
 agent = Agent(
     client=create_openai_client(),
     name="MyAgent",
     instructions="You are a helpful assistant.",
+    middleware=create_metrics_middleware(),
 )
 app = to_a2a(agent, name="MyAgent", rpc_url="http://localhost:8000/")
 # Then run with: uvicorn module:app
@@ -35,3 +36,62 @@ such as [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/quick_start):
 
 `create_openai_client()` reads these variables automatically and passes them to
 `OpenAIChatClient` as `base_url` and `api_key`.
+
+## Observability
+
+### OpenTelemetry setup
+
+Call `setup_otel()` before creating agents to configure OTLP exporters and enable instrumentation:
+
+```python
+from agenticlayer.msaf.otel import setup_otel
+
+setup_otel()
+```
+
+This reads the standard `OTEL_EXPORTER_OTLP_ENDPOINT` / `OTEL_EXPORTER_OTLP_PROTOCOL` environment
+variables, sets up trace/log/metric providers, enables the built-in Agent Framework telemetry
+layers, and instruments outgoing HTTPX requests.
+
+### Metrics
+
+The SDK emits the following OpenTelemetry metrics:
+
+**Built-in** (provided by Agent Framework telemetry layers, enabled by `setup_otel()`):
+
+| Metric | Type | Description |
+|---|---|---|
+| `gen_ai.client.token.usage` | Histogram | Input and output token counts per LLM call |
+| `gen_ai.client.operation.duration` | Histogram | Duration of LLM / agent operations |
+
+**Custom** (provided by `create_metrics_middleware()`, must be added to the agent):
+
+| Metric | Type | Description |
+|---|---|---|
+| `agent.invocations` | Counter | Number of agent invocations |
+| `agent.llm.calls` | Counter | Number of LLM calls |
+| `agent.tool.calls` | Counter | Number of tool calls |
+| `agent.errors` | Counter | Number of errors (`error_source` attribute: `agent`, `model`, or `tool`) |
+
+Add the metrics middleware to your agent:
+
+```python
+from agent_framework import Agent
+from agenticlayer.msaf import create_metrics_middleware, create_openai_client
+
+agent = Agent(
+    client=create_openai_client(),
+    instructions="You are a helpful assistant.",
+    middleware=create_metrics_middleware(),
+)
+```
+
+If you already have other middleware, pass it together with the metrics middleware in one list:
+
+```python
+agent = Agent(
+    client=create_openai_client(),
+    instructions="You are a helpful assistant.",
+    middleware=[MyCustomMiddleware(), *create_metrics_middleware()],
+)
+```
diff --git a/msaf/agenticlayer/msaf/__init__.py b/msaf/agenticlayer/msaf/__init__.py
@@ -5,5 +5,6 @@
 """
 
 from agenticlayer.msaf.client import create_openai_client
+from agenticlayer.msaf.metrics_middleware import create_metrics_middleware
 
-__all__ = ["create_openai_client"]
+__all__ = ["create_openai_client", "create_metrics_middleware"]
diff --git a/msaf/agenticlayer/msaf/metrics_middleware.py b/msaf/agenticlayer/msaf/metrics_middleware.py
@@ -0,0 +1,114 @@
+"""
+Middleware that records agent metrics using OpenTelemetry.
+Tracks agent invocations, LLM calls, tool calls, and errors.
+
+Token usage and operation duration are already provided by the built-in
+``agent_framework.observability`` telemetry layers and do not need to be
+duplicated here.
+"""
+
+from collections.abc import Awaitable, Callable
+from typing import Any
+
+from agent_framework import (
+    AgentContext,
+    AgentMiddleware,
+    ChatContext,
+    ChatMiddleware,
+    FunctionInvocationContext,
+    FunctionMiddleware,
+    MiddlewareTypes,
+)
+from opentelemetry import metrics
+
+_meter = metrics.get_meter("agenticlayer.agent")
+
+_agent_invocations = _meter.create_counter(
+    "agent.invocations",
+    unit="{invocation}",
+    description="Number of agent invocations",
+)
+_llm_calls = _meter.create_counter(
+    "agent.llm.calls",
+    unit="{call}",
+    description="Number of LLM calls",
+)
+_tool_calls = _meter.create_counter(
+    "agent.tool.calls",
+    unit="{call}",
+    description="Number of tool calls",
+)
+_agent_errors = _meter.create_counter(
+    "agent.errors",
+    unit="{error}",
+    description="Number of agent errors",
+)
+
+
+class AgentInvocationMetrics(AgentMiddleware):
+    """Counts agent invocations and errors."""
+
+    async def process(
+        self,
+        context: AgentContext,
+        call_next: Callable[[], Awaitable[None]],
+    ) -> None:
+        agent_name = getattr(context.agent, "name", None) or "unknown"
+        _agent_invocations.add(1, {"agent_name": agent_name})
+        try:
+            await call_next()
+        except Exception:
+            _agent_errors.add(1, {"agent_name": agent_name, "error_source": "agent"})
+            raise
+
+
+class LlmCallMetrics(ChatMiddleware):
+    """Counts LLM / chat-client calls and records model-level errors."""
+
+    async def process(
+        self,
+        context: ChatContext,
+        call_next: Callable[[], Awaitable[None]],
+    ) -> None:
+        options = context.options or {}
+        model: str = options.get("model_id") or getattr(context.client, "model_id", None) or "unknown"
+        attrs: dict[str, Any] = {"model": model}
+        _llm_calls.add(1, attrs)
+        try:
+            await call_next()
+        except Exception:
+            _agent_errors.add(1, {**attrs, "error_source": "model"})
+            raise
+
+
+class ToolCallMetrics(FunctionMiddleware):
+    """Counts tool / function calls and records tool-level errors."""
+
+    async def process(
+        self,
+        context: FunctionInvocationContext,
+        call_next: Callable[[], Awaitable[None]],
+    ) -> None:
+        tool_name = getattr(context.function, "name", None) or "unknown"
+        _tool_calls.add(1, {"tool_name": tool_name})
+        try:
+            await call_next()
+        except Exception:
+            _agent_errors.add(1, {"tool_name": tool_name, "error_source": "tool"})
+            raise
+
+
+def create_metrics_middleware() -> list[MiddlewareTypes]:
+    """Return the full set of metrics middleware ready to pass to an Agent.
+
+    Example::
+
+        from agent_framework import Agent
+        from agenticlayer.msaf.metrics_middleware import create_metrics_middleware
+
+        agent = Agent(
+            client=client,
+            middleware=create_metrics_middleware(),
+        )
+    """
+    return [AgentInvocationMetrics(), LlmCallMetrics(), ToolCallMetrics()]
diff --git a/msaf/agenticlayer/msaf/otel.py b/msaf/agenticlayer/msaf/otel.py
@@ -1,10 +1,37 @@
 """OpenTelemetry setup for a Microsoft Agent Framework Agent App."""
 
-from agenticlayer.shared.otel import setup_otel as _setup_otel_shared
+import logging
+
+from agent_framework.observability import configure_otel_providers
+from agenticlayer.shared.otel import request_hook, response_hook
+from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
 
 __all__ = ["setup_otel"]
 
+_logger = logging.getLogger(__name__)
+
 
 def setup_otel() -> None:
-    """Set up OpenTelemetry tracing, logging and metrics for a Microsoft Agent Framework agent."""
-    _setup_otel_shared()
+    """Set up OpenTelemetry tracing, logging and metrics for a Microsoft Agent Framework agent.
+
+    Uses the built-in ``agent_framework`` OTLP provider setup (reads standard
+    ``OTEL_EXPORTER_OTLP_*`` environment variables) and enables the telemetry
+    layers that emit ``gen_ai.client.token.usage`` and
+    ``gen_ai.client.operation.duration`` metrics.
+
+    Additionally instruments HTTPX clients so outgoing HTTP calls (to
+    sub-agents, MCP servers, LLM gateways) are traced with debug-level
+    request/response body logging.
+
+    Starlette server instrumentation is handled separately by
+    :func:`agenticlayer.shared.otel_starlette.instrument_starlette_app`.
+    """
+    # Set log level for urllib3 to WARNING to reduce noise
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+
+    configure_otel_providers()
+
+    HTTPXClientInstrumentor().instrument(
+        request_hook=request_hook,
+        response_hook=response_hook,
+    )