diff --git a/CHANGELOG.md b/CHANGELOG.md index 6732273..a18ed6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ All notable changes to the AxonFlow Python SDK will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.8.0] - 2026-03-03 + +### Added + +- `health_check_detailed()` method (async + sync) returning `HealthResponse` with platform version, capabilities, and SDK compatibility info +- `has_capability(name)` method on `HealthResponse` to check if platform supports a specific feature +- User-Agent header (`axonflow-sdk-python/{version}`) sent on all HTTP requests +- Version mismatch warning logged when SDK version is below platform's `min_sdk_version` +- `PlatformCapability`, `SDKCompatibility`, `HealthResponse` dataclasses +- `trace_id` field on `CreateWorkflowRequest`, `CreateWorkflowResponse`, `WorkflowStatusResponse`, and `ListWorkflowsOptions` for distributed tracing correlation +- `ToolContext` dataclass for per-tool governance within workflow steps +- `tool_context` field on `StepGateRequest` for tool-level policy enforcement +- `check_tool_gate()` method on LangGraph adapter for per-tool governance gate checks +- `tool_completed()` method on LangGraph adapter for per-tool step completion +- `list_workflows()` now supports `trace_id` filter parameter +- Anonymous runtime telemetry for version adoption tracking and feature usage signals +- `TelemetryEnabled` / `telemetry` configuration option to explicitly control telemetry +- `AXONFLOW_TELEMETRY=off` and `DO_NOT_TRACK=1` environment variable opt-out support + +### Fixed + +- `__version__` corrected from `3.6.0` to `3.8.0` + +--- + ## [3.7.0] - 2026-02-28 ### Added diff --git a/README.md b/README.md index 700478a..c194a5c 100644 --- a/README.md +++ b/README.md @@ -418,6 +418,12 @@ If you are evaluating AxonFlow in a company setting and cannot 
open a public iss No email required. Optional contact if you want a response. +## Telemetry + +This SDK sends anonymous usage telemetry (SDK version, OS, enabled features) to help improve AxonFlow. +No prompts, payloads, or PII are ever collected. Opt out: `AXONFLOW_TELEMETRY=off` or `DO_NOT_TRACK=1`. +See [Telemetry Documentation](https://docs.getaxonflow.com/docs/telemetry) for full details. + ## License MIT - See [LICENSE](LICENSE) for details. diff --git a/axonflow/__init__.py b/axonflow/__init__.py index 2c825aa..6f8deeb 100644 --- a/axonflow/__init__.py +++ b/axonflow/__init__.py @@ -25,7 +25,14 @@ >>> result = client.proxy_llm_call("user-token", "What is AI?", "chat") """ -from axonflow.client import AxonFlow, SyncAxonFlow +from axonflow._version import __version__ +from axonflow.client import ( + AxonFlow, + HealthResponse, + PlatformCapability, + SDKCompatibility, + SyncAxonFlow, +) from axonflow.code_governance import ( CodeFile, CodeGovernanceMetrics, @@ -207,17 +214,23 @@ StepGateRequest, StepGateResponse, StepType, + ToolContext, WorkflowSource, WorkflowStatus, WorkflowStatusResponse, WorkflowStepInfo, ) -__version__ = "3.6.0" __all__ = [ + # Version + "__version__", # Main client "AxonFlow", "SyncAxonFlow", + # Version discovery types + "PlatformCapability", + "SDKCompatibility", + "HealthResponse", # Configuration "Mode", "RetryConfig", @@ -368,6 +381,7 @@ "MarkStepCompletedRequest", "AbortWorkflowRequest", "PolicyMatch", + "ToolContext", # WCP Approval types (Feature 5) "ApproveStepResponse", "RejectStepResponse", diff --git a/axonflow/_version.py b/axonflow/_version.py new file mode 100644 index 0000000..487424a --- /dev/null +++ b/axonflow/_version.py @@ -0,0 +1,3 @@ +"""Single source of truth for the AxonFlow SDK version.""" + +__version__ = "3.8.0" diff --git a/axonflow/adapters/langgraph.py b/axonflow/adapters/langgraph.py index c8724be..6a088f2 100644 --- a/axonflow/adapters/langgraph.py +++ b/axonflow/adapters/langgraph.py @@ -42,6 +42,7 @@ 
MarkStepCompletedRequest, StepGateRequest, StepType, + ToolContext, WorkflowSource, ) @@ -132,6 +133,7 @@ async def start_workflow( self, total_steps: int | None = None, metadata: dict[str, Any] | None = None, + trace_id: str | None = None, ) -> str: """Register the workflow with AxonFlow. @@ -140,6 +142,7 @@ async def start_workflow( Args: total_steps: Total number of steps (if known) metadata: Additional workflow metadata + trace_id: External trace ID for correlation (Langsmith, Datadog, OTel) Returns: The assigned workflow ID @@ -147,7 +150,8 @@ async def start_workflow( Example: >>> workflow_id = await adapter.start_workflow( ... total_steps=5, - ... metadata={"customer_id": "cust-123"} + ... metadata={"customer_id": "cust-123"}, + ... trace_id="langsmith-run-abc123", ... ) """ request = CreateWorkflowRequest( @@ -155,6 +159,7 @@ async def start_workflow( source=self.source, total_steps=total_steps, metadata=metadata or {}, + trace_id=trace_id, ) response = await self.client.create_workflow(request) @@ -170,6 +175,7 @@ async def check_gate( step_input: dict[str, Any] | None = None, model: str | None = None, provider: str | None = None, + tool_context: ToolContext | None = None, ) -> bool: """Check if a step is allowed to proceed. 
@@ -182,6 +188,7 @@ async def check_gate( step_input: Input data for the step (for policy evaluation) model: LLM model being used provider: LLM provider being used + tool_context: Tool-level context for per-tool governance (tool_call steps) Returns: True if step is allowed, False if blocked (when auto_block=False) @@ -206,7 +213,8 @@ async def check_gate( # Generate step ID if not provided if step_id is None: self._step_counter += 1 - step_id = f"step-{self._step_counter}-{step_name.lower().replace(' ', '-')}" + safe_name = step_name.lower().replace(" ", "-").replace("/", "-") + step_id = f"step-{self._step_counter}-{safe_name}" request = StepGateRequest( step_name=step_name, @@ -214,6 +222,7 @@ async def check_gate( step_input=step_input or {}, model=model, provider=provider, + tool_context=tool_context, ) response = await self.client.step_gate(self.workflow_id, step_id, request) @@ -247,6 +256,9 @@ async def step_completed( step_id: str | None = None, output: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, + tokens_in: int | None = None, + tokens_out: int | None = None, + cost_usd: float | None = None, ) -> None: """Mark a step as completed. 
@@ -257,6 +269,9 @@ async def step_completed( step_id: Optional step ID (must match the one used in check_gate) output: Output data from the step metadata: Additional metadata + tokens_in: Input tokens consumed + tokens_out: Output tokens produced + cost_usd: Cost in USD Example: >>> await adapter.step_completed("generate", output={"code": result}) @@ -267,15 +282,120 @@ async def step_completed( # Generate step ID if not provided (must match check_gate) if step_id is None: - step_id = f"step-{self._step_counter}-{step_name.lower().replace(' ', '-')}" + safe_name = step_name.lower().replace(" ", "-").replace("/", "-") + step_id = f"step-{self._step_counter}-{safe_name}" request = MarkStepCompletedRequest( output=output or {}, metadata=metadata or {}, + tokens_in=tokens_in, + tokens_out=tokens_out, + cost_usd=cost_usd, ) await self.client.mark_step_completed(self.workflow_id, step_id, request) + async def check_tool_gate( + self, + tool_name: str, + tool_type: str | None = None, + *, + step_name: str | None = None, + step_id: str | None = None, + tool_input: dict[str, Any] | None = None, + model: str | None = None, + provider: str | None = None, + ) -> bool: + """Check if a specific tool invocation is allowed. + + Convenience wrapper around check_gate() that sets step_type=TOOL_CALL + and includes ToolContext for per-tool governance. 
+ + Args: + tool_name: Name of the tool being invoked + tool_type: Tool type (function, mcp, api) + step_name: Step name (defaults to "tools/{tool_name}") + step_id: Optional step ID (auto-generated if not provided) + tool_input: Input arguments for the tool + model: LLM model being used + provider: LLM provider being used + + Returns: + True if tool invocation is allowed, False if blocked (when auto_block=False) + + Raises: + WorkflowBlockedError: If tool is blocked and auto_block=True + WorkflowApprovalRequiredError: If tool requires approval + ValueError: If workflow not started + + Example: + >>> if await adapter.check_tool_gate("web_search", "function", + ... tool_input={"query": "latest news"}): + ... result = await web_search(query="latest news") + ... await adapter.tool_completed("web_search", output={"results": result}) + """ + if step_name is None: + step_name = f"tools/{tool_name}" + + tool_context = ToolContext( + tool_name=tool_name, + tool_type=tool_type, + tool_input=tool_input or {}, + ) + + return await self.check_gate( + step_name=step_name, + step_type=StepType.TOOL_CALL, + step_id=step_id, + model=model, + provider=provider, + tool_context=tool_context, + ) + + async def tool_completed( + self, + tool_name: str, + *, + step_name: str | None = None, + step_id: str | None = None, + output: dict[str, Any] | None = None, + tokens_in: int | None = None, + tokens_out: int | None = None, + cost_usd: float | None = None, + ) -> None: + """Mark a tool invocation as completed. + + Convenience wrapper around step_completed() for tool-level tracking. + + Args: + tool_name: Name of the tool that was invoked + step_name: Step name (defaults to "tools/{tool_name}") + step_id: Optional step ID (must match the one used in check_tool_gate) + output: Output data from the tool + tokens_in: Input tokens consumed + tokens_out: Output tokens produced + cost_usd: Cost in USD + + Example: + >>> await adapter.tool_completed("web_search", + ... 
output={"results": search_results}, + ... tokens_in=150, + ... tokens_out=500, + ... cost_usd=0.002, + ... ) + """ + if step_name is None: + step_name = f"tools/{tool_name}" + + await self.step_completed( + step_name=step_name, + step_id=step_id, + output=output, + tokens_in=tokens_in, + tokens_out=tokens_out, + cost_usd=cost_usd, + ) + async def complete_workflow(self) -> None: """Mark the workflow as completed. diff --git a/axonflow/client.py b/axonflow/client.py index fc56bb8..60d6340 100644 --- a/axonflow/client.py +++ b/axonflow/client.py @@ -29,9 +29,11 @@ import contextlib import hashlib import json +import logging import os import re from collections.abc import AsyncIterator, Coroutine, Iterator +from dataclasses import dataclass from datetime import datetime from typing import TYPE_CHECKING, Any, TypeVar @@ -56,6 +58,7 @@ ) from axonflow import masfeat +from axonflow._version import __version__ as _SDK_VERSION from axonflow.code_governance import ( CodeGovernanceMetrics, ConfigureGitProviderRequest, @@ -119,6 +122,7 @@ UpdateDynamicPolicyRequest, UpdateStaticPolicyRequest, ) +from axonflow.telemetry import send_telemetry_ping from axonflow.types import ( AuditLogEntry, AuditQueryOptions, @@ -236,6 +240,51 @@ def normalize_fractional_seconds(match: re.Match[str]) -> str: T = TypeVar("T") +def _parse_version(v: str) -> tuple[int, ...]: + """Parse a semver string into a tuple of ints for correct numeric comparison.""" + parts: list[int] = [] + for part in v.split("."): + # Strip pre-release suffix (e.g., "0-beta" -> "0") + numeric = part.split("-")[0].split("+")[0] + try: + parts.append(int(numeric)) + except ValueError: + parts.append(0) + return tuple(parts) + + +@dataclass +class PlatformCapability: + """Describes a feature supported by the platform.""" + + name: str + since: str + description: str + + +@dataclass +class SDKCompatibility: + """SDK version compatibility information.""" + + min_sdk_version: str + recommended_sdk_version: str + + +@dataclass 
+class HealthResponse: + """Detailed health check response from the platform.""" + + status: str + service: str + version: str + capabilities: list[PlatformCapability] + sdk_compatibility: SDKCompatibility | None = None + + def has_capability(self, name: str) -> bool: + """Check if the platform supports a named capability.""" + return any(c.name == name for c in self.capabilities) + + class AxonFlow: """Main AxonFlow client for AI governance. @@ -264,6 +313,7 @@ def __init__( *, mode: Mode | str = Mode.PRODUCTION, debug: bool = False, + telemetry: bool | None = None, timeout: float = 60.0, map_timeout: float = 120.0, insecure_skip_verify: bool = False, @@ -280,6 +330,9 @@ def __init__( client_secret: Client secret (optional for community/self-hosted mode) mode: Operation mode (production or sandbox) debug: Enable debug logging + telemetry: Enable/disable anonymous telemetry. ``None`` uses mode default + (ON for production, OFF for sandbox). Set ``DO_NOT_TRACK=1`` or + ``AXONFLOW_TELEMETRY=off`` to opt out via environment. timeout: Request timeout in seconds map_timeout: Timeout for MAP operations in seconds (default: 120s) MAP operations involve multiple LLM calls and need longer timeouts @@ -323,6 +376,7 @@ def __init__( # Build headers headers: dict[str, str] = { "Content-Type": "application/json", + "User-Agent": f"axonflow-sdk-python/{_SDK_VERSION}", } # Add authentication and tenant headers # client_id is always required for policy APIs (sets X-Tenant-ID) @@ -372,6 +426,15 @@ def __init__( endpoint=endpoint, ) + # Send telemetry ping (fire-and-forget). + send_telemetry_ping( + mode=self._config.mode.value, + endpoint=self._config.endpoint, + telemetry_enabled=telemetry, + has_credentials=bool(client_id and client_secret), + debug=debug, + ) + @property def masfeat(self) -> MASFEATNamespace: """MAS FEAT compliance methods namespace. 
@@ -660,6 +723,50 @@ async def health_check(self) -> bool: except AxonFlowError: return False + async def health_check_detailed(self) -> HealthResponse: + """Get detailed health info including capabilities and version. + + Returns: + HealthResponse with platform version, capabilities, and SDK compatibility. + + Raises: + AxonFlowError: If the health check request fails. + """ + response = await self._request("GET", "/health") + caps = [ + PlatformCapability( + name=c.get("name", ""), + since=c.get("since", ""), + description=c.get("description", ""), + ) + for c in response.get("capabilities", []) + ] + compat_data = response.get("sdk_compatibility") + compat = None + if compat_data: + compat = SDKCompatibility( + min_sdk_version=compat_data.get("min_sdk_version", ""), + recommended_sdk_version=compat_data.get("recommended_sdk_version", ""), + ) + health = HealthResponse( + status=response.get("status", "unknown"), + service=response.get("service", ""), + version=response.get("version", ""), + capabilities=caps, + sdk_compatibility=compat, + ) + if ( + compat + and compat.min_sdk_version + and _parse_version(_SDK_VERSION) < _parse_version(compat.min_sdk_version) + ): + logging.getLogger("axonflow").warning( + "SDK version %s is below minimum supported version %s. Please upgrade.", + _SDK_VERSION, + compat.min_sdk_version, + ) + return health + async def orchestrator_health_check(self) -> bool: """Check if AxonFlow Orchestrator is healthy. 
@@ -3344,6 +3451,8 @@ async def create_workflow( "total_steps": request.total_steps, "metadata": request.metadata, } + if request.trace_id: + body["trace_id"] = request.trace_id if self._config.debug: self._logger.debug("Creating workflow", workflow_name=request.workflow_name) @@ -3359,6 +3468,7 @@ async def create_workflow( source=WorkflowSource(response["source"]), status=WorkflowStatus(response["status"]), created_at=_parse_datetime(response["created_at"]), + trace_id=response.get("trace_id"), ) async def get_workflow(self, workflow_id: str) -> WorkflowStatusResponse: @@ -3422,6 +3532,14 @@ async def step_gate( "model": request.model, "provider": request.provider, } + if request.tool_context: + tc: dict[str, Any] = { + "tool_name": request.tool_context.tool_name, + "tool_input": request.tool_context.tool_input, + } + if request.tool_context.tool_type is not None: + tc["tool_type"] = request.tool_context.tool_type + body["tool_context"] = tc if self._config.debug: self._logger.debug( @@ -3606,6 +3724,8 @@ async def list_workflows( params.append(f"status={options.status.value}") if options.source: params.append(f"source={options.source.value}") + if options.trace_id: + params.append(f"trace_id={options.trace_id}") if options.limit: params.append(f"limit={options.limit}") if options.offset: @@ -5155,6 +5275,7 @@ def _map_workflow_response(self, data: dict[str, Any]) -> WorkflowStatusResponse completed_at=( _parse_datetime(data["completed_at"]) if data.get("completed_at") else None ), + trace_id=data.get("trace_id"), steps=steps, ) @@ -5852,6 +5973,10 @@ def health_check(self) -> bool: """Check if AxonFlow Agent is healthy.""" return self._run_sync(self._async_client.health_check()) + def health_check_detailed(self) -> HealthResponse: + """Get detailed health info including capabilities and version.""" + return self._run_sync(self._async_client.health_check_detailed()) + def orchestrator_health_check(self) -> bool: """Check if AxonFlow Orchestrator is healthy.""" 
return self._run_sync(self._async_client.orchestrator_health_check()) diff --git a/axonflow/telemetry.py b/axonflow/telemetry.py new file mode 100644 index 0000000..1d678bd --- /dev/null +++ b/axonflow/telemetry.py @@ -0,0 +1,132 @@ +"""SDK telemetry: fire-and-forget checkpoint ping on client init. + +Collects anonymous, non-PII usage data (SDK version, OS, architecture) and +sends it to the AxonFlow checkpoint service. The response may include the +latest available SDK version so we can warn about outdated installs. + +Opt-out: + Set ``DO_NOT_TRACK=1`` or ``AXONFLOW_TELEMETRY=off`` in your environment. + +Override endpoint: + Set ``AXONFLOW_CHECKPOINT_URL`` to a custom URL. +""" + +from __future__ import annotations + +import logging +import os +import platform +import threading +import uuid + +import httpx + +from axonflow._version import __version__ as _SDK_VERSION + +logger = logging.getLogger(__name__) + +_DEFAULT_CHECKPOINT_URL = "https://checkpoint.getaxonflow.com/v1/ping" +_TIMEOUT_SECONDS = 3 +_HTTP_OK = 200 + + +def _is_telemetry_enabled( + mode: str, + telemetry_enabled: bool | None, + has_credentials: bool, # noqa: ARG001 kept for API compat +) -> bool: + """Determine whether telemetry should fire. + + Priority (highest to lowest): + 1. ``DO_NOT_TRACK=1`` environment variable -> disabled + 2. ``AXONFLOW_TELEMETRY=off`` environment variable -> disabled + 3. Explicit config value (``telemetry_enabled``) -> use that + 4. Default: ON for all modes except sandbox + """ + # Environment-level opt-out always wins. + if os.environ.get("DO_NOT_TRACK", "").strip() == "1": + return False + if os.environ.get("AXONFLOW_TELEMETRY", "").strip().lower() == "off": + return False + + # Explicit config override. + if telemetry_enabled is not None: + return telemetry_enabled + + # Default: ON everywhere except sandbox mode. 
+ return mode != "sandbox" + + +def _build_payload(mode: str) -> dict[str, object]: + """Build the JSON payload for the checkpoint ping.""" + return { + "sdk": "python", + "sdk_version": _SDK_VERSION, + "platform_version": None, + "os": platform.system(), + "arch": platform.machine(), + "runtime_version": platform.python_version(), + "deployment_mode": mode, + "features": [], + "instance_id": str(uuid.uuid4()), + } + + +def _do_ping(url: str, payload: dict[str, object], debug: bool) -> None: + """Execute the HTTP POST (runs inside a daemon thread).""" + try: + resp = httpx.post(url, json=payload, timeout=_TIMEOUT_SECONDS) + if resp.status_code == _HTTP_OK: + try: + body = resp.json() + except (ValueError, KeyError): + return + latest = body.get("latest_version") + if latest and latest != _SDK_VERSION: + logger.warning( + "A newer AxonFlow Python SDK is available: %s (current: %s). " + "Upgrade with: pip install --upgrade axonflow", + latest, + _SDK_VERSION, + ) + if debug: + logger.debug("Telemetry ping successful: %s", body) + except (httpx.HTTPError, OSError, ValueError): + # Silent failure -- never disrupt the caller. + if debug: + logger.debug("Telemetry ping failed (non-fatal)", exc_info=True) + + +def send_telemetry_ping( + mode: str, + endpoint: str, # noqa: ARG001 kept for future platform_version detection + telemetry_enabled: bool | None, + has_credentials: bool = False, + debug: bool = False, +) -> None: + """Fire-and-forget telemetry ping. Runs in a daemon thread. + + Args: + mode: SDK operation mode (``"production"`` or ``"sandbox"``). + endpoint: The AxonFlow agent endpoint (reserved for future + platform_version detection). + telemetry_enabled: Explicit config override. ``None`` means use the + mode-based default. + has_credentials: Whether the client was initialized with credentials + (clientId + clientSecret). Used to distinguish managed cloud from + self-hosted/community deployments for the default behavior. 
+ debug: When ``True``, log debug-level messages about the ping. + """ + if not _is_telemetry_enabled(mode, telemetry_enabled, has_credentials): + return + + logger.info( + "AxonFlow: anonymous telemetry enabled. " + "Opt out: AXONFLOW_TELEMETRY=off | https://docs.getaxonflow.com/telemetry" + ) + + url = os.environ.get("AXONFLOW_CHECKPOINT_URL", "").strip() or _DEFAULT_CHECKPOINT_URL + payload = _build_payload(mode) + + t = threading.Thread(target=_do_ping, args=(url, payload, debug), daemon=True) + t.start() diff --git a/axonflow/types.py b/axonflow/types.py index b97e0a2..0e02962 100644 --- a/axonflow/types.py +++ b/axonflow/types.py @@ -70,6 +70,10 @@ class AxonFlowConfig(BaseModel): client_secret: str | None = Field(default=None, description="Client secret (optional)") mode: Mode = Field(default=Mode.PRODUCTION, description="Operation mode") debug: bool = Field(default=False, description="Enable debug logging") + telemetry: bool | None = Field( + default=None, + description="Enable/disable anonymous telemetry (None = mode default)", + ) timeout: float = Field(default=60.0, gt=0, description="Request timeout (seconds)") map_timeout: float = Field(default=120.0, gt=0, description="MAP operations timeout (seconds)") insecure_skip_verify: bool = Field(default=False, description="Skip TLS verify") diff --git a/axonflow/workflow.py b/axonflow/workflow.py index 70ff19a..ab49623 100644 --- a/axonflow/workflow.py +++ b/axonflow/workflow.py @@ -81,6 +81,10 @@ class CreateWorkflowRequest(BaseModel): metadata: dict[str, Any] = Field( default_factory=dict, description="Additional metadata for the workflow" ) + trace_id: str | None = Field( + default=None, + description="External trace ID for correlation (Langsmith, Datadog, OTel)", + ) class CreateWorkflowResponse(BaseModel): @@ -91,6 +95,17 @@ class CreateWorkflowResponse(BaseModel): source: WorkflowSource = Field(..., description="Source orchestrator") status: WorkflowStatus = Field(..., description="Current status 
(always 'in_progress' for new)") created_at: datetime = Field(..., description="When the workflow was created") + trace_id: str | None = None + + +class ToolContext(BaseModel): + """Tool-level context for per-tool governance within tool_call steps.""" + + model_config = ConfigDict(frozen=True) + + tool_name: str + tool_type: str | None = Field(default=None, description="Tool type: function, mcp, api") + tool_input: dict[str, Any] = Field(default_factory=dict) class StepGateRequest(BaseModel): @@ -105,6 +120,7 @@ class StepGateRequest(BaseModel): ) model: str | None = Field(default=None, description="LLM model being used (if applicable)") provider: str | None = Field(default=None, description="LLM provider (if applicable)") + tool_context: ToolContext | None = None class StepGateResponse(BaseModel): @@ -178,6 +194,7 @@ class WorkflowStatusResponse(BaseModel): steps: list[WorkflowStepInfo] = Field( default_factory=list, description="List of steps in the workflow" ) + trace_id: str | None = None def is_terminal(self) -> bool: """Check if the workflow is in a terminal state (completed, aborted, or failed).""" @@ -191,6 +208,7 @@ class ListWorkflowsOptions(BaseModel): status: WorkflowStatus | None = Field(default=None, description="Filter by workflow status") source: WorkflowSource | None = Field(default=None, description="Filter by source") + trace_id: str | None = Field(default=None, description="Filter by external trace ID") limit: int = Field(default=50, ge=1, le=100, description="Maximum number of results to return") offset: int = Field(default=0, ge=0, description="Offset for pagination") diff --git a/pyproject.toml b/pyproject.toml index 85177cd..0b09102 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "axonflow" -version = "3.6.0" +version = "3.8.0" description = "AxonFlow Python SDK - Enterprise AI Governance in 3 Lines of Code" readme = "README.md" license = {text = "MIT"} diff --git 
a/tests/conftest.py b/tests/conftest.py index 7ee8488..03d26fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ from __future__ import annotations import json +import os from pathlib import Path from typing import Any, AsyncGenerator @@ -15,6 +16,13 @@ from axonflow import AxonFlow + +@pytest.fixture(autouse=True) +def _disable_telemetry(monkeypatch: pytest.MonkeyPatch) -> None: + """Disable telemetry in all tests to prevent unexpected HTTP calls.""" + monkeypatch.setenv("DO_NOT_TRACK", "1") + + # ============================================================================ # Fixture Loading Utilities # ============================================================================ diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py new file mode 100644 index 0000000..9aff2cc --- /dev/null +++ b/tests/test_telemetry.py @@ -0,0 +1,340 @@ +"""Tests for the SDK telemetry module.""" + +from __future__ import annotations + +import json +import threading +import time +from typing import Any +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from axonflow.telemetry import ( + _DEFAULT_CHECKPOINT_URL, + _build_payload, + _is_telemetry_enabled, + send_telemetry_ping, +) + +# --------------------------------------------------------------------------- +# _is_telemetry_enabled tests +# --------------------------------------------------------------------------- + + +class TestIsTelemetryEnabled: + """Tests for the telemetry opt-in / opt-out logic.""" + + def test_disabled_by_env_do_not_track(self) -> None: + """DO_NOT_TRACK=1 disables telemetry regardless of other settings.""" + with patch.dict("os.environ", {"DO_NOT_TRACK": "1"}): + assert _is_telemetry_enabled("production", None, True) is False + assert _is_telemetry_enabled("production", True, True) is False + + def test_disabled_by_env_axonflow(self) -> None: + """AXONFLOW_TELEMETRY=off disables telemetry.""" + with patch.dict("os.environ", {"AXONFLOW_TELEMETRY": 
"off"}): + assert _is_telemetry_enabled("production", None, True) is False + + def test_disabled_by_env_axonflow_case_insensitive(self) -> None: + """AXONFLOW_TELEMETRY=OFF (uppercase) also disables.""" + with patch.dict("os.environ", {"AXONFLOW_TELEMETRY": "OFF"}): + assert _is_telemetry_enabled("production", None, True) is False + + def test_disabled_sandbox_mode(self) -> None: + """Default OFF for sandbox mode when no explicit config.""" + with patch.dict("os.environ", {}, clear=True): + assert _is_telemetry_enabled("sandbox", None, True) is False + + def test_enabled_production_with_credentials(self) -> None: + """Default ON for production mode with credentials.""" + with patch.dict("os.environ", {}, clear=True): + assert _is_telemetry_enabled("production", None, True) is True + + def test_enabled_production_without_credentials(self) -> None: + """Default ON for production mode even without credentials.""" + with patch.dict("os.environ", {}, clear=True): + assert _is_telemetry_enabled("production", None, False) is True + + def test_config_override_true(self) -> None: + """Explicit True enables even in sandbox mode.""" + with patch.dict("os.environ", {}, clear=True): + assert _is_telemetry_enabled("sandbox", True, False) is True + + def test_config_override_false(self) -> None: + """Explicit False disables even in production mode.""" + with patch.dict("os.environ", {}, clear=True): + assert _is_telemetry_enabled("production", False, True) is False + + def test_env_do_not_track_beats_config_true(self) -> None: + """Environment opt-out always wins over config=True.""" + with patch.dict("os.environ", {"DO_NOT_TRACK": "1"}): + assert _is_telemetry_enabled("production", True, True) is False + + def test_env_axonflow_telemetry_beats_config_true(self) -> None: + """AXONFLOW_TELEMETRY=off beats config=True.""" + with patch.dict("os.environ", {"AXONFLOW_TELEMETRY": "off"}): + assert _is_telemetry_enabled("production", True, True) is False + + +# 
# ---------------------------------------------------------------------------
# _build_payload tests
# ---------------------------------------------------------------------------


class TestBuildPayload:
    """Tests for telemetry payload construction."""

    def test_payload_format(self) -> None:
        """Verify all expected fields are present and correctly typed."""
        payload = _build_payload("production")

        assert payload["sdk"] == "python"
        assert isinstance(payload["sdk_version"], str)
        assert payload["platform_version"] is None
        assert isinstance(payload["os"], str)
        assert isinstance(payload["arch"], str)
        assert isinstance(payload["runtime_version"], str)
        assert payload["deployment_mode"] == "production"
        assert payload["features"] == []
        assert isinstance(payload["instance_id"], str)
        # Should be a valid UUID: a v4 UUID string is always 36 chars.
        assert len(payload["instance_id"]) == 36

    def test_payload_mode_propagated(self) -> None:
        """deployment_mode reflects the supplied mode."""
        assert _build_payload("sandbox")["deployment_mode"] == "sandbox"
        assert _build_payload("production")["deployment_mode"] == "production"

    def test_payload_instance_id_unique(self) -> None:
        """Each call generates a new instance_id."""
        p1 = _build_payload("production")
        p2 = _build_payload("production")
        assert p1["instance_id"] != p2["instance_id"]


# ---------------------------------------------------------------------------
# send_telemetry_ping integration tests
# ---------------------------------------------------------------------------


class TestSendTelemetryPing:
    """End-to-end tests for send_telemetry_ping.

    httpx is patched at the module under test (``axonflow.telemetry.httpx``),
    so no real network I/O occurs. Because the ping runs on a daemon thread,
    each test joins outstanding daemon threads via ``_wait_for_threads``
    before asserting on the mock.
    """

    @patch("axonflow.telemetry.httpx")
    def test_payload_posted_correctly(self, mock_httpx: MagicMock) -> None:
        """Verify the HTTP POST is made with correct JSON payload."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"latest_version": None, "alerts": []}
        mock_httpx.post.return_value = mock_response

        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            # Wait for daemon thread to complete.
            _wait_for_threads()

        mock_httpx.post.assert_called_once()
        call_args = mock_httpx.post.call_args
        # URL may be passed positionally or as the ``url=`` keyword.
        url = call_args[0][0] if call_args[0] else call_args[1]["url"]
        assert url == _DEFAULT_CHECKPOINT_URL

        # Payload may be passed as ``json=`` keyword or second positional arg.
        payload = call_args[1].get("json") or call_args[0][1]
        assert payload["sdk"] == "python"
        assert payload["deployment_mode"] == "production"
        assert "instance_id" in payload

    @patch("axonflow.telemetry.httpx")
    def test_disabled_skips_post(self, mock_httpx: MagicMock) -> None:
        """When telemetry is disabled, no HTTP call is made."""
        with patch.dict("os.environ", {"DO_NOT_TRACK": "1"}):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
            )
            _wait_for_threads()
        mock_httpx.post.assert_not_called()

    @patch("axonflow.telemetry.httpx")
    def test_sandbox_default_skips_post(self, mock_httpx: MagicMock) -> None:
        """Sandbox mode with no config override skips telemetry."""
        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="sandbox",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
            )
            _wait_for_threads()
        mock_httpx.post.assert_not_called()

    @patch("axonflow.telemetry.httpx")
    def test_config_override_true_in_sandbox(self, mock_httpx: MagicMock) -> None:
        """Config override=True enables ping even in sandbox mode."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_httpx.post.return_value = mock_response

        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="sandbox",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=True,
            )
            _wait_for_threads()
        mock_httpx.post.assert_called_once()

    @patch("axonflow.telemetry.httpx")
    def test_config_override_false_in_production(self, mock_httpx: MagicMock) -> None:
        """Config override=False disables ping even in production mode."""
        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=False,
            )
            _wait_for_threads()
        mock_httpx.post.assert_not_called()

    @patch("axonflow.telemetry.httpx")
    def test_silent_failure_on_connection_error(self, mock_httpx: MagicMock) -> None:
        """Connection errors are swallowed silently."""
        mock_httpx.post.side_effect = httpx.ConnectError("connection refused")

        with patch.dict("os.environ", {}, clear=True):
            # Should not raise.
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            _wait_for_threads()
        # No exception = pass.

    @patch("axonflow.telemetry.httpx")
    def test_silent_failure_on_timeout(self, mock_httpx: MagicMock) -> None:
        """Timeout errors are swallowed silently."""
        mock_httpx.post.side_effect = httpx.TimeoutException("timed out")

        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            _wait_for_threads()

    @patch("axonflow.telemetry.httpx")
    def test_custom_endpoint_via_env(self, mock_httpx: MagicMock) -> None:
        """AXONFLOW_CHECKPOINT_URL overrides the default endpoint."""
        custom_url = "https://custom-checkpoint.example.com/v1/ping"
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_httpx.post.return_value = mock_response

        with patch.dict(
            "os.environ",
            {"AXONFLOW_CHECKPOINT_URL": custom_url},
            clear=True,
        ):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=True,
            )
            _wait_for_threads()

        call_args = mock_httpx.post.call_args
        url = call_args[0][0] if call_args[0] else call_args[1]["url"]
        assert url == custom_url

    @patch("axonflow.telemetry.httpx")
    def test_outdated_version_warning(self, mock_httpx: MagicMock) -> None:
        """When server reports a newer version, a warning is logged."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"latest_version": "99.0.0", "alerts": []}
        mock_httpx.post.return_value = mock_response

        with (
            patch.dict("os.environ", {}, clear=True),
            patch("axonflow.telemetry.logger") as mock_logger,
        ):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            _wait_for_threads()

        mock_logger.warning.assert_called_once()
        warning_msg = mock_logger.warning.call_args[0][0]
        assert "newer" in warning_msg.lower() or "available" in warning_msg.lower()

    @patch("axonflow.telemetry.httpx")
    def test_timeout_passed_to_post(self, mock_httpx: MagicMock) -> None:
        """Verify the 3-second timeout is passed to httpx.post."""
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = {}
        mock_httpx.post.return_value = mock_response

        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            _wait_for_threads()

        call_kwargs = mock_httpx.post.call_args[1]
        assert call_kwargs["timeout"] == 3

    @patch("axonflow.telemetry.httpx")
    def test_non_200_response_no_crash(self, mock_httpx: MagicMock) -> None:
        """Non-200 responses are handled gracefully."""
        mock_response = MagicMock()
        mock_response.status_code = 500
        mock_httpx.post.return_value = mock_response

        with patch.dict("os.environ", {}, clear=True):
            send_telemetry_ping(
                mode="production",
                endpoint="https://agent.axonflow.com",
                telemetry_enabled=None,
                has_credentials=True,
            )
            _wait_for_threads()
        # No exception = pass.


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _wait_for_threads(timeout: float = 5.0) -> None:
    """Wait for all non-main daemon threads to finish (up to *timeout* seconds).

    This is needed because send_telemetry_ping spawns a daemon thread.
    The shared deadline caps the *total* wait across all threads, so a
    hung thread cannot stall the test suite indefinitely.
    """
    deadline = time.monotonic() + timeout
    for t in threading.enumerate():
        if t is threading.current_thread():
            continue
        if t.daemon:
            remaining = deadline - time.monotonic()
            if remaining > 0:
                t.join(timeout=remaining)