From f7e37c1d6d8c3ed0e1fe2bfa1605958439922793 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 22:47:40 +0000 Subject: [PATCH 1/3] feat: add PostHog event tracking for SDK installs (ENG-2277) - Create lightweight PostHog client using httpx (no external dependencies) - Track 'Installed SDK' event once per new version via ~/.blaxel/telemetry.json - Inject __posthog_key__ at build time via sed in CI workflows - Fire-and-forget async capture, respects DO_NOT_TRACK and config.yaml - Anonymous UUID as distinct_id (generated via uuid.uuid4) Co-Authored-By: tcrochet --- .github/workflows/dev.yaml | 2 + .github/workflows/prod.yaml | 2 + src/blaxel/__init__.py | 1 + src/blaxel/core/common/autoload.py | 6 + src/blaxel/core/common/posthog.py | 189 +++++++++++++++++++++++++++++ 5 files changed, 200 insertions(+) create mode 100644 src/blaxel/core/common/posthog.py diff --git a/.github/workflows/dev.yaml b/.github/workflows/dev.yaml index 6aab51c4..21b1f8d2 100644 --- a/.github/workflows/dev.yaml +++ b/.github/workflows/dev.yaml @@ -27,6 +27,7 @@ jobs: - name: Set version and build config env: SENTRY_DSN: ${{ secrets.SENTRY_DSN }} + POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} run: | VERSION=$(git describe --tags --abbrev=0) VERSION="${VERSION#v}" @@ -42,6 +43,7 @@ jobs: sed -i -E "s|^__version__ = .*|__version__ = \"${VERSION}\"|" src/blaxel/__init__.py sed -i -E "s|^__commit__ = .*|__commit__ = \"${COMMIT}\"|" src/blaxel/__init__.py sed -i -E "s|^__sentry_dsn__ = .*|__sentry_dsn__ = \"${SENTRY_DSN}\"|" src/blaxel/__init__.py + sed -i -E "s|^__posthog_key__ = .*|__posthog_key__ = \"${POSTHOG_KEY}\"|" src/blaxel/__init__.py - name: Test package installation run: | uv venv diff --git a/.github/workflows/prod.yaml b/.github/workflows/prod.yaml index b1948463..49589671 100644 --- a/.github/workflows/prod.yaml +++ b/.github/workflows/prod.yaml @@ -23,6 +23,7 @@ jobs: - name: Set version and build config env: 
SENTRY_DSN: ${{ secrets.SENTRY_DSN }} + POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} run: | VERSION="${{ github.ref_name }}" VERSION="${VERSION#v}" @@ -34,6 +35,7 @@ jobs: sed -i -E "s|^__version__ = .*|__version__ = \"${VERSION}\"|" src/blaxel/__init__.py sed -i -E "s|^__commit__ = .*|__commit__ = \"${COMMIT}\"|" src/blaxel/__init__.py sed -i -E "s|^__sentry_dsn__ = .*|__sentry_dsn__ = \"${SENTRY_DSN}\"|" src/blaxel/__init__.py + sed -i -E "s|^__posthog_key__ = .*|__posthog_key__ = \"${POSTHOG_KEY}\"|" src/blaxel/__init__.py - name: Test package installation run: | uv venv diff --git a/src/blaxel/__init__.py b/src/blaxel/__init__.py index 3a3cd50a..0c026e0f 100644 --- a/src/blaxel/__init__.py +++ b/src/blaxel/__init__.py @@ -7,6 +7,7 @@ __version__ = "" __commit__ = "" __sentry_dsn__ = "" +__posthog_key__ = "" __all__ = ["autoload", "settings", "env"] autoload() diff --git a/src/blaxel/core/common/autoload.py b/src/blaxel/core/common/autoload.py index 4f810146..148f2187 100644 --- a/src/blaxel/core/common/autoload.py +++ b/src/blaxel/core/common/autoload.py @@ -6,6 +6,7 @@ response_interceptors_sync, ) from ..sandbox.client import client as client_sandbox +from .posthog import track_sdk_installed from .sentry import init_sentry from .settings import settings @@ -43,6 +44,11 @@ def autoload() -> None: except Exception: pass + try: + track_sdk_installed() + except Exception: + pass + try: telemetry() except Exception: diff --git a/src/blaxel/core/common/posthog.py b/src/blaxel/core/common/posthog.py new file mode 100644 index 00000000..f2a245f2 --- /dev/null +++ b/src/blaxel/core/common/posthog.py @@ -0,0 +1,189 @@ +import json +import logging +import platform +import threading +import uuid +from pathlib import Path + +import httpx + +from .settings import settings + +logger = logging.getLogger(__name__) + +# Fallback PostHog API key (empty by default); CI injects the real key into blaxel.__posthog_key__ via sed +_POSTHOG_KEY = "" + +# PostHog API endpoint +_POSTHOG_HOST = "https://us.i.posthog.com" + +# Telemetry state 
file path: ~/.blaxel/telemetry.json +_telemetry_state: dict | None = None + + +def _get_posthog_key() -> str: + """Return the PostHog API key injected at build time.""" + import blaxel + + return getattr(blaxel, "__posthog_key__", "") or _POSTHOG_KEY + + +def _get_telemetry_path() -> Path | None: + """Return the path to the telemetry state file.""" + try: + return Path.home() / ".blaxel" / "telemetry.json" + except Exception: + return None + + +def _load_telemetry_state() -> dict: + """Load the telemetry state from disk.""" + global _telemetry_state + if _telemetry_state is not None: + return _telemetry_state + + _telemetry_state = {"distinct_id": "", "sdks": {}} + + telemetry_path = _get_telemetry_path() + if not telemetry_path: + return _telemetry_state + + try: + data = telemetry_path.read_text(encoding="utf-8") + parsed = json.loads(data) + _telemetry_state = { + "distinct_id": parsed.get("distinct_id", ""), + "cli": parsed.get("cli"), + "sdks": parsed.get("sdks") or {}, + } + except Exception: + # File doesn't exist or is invalid - use defaults + pass + + return _telemetry_state + + +def _save_telemetry_state(state: dict) -> None: + """Save the telemetry state to disk.""" + telemetry_path = _get_telemetry_path() + if not telemetry_path: + return + + try: + telemetry_path.parent.mkdir(parents=True, exist_ok=True) + telemetry_path.write_text( + json.dumps(state, indent=2), + encoding="utf-8", + ) + telemetry_path.chmod(0o600) + except Exception: + # Silently fail + pass + + +def _get_distinct_id() -> str: + """Return a persistent anonymous UUID for PostHog events. + + The UUID is generated on first use and stored in ~/.blaxel/telemetry.json. 
+ """ + state = _load_telemetry_state() + if state["distinct_id"]: + return state["distinct_id"] + + state["distinct_id"] = str(uuid.uuid4()) + _save_telemetry_state(state) + return state["distinct_id"] + + +def _get_os_arch() -> str: + """Get OS and architecture string.""" + try: + system = platform.system().lower() + machine = platform.machine().lower() + if machine in ("x86_64", "amd64"): + arch = "amd64" + elif machine in ("aarch64", "arm64"): + arch = "arm64" + else: + arch = machine + return f"{system}/{arch}" + except Exception: + return "unknown/unknown" + + +def _capture_posthog_event(event: str, properties: dict | None = None) -> None: + """Fire-and-forget HTTP POST to PostHog capture endpoint.""" + api_key = _get_posthog_key() + if not api_key: + return + + distinct_id = _get_distinct_id() + payload = { + "api_key": api_key, + "event": event, + "distinct_id": distinct_id, + "properties": { + "$lib": "blaxel-sdk-python", + "$lib_version": settings.version, + "os_arch": _get_os_arch(), + **(properties or {}), + }, + } + + def send() -> None: + try: + httpx.post( + f"{_POSTHOG_HOST}/capture/", + json=payload, + headers={"Content-Type": "application/json"}, + timeout=5.0, + ) + except Exception: + # Silently fail - telemetry should never break the SDK + pass + + thread = threading.Thread(target=send, daemon=True) + thread.start() + + +def track_sdk_installed() -> None: + """Track 'Installed SDK' event, deduplicated by version. + + Only fires once per SDK version. Respects DO_NOT_TRACK env var + and ~/.blaxel/config.yaml tracking setting. 
+ """ + try: + # Check tracking consent + if not settings.tracking: + return + + api_key = _get_posthog_key() + if not api_key: + return + + version = settings.version + if not version or version == "unknown": + return + + state = _load_telemetry_state() + sdk_key = "python" + + # Check if we already reported this version + if state.get("sdks", {}).get(sdk_key) == version: + return + + # Update state and save + if "sdks" not in state: + state["sdks"] = {} + state["sdks"][sdk_key] = version + _save_telemetry_state(state) + + # Fire event + _capture_posthog_event("Installed SDK", { + "sdk": "python", + "version": version, + "environment": settings.env, + }) + except Exception: + # Silently fail - telemetry should never break the SDK + pass From 68594423257dd3d4e2dcad1b6ef0b7d617c77005 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 22:50:48 +0000 Subject: [PATCH 2/3] fix: add threading lock to prevent TOCTOU race in telemetry state loading Co-Authored-By: tcrochet --- src/blaxel/core/common/posthog.py | 38 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/blaxel/core/common/posthog.py b/src/blaxel/core/common/posthog.py index f2a245f2..fcae5a50 100644 --- a/src/blaxel/core/common/posthog.py +++ b/src/blaxel/core/common/posthog.py @@ -19,6 +19,7 @@ # Telemetry state file path: ~/.blaxel/telemetry.json _telemetry_state: dict | None = None +_telemetry_lock = threading.Lock() def _get_posthog_key() -> str: @@ -39,28 +40,29 @@ def _get_telemetry_path() -> Path | None: def _load_telemetry_state() -> dict: """Load the telemetry state from disk.""" global _telemetry_state - if _telemetry_state is not None: - return _telemetry_state + with _telemetry_lock: + if _telemetry_state is not None: + return _telemetry_state - _telemetry_state = {"distinct_id": "", "sdks": {}} + _telemetry_state = {"distinct_id": "", "sdks": {}} - telemetry_path = 
_get_telemetry_path() - if not telemetry_path: - return _telemetry_state + telemetry_path = _get_telemetry_path() + if not telemetry_path: + return _telemetry_state - try: - data = telemetry_path.read_text(encoding="utf-8") - parsed = json.loads(data) - _telemetry_state = { - "distinct_id": parsed.get("distinct_id", ""), - "cli": parsed.get("cli"), - "sdks": parsed.get("sdks") or {}, - } - except Exception: - # File doesn't exist or is invalid - use defaults - pass + try: + data = telemetry_path.read_text(encoding="utf-8") + parsed = json.loads(data) + _telemetry_state = { + "distinct_id": parsed.get("distinct_id", ""), + "cli": parsed.get("cli"), + "sdks": parsed.get("sdks") or {}, + } + except Exception: + # File doesn't exist or is invalid - use defaults + pass - return _telemetry_state + return _telemetry_state def _save_telemetry_state(state: dict) -> None: From 2cf98827775ccfb19ece01e59af12fad94b60c00 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 22:55:01 +0000 Subject: [PATCH 3/3] fix: preserve unknown fields in telemetry.json to prevent data loss Co-Authored-By: tcrochet --- src/blaxel/core/common/posthog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/blaxel/core/common/posthog.py b/src/blaxel/core/common/posthog.py index fcae5a50..806ade63 100644 --- a/src/blaxel/core/common/posthog.py +++ b/src/blaxel/core/common/posthog.py @@ -54,8 +54,8 @@ def _load_telemetry_state() -> dict: data = telemetry_path.read_text(encoding="utf-8") parsed = json.loads(data) _telemetry_state = { + **parsed, "distinct_id": parsed.get("distinct_id", ""), - "cli": parsed.get("cli"), "sdks": parsed.get("sdks") or {}, } except Exception: