feat/add-pipeline-analytics-processor-for-batching-events

Zaimwa9 · Zaimwa9 · commit 307014735d38 · 2026-04-10T16:48:35.000+02:00
diff --git a/flagsmith/analytics.py b/flagsmith/analytics.py
@@ -1,9 +1,17 @@
 import json
+import logging
+import threading
+import time
 import typing
+from dataclasses import dataclass
 from datetime import datetime
 
 from requests_futures.sessions import FuturesSession  # type: ignore
 
+from flagsmith.version import __version__
+
+logger = logging.getLogger(__name__)
+
 ANALYTICS_ENDPOINT: typing.Final[str] = "analytics/flags/"
 
 # Used to control how often we send data(in seconds)
@@ -60,3 +68,218 @@ def track_feature(self, feature_name: str) -> None:
         self.analytics_data[feature_name] = self.analytics_data.get(feature_name, 0) + 1
         if (datetime.now() - self._last_flushed).seconds > ANALYTICS_TIMER:
             self.flush()
+
+
+@dataclass
+class PipelineAnalyticsConfig:
+    """Settings for :class:`PipelineAnalyticsProcessor`."""
+
+    # Base URL of the analytics server; a trailing slash is optional.
+    analytics_server_url: str
+    # Number of buffered events that triggers an immediate flush.
+    max_buffer: int = 1000
+    # Delay between periodic flushes once the processor has been started.
+    flush_interval_seconds: float = 10.0
+
+
+class PipelineAnalyticsProcessor:
+    """Buffer flag-evaluation and custom events and post them in batches.
+
+    Events accumulate in memory and are posted to ``v1/analytics/batch`` under
+    the configured server URL when the buffer reaches ``max_buffer``, when the
+    periodic timer fires (after :meth:`start`), or when :meth:`flush` or
+    :meth:`stop` is called. Buffer access is guarded by a lock.
+    """
+
+    def __init__(
+        self,
+        config: PipelineAnalyticsConfig,
+        environment_key: str,
+    ) -> None:
+        """Set up an empty buffer; no timer runs until :meth:`start`.
+
+        :param config: server URL, buffer size and flush interval
+        :param environment_key: sent in the payload and ``X-Environment-Key``
+        """
+        url = config.analytics_server_url
+        if not url.endswith("/"):
+            url = f"{url}/"
+        self._batch_endpoint = f"{url}v1/analytics/batch"
+        self._environment_key = environment_key
+        self._max_buffer = config.max_buffer
+        self._flush_interval_seconds = config.flush_interval_seconds
+
+        self._buffer: typing.List[typing.Dict[str, typing.Any]] = []
+        # Last fingerprint recorded per flag key since the previous flush.
+        self._dedup_keys: typing.Dict[str, str] = {}
+        self._lock = threading.Lock()
+        self._timer: typing.Optional[threading.Timer] = None
+        # Set by stop() so a timer callback already running cannot re-arm.
+        self._stopped = False
+
+    def record_evaluation_event(
+        self,
+        flag_key: str,
+        enabled: bool,
+        value: typing.Any,
+        identity_identifier: typing.Optional[str] = None,
+        traits: typing.Optional[typing.Dict[str, typing.Any]] = None,
+    ) -> None:
+        """Buffer a ``flag_evaluation`` event unless it repeats the last one.
+
+        The event is skipped when the latest event recorded for ``flag_key``
+        since the previous flush had the same identity, enabled state and
+        value. Filling the buffer to ``max_buffer`` triggers a flush.
+        """
+        fingerprint = f"{identity_identifier or 'none'}|{enabled}|{value}"
+
+        with self._lock:
+            if self._dedup_keys.get(flag_key) == fingerprint:
+                return
+            self._dedup_keys[flag_key] = fingerprint
+            self._buffer.append(
+                {
+                    "event_id": flag_key,
+                    "event_type": "flag_evaluation",
+                    "evaluated_at": int(time.time() * 1000),
+                    "identity_identifier": identity_identifier,
+                    "enabled": enabled,
+                    "value": value,
+                    "traits": dict(traits) if traits else None,
+                    "metadata": {"sdk_version": __version__},
+                }
+            )
+            should_flush = len(self._buffer) >= self._max_buffer
+
+        # flush() takes the (non-reentrant) lock itself, so call it outside.
+        if should_flush:
+            self.flush()
+
+    def record_custom_event(
+        self,
+        event_name: str,
+        identity_identifier: typing.Optional[str] = None,
+        traits: typing.Optional[typing.Dict[str, typing.Any]] = None,
+        metadata: typing.Optional[typing.Dict[str, typing.Any]] = None,
+    ) -> None:
+        """Buffer a ``custom_event``; custom events are never deduplicated.
+
+        ``sdk_version`` is always written into the event metadata and
+        overrides a caller-supplied key of the same name. Filling the buffer
+        to ``max_buffer`` triggers a flush.
+        """
+        with self._lock:
+            self._buffer.append(
+                {
+                    "event_id": event_name,
+                    "event_type": "custom_event",
+                    "evaluated_at": int(time.time() * 1000),
+                    "identity_identifier": identity_identifier,
+                    "enabled": None,
+                    "value": None,
+                    "traits": dict(traits) if traits else None,
+                    "metadata": {**(metadata or {}), "sdk_version": __version__},
+                }
+            )
+            should_flush = len(self._buffer) >= self._max_buffer
+
+        if should_flush:
+            self.flush()
+
+    def flush(self) -> None:
+        """Post every buffered event as one batch; do nothing when empty.
+
+        The buffer and dedup state are reset before the request is issued.
+        A failed request re-queues its events from the future's callback. A
+        batch that cannot be serialised to JSON is logged and dropped rather
+        than raised into the caller or the timer thread.
+        """
+        with self._lock:
+            if not self._buffer:
+                return
+            events = self._buffer
+            self._buffer = []
+            self._dedup_keys.clear()
+
+        try:
+            payload = json.dumps(
+                {"events": events, "environment_key": self._environment_key}
+            )
+        except (TypeError, ValueError):
+            # Re-queuing would make every later flush fail on the same event.
+            logger.exception(
+                "Failed to serialise pipeline analytics batch, dropping %d events",
+                len(events),
+            )
+            return
+
+        future = session.post(
+            self._batch_endpoint,
+            data=payload,
+            timeout=3,
+            headers={
+                "Content-Type": "application/json; charset=utf-8",
+                "X-Environment-Key": self._environment_key,
+                "Flagsmith-SDK-User-Agent": f"flagsmith-python-client/{__version__}",
+            },
+        )
+        future.add_done_callback(lambda f: self._handle_flush_result(f, events))
+
+    def _handle_flush_result(
+        self,
+        future: typing.Any,
+        events: typing.List[typing.Dict[str, typing.Any]],
+    ) -> None:
+        """Re-queue ``events`` when the batch request failed.
+
+        Failed events go back to the front of the buffer, which is then
+        truncated to ``max_buffer`` entries (the newest entries are dropped).
+        """
+        try:
+            response = future.result()
+            response.raise_for_status()
+        except Exception:
+            logger.warning(
+                "Failed to flush pipeline analytics, re-queuing events",
+                exc_info=True,
+            )
+            with self._lock:
+                self._buffer = (events + self._buffer)[: self._max_buffer]
+
+    def start(self) -> None:
+        """Start (or restart after :meth:`stop`) the periodic flush timer."""
+        with self._lock:
+            self._stopped = False
+        self._schedule_flush()
+
+    def stop(self) -> None:
+        """Cancel the periodic timer and flush whatever is still buffered."""
+        with self._lock:
+            self._stopped = True
+            timer, self._timer = self._timer, None
+        if timer is not None:
+            timer.cancel()
+        self.flush()
+
+    def _schedule_flush(self) -> None:
+        """Arm a daemon timer for the next flush unless stopped."""
+        with self._lock:
+            if self._stopped:
+                return
+            if self._timer is not None:
+                # Avoid leaving a second live timer if start() is called twice.
+                self._timer.cancel()
+            timer = threading.Timer(self._flush_interval_seconds, self._timer_flush)
+            timer.daemon = True
+            self._timer = timer
+            timer.start()
+
+    def _timer_flush(self) -> None:
+        """Timer callback: flush, then always arm the next timer."""
+        try:
+            self.flush()
+        except Exception:
+            logger.exception("Scheduled pipeline analytics flush failed")
+        finally:
+            # Re-arm even after an error, otherwise periodic flushing stops.
+            self._schedule_flush()
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -11,7 +11,11 @@
 from pytest_mock import MockerFixture
 
 from flagsmith import Flagsmith
-from flagsmith.analytics import AnalyticsProcessor
+from flagsmith.analytics import (
+    AnalyticsProcessor,
+    PipelineAnalyticsConfig,
+    PipelineAnalyticsProcessor,
+)
 from flagsmith.api.types import EnvironmentModel
 from flagsmith.mappers import map_environment_document_to_context
 from flagsmith.types import SDKEvaluationContext
@@ -26,6 +30,25 @@ def analytics_processor() -> AnalyticsProcessor:
     )
 
 
+@pytest.fixture()
+def pipeline_analytics_config() -> PipelineAnalyticsConfig:
+    """Pipeline analytics settings pointing at a dummy analytics server."""
+    server_url = "http://test_analytics/"
+    return PipelineAnalyticsConfig(analytics_server_url=server_url)
+
+
+@pytest.fixture()
+def pipeline_analytics_processor(
+    pipeline_analytics_config: PipelineAnalyticsConfig,
+) -> PipelineAnalyticsProcessor:
+    """Processor built from the test config; its timer is not started."""
+    processor = PipelineAnalyticsProcessor(
+        environment_key="test_key",
+        config=pipeline_analytics_config,
+    )
+    return processor
+
+
 @pytest.fixture(scope="session")
 def api_key() -> str:
     return "".join(random.sample(string.ascii_letters, 20))
diff --git a/tests/test_pipeline_analytics.py b/tests/test_pipeline_analytics.py
@@ -0,0 +1,214 @@
+import json
+from concurrent.futures import Future
+from unittest import mock
+
+import pytest
+
+from flagsmith.analytics import (
+    PipelineAnalyticsConfig,
+    PipelineAnalyticsProcessor,
+)
+
+
+def test_record_evaluation_event_buffers_event(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """A recorded evaluation lands in the buffer with every field populated."""
+    pipeline_analytics_processor.record_evaluation_event(
+        flag_key="my_flag",
+        enabled=True,
+        value="variant_a",
+        identity_identifier="user123",
+        traits={"plan": "premium"},
+    )
+
+    assert len(pipeline_analytics_processor._buffer) == 1
+    event = pipeline_analytics_processor._buffer[0]
+    assert event["event_id"] == "my_flag"
+    assert event["event_type"] == "flag_evaluation"
+    assert event["identity_identifier"] == "user123"
+    assert event["enabled"] is True
+    assert event["value"] == "variant_a"
+    assert event["traits"] == {"plan": "premium"}
+    assert "sdk_version" in event["metadata"]
+    assert isinstance(event["evaluated_at"], int)
+
+
+@pytest.mark.parametrize(
+    "second_enabled, expected_count",
+    [
+        (True, 1),  # same fingerprint -> deduplicated
+        (False, 2),  # different fingerprint -> both kept
+    ],
+)
+def test_evaluation_event_deduplication(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+    second_enabled: bool,
+    expected_count: int,
+) -> None:
+    """Only an evaluation that differs from the previous one is buffered."""
+    pipeline_analytics_processor.record_evaluation_event(
+        flag_key="my_flag", enabled=True, value="v1", identity_identifier="user1"
+    )
+    pipeline_analytics_processor.record_evaluation_event(
+        flag_key="my_flag",
+        enabled=second_enabled,
+        value="v1",
+        identity_identifier="user1",
+    )
+
+    assert len(pipeline_analytics_processor._buffer) == expected_count
+
+
+def test_dedup_keys_cleared_after_flush(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """After a flush the same evaluation is buffered again."""
+    with mock.patch("flagsmith.analytics.session"):
+        pipeline_analytics_processor.record_evaluation_event(
+            flag_key="my_flag", enabled=True, value="v1", identity_identifier="user1"
+        )
+        pipeline_analytics_processor.flush()
+
+        pipeline_analytics_processor.record_evaluation_event(
+            flag_key="my_flag", enabled=True, value="v1", identity_identifier="user1"
+        )
+
+    assert len(pipeline_analytics_processor._buffer) == 1
+
+
+def test_auto_flush_on_buffer_full() -> None:
+    """Reaching max_buffer sends exactly one batch and empties the buffer."""
+    config = PipelineAnalyticsConfig(
+        analytics_server_url="http://test/", max_buffer=5
+    )
+    processor = PipelineAnalyticsProcessor(config=config, environment_key="key")
+
+    with mock.patch("flagsmith.analytics.session") as mock_session:
+        for i in range(5):
+            processor.record_evaluation_event(
+                flag_key=f"flag_{i}", enabled=True, value=str(i)
+            )
+
+    mock_session.post.assert_called_once()
+    assert len(processor._buffer) == 0
+
+
+def test_flush_sends_correct_http_request(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """flush() posts the batch to the batch endpoint with the SDK headers."""
+    with mock.patch("flagsmith.analytics.session") as mock_session:
+        pipeline_analytics_processor.record_evaluation_event(
+            flag_key="my_flag", enabled=True, value="v1", identity_identifier="user1"
+        )
+        pipeline_analytics_processor.flush()
+
+    mock_session.post.assert_called_once()
+    post_args, post_kwargs = mock_session.post.call_args
+    assert post_args[0] == "http://test_analytics/v1/analytics/batch"
+
+    headers = post_kwargs["headers"]
+    assert headers["X-Environment-Key"] == "test_key"
+    assert headers["Content-Type"] == "application/json; charset=utf-8"
+    assert "flagsmith-python-client/" in headers["Flagsmith-SDK-User-Agent"]
+
+    body = json.loads(post_kwargs["data"])
+    assert body["environment_key"] == "test_key"
+    assert len(body["events"]) == 1
+    assert body["events"][0]["event_id"] == "my_flag"
+
+
+def test_flush_noop_when_empty(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """Flushing an empty buffer sends nothing."""
+    with mock.patch("flagsmith.analytics.session") as mock_session:
+        pipeline_analytics_processor.flush()
+
+    mock_session.post.assert_not_called()
+
+
+def test_failed_flush_requeues_events(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """Events of a failed request are put back into the buffer."""
+    # An already-failed future runs the done-callback as soon as it is added.
+    future: Future[None] = Future()
+    future.set_exception(Exception("connection error"))
+
+    with mock.patch("flagsmith.analytics.session") as mock_session:
+        mock_session.post.return_value = future
+        pipeline_analytics_processor.record_evaluation_event(
+            flag_key="my_flag", enabled=True, value="v1"
+        )
+        pipeline_analytics_processor.flush()
+
+    assert len(pipeline_analytics_processor._buffer) == 1
+    assert pipeline_analytics_processor._buffer[0]["event_id"] == "my_flag"
+
+
+@pytest.mark.parametrize(
+    "url, expected_endpoint",
+    [
+        ("http://example.com", "http://example.com/v1/analytics/batch"),
+        ("http://example.com/", "http://example.com/v1/analytics/batch"),
+    ],
+)
+def test_url_trailing_slash_handling(url: str, expected_endpoint: str) -> None:
+    """The batch endpoint is the same with or without a trailing slash."""
+    config = PipelineAnalyticsConfig(analytics_server_url=url)
+    processor = PipelineAnalyticsProcessor(config=config, environment_key="key")
+    assert processor._batch_endpoint == expected_endpoint
+
+
+def test_record_custom_event(
+    pipeline_analytics_processor: PipelineAnalyticsProcessor,
+) -> None:
+    """Custom events are buffered as-is, repeats included."""
+    pipeline_analytics_processor.record_custom_event(
+        event_name="purchase",
+        identity_identifier="user1",
+        traits={"plan": "premium"},
+        metadata={"amount": 99},
+    )
+    # Custom events are never deduplicated
+    pipeline_analytics_processor.record_custom_event(
+        event_name="purchase",
+        identity_identifier="user1",
+    )
+
+    assert len(pipeline_analytics_processor._buffer) == 2
+    event = pipeline_analytics_processor._buffer[0]
+    assert event["event_id"] == "purchase"
+    assert event["event_type"] == "custom_event"
+    assert event["enabled"] is None
+    assert event["value"] is None
+    assert event["traits"] == {"plan": "premium"}
+    assert event["metadata"]["amount"] == 99
+    assert "sdk_version" in event["metadata"]
+
+
+def test_start_stop_lifecycle() -> None:
+    """start() arms the timer; stop() cancels it and flushes the buffer."""
+    config = PipelineAnalyticsConfig(
+        analytics_server_url="http://test/", flush_interval_seconds=100
+    )
+    processor = PipelineAnalyticsProcessor(config=config, environment_key="key")
+
+    processor.start()
+    timer = processor._timer
+    assert timer is not None
+    assert timer.is_alive()
+
+    with mock.patch("flagsmith.analytics.session") as mock_session:
+        processor.record_evaluation_event(
+            flag_key="my_flag", enabled=True, value="v1"
+        )
+        processor.stop()
+
+    # A cancelled Timer thread wakes up and exits without running its callback.
+    timer.join(timeout=1)
+    assert not timer.is_alive()
+    mock_session.post.assert_called_once()
+    assert len(processor._buffer) == 0