feat: Lazy identity-flag evaluation in local-eval mode (#200)

khvn26 · web-flow · commit f7750bbd4d35 · 2026-04-28T14:57:49.000-03:00
diff --git a/flagsmith/flagsmith.py b/flagsmith/flagsmith.py
@@ -21,7 +21,13 @@
     map_segment_results_to_identity_segments,
     resolve_trait_values,
 )
-from flagsmith.models import DefaultFlag, Flags, Segment
+from flagsmith.models import (
+    DefaultFlag,
+    Flags,
+    Segment,
+    SegmentOverridesIndex,
+    build_segment_overrides_index,
+)
 from flagsmith.offline_handlers import OfflineHandler
 from flagsmith.polling_manager import EnvironmentDataPollingManager
 from flagsmith.streaming_manager import EventStreamManager
@@ -117,7 +123,8 @@ def __init__(
         self._pipeline_analytics_processor: typing.Optional[
             PipelineAnalyticsProcessor
         ] = None
-        self._evaluation_context: typing.Optional[SDKEvaluationContext] = None
+        self.__evaluation_context: typing.Optional[SDKEvaluationContext] = None
+        self._segment_overrides_index: SegmentOverridesIndex = {}
         self._environment_updated_at: typing.Optional[datetime] = None
 
         # argument validation
@@ -356,6 +363,26 @@ def update_environment(self) -> None:
             except (KeyError, TypeError, ValueError):
                 logger.exception("Error parsing environment document")
 
+    @property
+    def _evaluation_context(self) -> typing.Optional[SDKEvaluationContext]:
+        """Return the stored evaluation context, or `None` if none is set."""
+        return self.__evaluation_context
+
+    @_evaluation_context.setter
+    def _evaluation_context(
+        self, context: typing.Optional[SDKEvaluationContext]
+    ) -> None:
+        """Store a new evaluation context and rebuild the overrides index.
+
+        The index maps feature_name -> segments that override it and is
+        handed to every lazily evaluated `Flags`. It is rebuilt on each
+        assignment to stay in sync; assigning `None` resets it to `{}`.
+        """
+        self.__evaluation_context = context
+        self._segment_overrides_index = (
+            build_segment_overrides_index(context) if context is not None else {}
+        )
+
     def _get_headers(
         self,
         environment_key: str,
@@ -407,12 +434,12 @@ def _get_identity_flags_from_document(
             identifier=identifier,
             traits=traits,
         )
-        evaluation_result = engine.get_evaluation_result(
+        # Lazy: defer per-feature evaluation until the caller actually reads
+        # a flag. This benefits callers that read only one or a few flags
+        # out of a large environment.
+        return Flags.from_evaluation_context(
             context=context,
-        )
-
-        return Flags.from_evaluation_result(
-            evaluation_result=evaluation_result,
+            overrides_index=self._segment_overrides_index,
             analytics_processor=self._analytics_processor,
             default_flag_handler=self.default_flag_handler,
             pipeline_analytics_processor=self._pipeline_analytics_processor,
diff --git a/flagsmith/models.py b/flagsmith/models.py
@@ -3,9 +3,37 @@
 import typing
 from dataclasses import dataclass, field
 
+from flag_engine import engine
+from flag_engine.context.types import SegmentContext
+
 from flagsmith.analytics import AnalyticsProcessor, PipelineAnalyticsProcessor
 from flagsmith.exceptions import FlagsmithFeatureDoesNotExistError
-from flagsmith.types import SDKEvaluationResult, SDKFlagResult
+from flagsmith.types import (
+    FeatureMetadata,
+    SDKEvaluationContext,
+    SDKEvaluationResult,
+    SDKFlagResult,
+    SegmentMetadata,
+)
+
+SegmentOverridesIndex = typing.Dict[
+    str, typing.List[SegmentContext[SegmentMetadata, FeatureMetadata]]
+]
+
+
+def build_segment_overrides_index(
+    context: SDKEvaluationContext,
+) -> SegmentOverridesIndex:
+    """Map feature_name -> segments that carry an override for that feature.
+
+    Lets the lazy eval path look at only the segments relevant to one flag.
+    Missing or empty `segments` / `overrides` entries contribute nothing.
+    """
+    index: SegmentOverridesIndex = {}
+    for segment_context in (context.get("segments") or {}).values():
+        for override in segment_context.get("overrides") or ():
+            index.setdefault(override["name"], []).append(segment_context)
+    return index
 
 
 @dataclass
@@ -60,6 +88,14 @@ class Flags:
     _pipeline_analytics_processor: typing.Optional[PipelineAnalyticsProcessor] = None
     _identity_identifier: typing.Optional[str] = None
     _traits: typing.Optional[typing.Dict[str, typing.Any]] = None
+    # Lazy-evaluation state. When `_context` is set, `flags` is a
+    # per-feature memo rather than a fully-materialised snapshot; unseen
+    # features are resolved on demand via the engine primitives and
+    # cached back into `flags`. Left as `None` by the eager code
+    # paths (`from_evaluation_result` / `from_api_flags`).
+    _context: typing.Optional[SDKEvaluationContext] = None
+    _overrides_index: typing.Optional[SegmentOverridesIndex] = None
+    _fully_materialised: bool = False
 
     @classmethod
     def from_evaluation_result(
@@ -86,6 +122,37 @@ def from_evaluation_result(
             _traits=traits,
         )
 
+    @classmethod
+    def from_evaluation_context(
+        cls,
+        context: SDKEvaluationContext,
+        overrides_index: SegmentOverridesIndex,
+        analytics_processor: typing.Optional[AnalyticsProcessor],
+        default_flag_handler: typing.Optional[typing.Callable[[str], DefaultFlag]],
+        pipeline_analytics_processor: typing.Optional[
+            PipelineAnalyticsProcessor
+        ] = None,
+        identity_identifier: typing.Optional[str] = None,
+        traits: typing.Optional[typing.Dict[str, typing.Any]] = None,
+    ) -> Flags:
+        """Build a lazy `Flags` backed by an evaluation context.
+
+        No engine work is done here: `flags` starts empty and is filled on
+        demand, per feature by `get_flag` (via :meth:`_resolve_flag`) or in
+        bulk by `all_flags`. `overrides_index` is stored as passed (not
+        copied), so one index can be shared across many `Flags` objects.
+        """
+        return cls(
+            flags={},
+            default_flag_handler=default_flag_handler,
+            _analytics_processor=analytics_processor,
+            _pipeline_analytics_processor=pipeline_analytics_processor,
+            _identity_identifier=identity_identifier,
+            _traits=traits,
+            _context=context,
+            _overrides_index=overrides_index,
+        )
+
     @classmethod
     def from_api_flags(
         cls,
@@ -116,8 +183,21 @@ def all_flags(self) -> typing.List[Flag]:
         """
         Get a list of all Flag objects.
 
+        In lazy mode, the caller has signalled they want every flag, so
+        we run the bulk evaluator once on the full context and copy any
+        results not already cached into the per-flag cache. Cheaper than
+        asking the engine for each feature one at a time.
+
         :return: list of Flag objects.
         """
+        if self._context is not None and not self._fully_materialised:
+            result = engine.get_evaluation_result(self._context)
+            for feature_name, flag_result in result["flags"].items():
+                if feature_name not in self.flags:
+                    self.flags[feature_name] = Flag.from_evaluation_result(
+                        flag_result,
+                    )
+            self._fully_materialised = True
         return list(self.flags.values())
 
     def is_feature_enabled(self, feature_name: str) -> bool:
@@ -151,11 +231,23 @@ def get_flag(self, feature_name: str) -> typing.Union[DefaultFlag, Flag]:
         try:
             flag = self.flags[feature_name]
         except KeyError:
-            if self.default_flag_handler:
+            # Lazy path: if this `Flags` wraps an evaluation context and
+            # the feature exists in it, resolve and memoise now. Otherwise
+            # fall through to the default_flag_handler / not-found error,
+            # preserving the eager-mode behaviour byte-for-byte.
+            if (
+                self._context is not None
+                and self._overrides_index is not None
+                and feature_name in (self._context.get("features") or {})
+            ):
+                flag = self._resolve_flag(feature_name)
+                self.flags[feature_name] = flag
+            elif self.default_flag_handler:
                 return self.default_flag_handler(feature_name)
-            raise FlagsmithFeatureDoesNotExistError(
-                "Feature does not exist: %s" % feature_name
-            )
+            else:
+                raise FlagsmithFeatureDoesNotExistError(
+                    "Feature does not exist: %s" % feature_name
+                )
 
         if self._analytics_processor and hasattr(flag, "feature_name"):
             self._analytics_processor.track_feature(flag.feature_name)
@@ -171,6 +263,35 @@ def get_flag(self, feature_name: str) -> typing.Union[DefaultFlag, Flag]:
 
         return flag
 
+    def _resolve_flag(self, feature_name: str) -> Flag:
+        """Evaluate a single feature against the lazy context.
+
+        Calls the engine's public `get_evaluation_result` on a *trimmed*
+        copy of the context: every top-level key is kept, but `features`
+        holds only the queried feature and `segments` only the segments
+        the overrides index lists for it. Evaluation logic therefore stays
+        in the engine while its input stays small.
+
+        NOTE(review): `segments` is re-keyed by each segment's `"key"`, so
+        segments sharing a key would collapse into one — confirm uniqueness.
+        """
+        context = self._context
+        overrides_index = self._overrides_index
+        # `get_flag` only calls this once both values are non-None and the
+        # feature is in the context; the assert lets type checkers narrow.
+        assert context is not None and overrides_index is not None
+
+        trimmed: SDKEvaluationContext = {
+            **context,
+            "features": {feature_name: context["features"][feature_name]},
+            "segments": {
+                segment_context["key"]: segment_context
+                for segment_context in overrides_index.get(feature_name, ())
+            },
+        }
+        result = engine.get_evaluation_result(trimmed)
+        return Flag.from_evaluation_result(result["flags"][feature_name])
+
 
 @dataclass
 class Segment:
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -10,7 +10,7 @@ documentation = "https://docs.flagsmith.com"
 packages = [{ include = "flagsmith" }]
 
 [tool.poetry.dependencies]
-flagsmith-flag-engine = "^10.0.3"
+flagsmith-flag-engine = "^10.0.4"
 iso8601 = { version = "^2.1.0", python = "<3.11" }
 python = ">=3.9,<4"
 requests = "^2.32.3"
diff --git a/tests/test_flagsmith.py b/tests/test_flagsmith.py
@@ -5,6 +5,7 @@
 import pytest
 import requests
 import responses
+from flag_engine import engine
 from pytest_mock import MockerFixture
 from responses import matchers
 
@@ -15,7 +16,7 @@
     FlagsmithAPIError,
     FlagsmithFeatureDoesNotExistError,
 )
-from flagsmith.models import DefaultFlag, Flags
+from flagsmith.models import DefaultFlag, Flag, Flags
 from flagsmith.offline_handlers import OfflineHandler
 from flagsmith.types import SDKEvaluationContext
 
@@ -193,7 +194,12 @@ def test_get_identity_flags_uses_local_environment_when_available(
     # Given
     flagsmith._evaluation_context = evaluation_context
     flagsmith.enable_local_evaluation = True
-    mock_engine = mocker.patch("flagsmith.flagsmith.engine")
+    # `Flags` materialises identity flags via `engine.get_evaluation_result`
+    # imported from `flagsmith.models`, so patch it where it's actually used.
+    mock_get_evaluation_result = mocker.patch(
+        "flagsmith.models.engine.get_evaluation_result",
+        autospec=True,
+    )
 
     expected_evaluation_result = {
         "flags": {
@@ -210,15 +216,15 @@ def test_get_identity_flags_uses_local_environment_when_available(
     identifier = "identifier"
     traits = {"some_trait": "some_value"}
 
-    mock_engine.get_evaluation_result.return_value = expected_evaluation_result
+    mock_get_evaluation_result.return_value = expected_evaluation_result
 
     # When
     identity_flags = flagsmith.get_identity_flags(identifier, traits).all_flags()
 
     # Then
-    mock_engine.get_evaluation_result.assert_called_once()
-    call_args = mock_engine.get_evaluation_result.call_args
-    context = call_args[1]["context"]
+    mock_get_evaluation_result.assert_called_once()
+    call_args = mock_get_evaluation_result.call_args
+    context = call_args[0][0] if call_args.args else call_args[1]["context"]
     assert context["identity"]["identifier"] == identifier
     assert context["identity"]["traits"]["some_trait"] == "some_value"
     assert "some_trait" in context["identity"]["traits"]
@@ -233,7 +239,7 @@ def test_get_identity_flags_includes_segments_in_evaluation_context(
 ) -> None:
     # Given
     mock_get_evaluation_result = mocker.patch(
-        "flagsmith.flagsmith.engine.get_evaluation_result",
+        "flagsmith.models.engine.get_evaluation_result",
         autospec=True,
     )
 
@@ -254,16 +260,43 @@ def test_get_identity_flags_includes_segments_in_evaluation_context(
 
     mock_get_evaluation_result.return_value = expected_evaluation_result
 
-    # When
-    local_eval_flagsmith.get_identity_flags(identifier, traits)
+    # When: `all_flags` triggers the bulk evaluation path on the lazy
+    # `Flags` object, which is where the full identity context — segments
+    # included — is passed to the engine.
+    local_eval_flagsmith.get_identity_flags(identifier, traits).all_flags()
 
-    # Then
-    # Verify segments are present in the context passed to the engine for identity flags
+    # Then: segments are present in the context passed to the engine for
+    # identity flags (in contrast to the env-flags path, which strips them).
     call_args = mock_get_evaluation_result.call_args
-    context = call_args[1]["context"]
+    context = call_args[0][0] if call_args.args else call_args[1]["context"]
     assert "segments" in context
 
 
+def test_get_identity_flags__resolves_one_flag_at_a_time(
+    local_eval_flagsmith: Flagsmith,
+    mocker: MockerFixture,
+) -> None:
+    spy = mocker.spy(engine, "get_evaluation_result")
+
+    # When: we ask for identity flags but never touch a specific flag...
+    flags = local_eval_flagsmith.get_identity_flags("someone")
+
+    # Then: nothing has been evaluated yet — no engine call, empty cache.
+    assert spy.call_count == 0
+    assert flags.flags == {}
+
+    # And: touching one flag triggers exactly one engine call whose
+    # context has `features` trimmed to just the queried feature.
+    flag = flags.get_flag("some_feature")
+    assert isinstance(flag, Flag)
+    assert flag.feature_name == "some_feature"
+    assert set(flags.flags.keys()) == {"some_feature"}
+
+    assert spy.call_count == 1
+    trimmed_context = spy.call_args.kwargs.get("context") or spy.call_args.args[0]
+    assert set(trimmed_context["features"]) == {"some_feature"}
+
+
 @responses.activate()
 def test_get_identity_flags__transient_identity__calls_expected(
     flagsmith: Flagsmith,
diff --git a/tests/test_models.py b/tests/test_models.py