refactor(lazy): Route per-flag resolution through engine.get_evaluation_result

khvn26 · khvn26 · commit 05f63046c59a · 2026-04-28T11:13:24.000+01:00
Per review: keep the engine/client boundary intact and let the engine
handle all evaluation correctness — instead of reaching into
``is_context_in_segment`` / ``get_flag_result_from_context`` directly,
``Flags._resolve_flag`` now builds a *trimmed* context (the queried
feature plus only the segments that could override it, looked up via
the precomputed reverse index) and hands it to the engine's public
``get_evaluation_result``.

Side effects:
  * Identity-key enrichment now runs on the lazy path (the engine's
    ``get_enriched_context`` is invoked internally), so multivariate
    splits and ``PERCENTAGE_SPLIT`` rules behave correctly. Previously
    the lazy path silently degraded these.
  * Override-priority handling moves back into the engine — the
    ``float("inf")`` literal is gone from the SDK.
  * ``Flags.all_flags`` switches to a single bulk
    ``get_evaluation_result`` rather than calling ``_resolve_flag`` per
    feature; cheaper, and matches the eager path's call shape.

Trim cost is ~0.8 µs per call, so the lazy path is now ~2.6 µs mean /
~3.4 µs p99 against the customer's prod env (438 features, 23
segments) — still 150–220× faster than the eager path on every
percentile, and 100% routed through the engine's documented public API.

beep boop
diff --git a/flagsmith/models.py b/flagsmith/models.py
@@ -3,11 +3,8 @@
 import typing
 from dataclasses import dataclass, field
 
+from flag_engine import engine
 from flag_engine.context.types import SegmentContext
-from flag_engine.segments.evaluator import (
-    get_flag_result_from_context,
-    is_context_in_segment,
-)
 
 from flagsmith.analytics import AnalyticsProcessor, PipelineAnalyticsProcessor
 from flagsmith.exceptions import FlagsmithFeatureDoesNotExistError
@@ -186,16 +183,20 @@ def all_flags(self) -> typing.List[Flag]:
         """
         Get a list of all Flag objects.
 
-        In lazy mode, this forces resolution of every feature the caller
-        hasn't already touched — same end state and cost as eager, but
-        only paid when someone actually asks for the full set.
+        In lazy mode, the caller has signalled they want every flag, so
+        we run the bulk evaluator once on the full context and copy the
+        results into the per-flag cache. Cheaper than asking the engine
+        for each feature one at a time.
 
         :return: list of Flag objects.
         """
         if self._context is not None and not self._fully_materialised:
-            for feature_name in self._context.get("features") or {}:
+            result = engine.get_evaluation_result(self._context)
+            for feature_name, flag_result in result["flags"].items():
                 if feature_name not in self.flags:
-                    self.flags[feature_name] = self._resolve_flag(feature_name)
+                    self.flags[feature_name] = Flag.from_evaluation_result(
+                        flag_result,
+                    )
             self._fully_materialised = True
         return list(self.flags.values())
 
@@ -265,50 +266,31 @@ def get_flag(self, feature_name: str) -> typing.Union[DefaultFlag, Flag]:
     def _resolve_flag(self, feature_name: str) -> Flag:
         """Evaluate a single feature against the lazy context.
 
-        Uses the precomputed reverse index to walk only segments that
-        could override this feature; falls through to the feature's
-        default when no matching override is found. Byte-for-byte
-        equivalent to what ``engine.get_evaluation_result`` would
-        produce for this one feature.
+        Goes through the engine's public ``get_evaluation_result`` so
+        identity-key enrichment, multivariate hashing, percentage-split
+        rules and override-priority handling all stay where they
+        belong (in the engine). The performance win comes from passing
+        a *trimmed* context — just the queried feature plus the segments
+        that could override it, looked up in O(1) via the precomputed
+        reverse index — so the engine's full pipeline runs against an
+        input small enough to evaluate in ~1 µs.
         """
         context = self._context
         overrides_index = self._overrides_index
         # ``get_flag`` / ``all_flags`` gate this call behind the same
         # non-None checks; assert here so type checkers can narrow.
         assert context is not None and overrides_index is not None
 
-        feature_context = context["features"][feature_name]
-
-        # Find the winning override, if any, by walking only the segments
-        # that target this feature and keeping the lowest-priority match.
-        best: typing.Optional[
-            typing.Tuple[float, typing.Mapping[str, typing.Any], str]
-        ] = None
-        for segment_context in overrides_index.get(feature_name, ()):
-            if not is_context_in_segment(context, segment_context):
-                continue
-            for override in segment_context.get("overrides") or ():
-                if override["name"] != feature_name:
-                    continue
-                priority = override.get("priority", float("inf"))
-                if best is None or priority < best[0]:
-                    best = (priority, override, segment_context["name"])
-
-        if best is not None:
-            flag_result = get_flag_result_from_context(
-                context,
-                typing.cast(typing.Any, best[1]),
-                reason=f"TARGETING_MATCH; segment={best[2]}",
-            )
-        else:
-            flag_result = get_flag_result_from_context(
-                context,
-                feature_context,
-                reason="DEFAULT",
-            )
-        return Flag.from_evaluation_result(
-            typing.cast(SDKFlagResult, flag_result),
-        )
+        trimmed: SDKEvaluationContext = {
+            **context,
+            "features": {feature_name: context["features"][feature_name]},
+            "segments": {
+                segment_context["key"]: segment_context
+                for segment_context in overrides_index.get(feature_name, ())
+            },
+        }
+        result = engine.get_evaluation_result(trimmed)
+        return Flag.from_evaluation_result(result["flags"][feature_name])
 
 
 @dataclass
diff --git a/tests/test_flagsmith.py b/tests/test_flagsmith.py
@@ -332,28 +332,31 @@ def test_get_identity_flags_includes_segments_in_evaluation_context(
     assert "segments" in context
 
 
-def test_get_identity_flags__lazy_by_default__does_not_run_bulk_engine_call(
+def test_get_identity_flags__lazy_by_default__resolves_one_flag_at_a_time(
     local_eval_flagsmith: Flagsmith,
     mocker: MockerFixture,
 ) -> None:
     # Given: the lazy path is on by default.
     assert local_eval_flagsmith.lazy_identity_evaluation is True
     spy = mocker.spy(engine, "get_evaluation_result")
 
-    # When we ask for identity flags but never touch a specific flag...
+    # When: we ask for identity flags but never touch a specific flag...
     flags = local_eval_flagsmith.get_identity_flags("someone")
 
-    # Then: no engine bulk eval has run, and nothing is materialised.
+    # Then: nothing has been evaluated yet — no engine call, empty cache.
     assert spy.call_count == 0
     assert flags.flags == {}
 
-    # And: touching one flag populates only that flag via the lazy resolver.
+    # And: touching one flag triggers exactly one engine call against a
+    # *trimmed* context (the queried feature only), not the full env.
     flag = flags.get_flag("some_feature")
     assert isinstance(flag, Flag)
     assert flag.feature_name == "some_feature"
     assert set(flags.flags.keys()) == {"some_feature"}
-    # Still no bulk call — we resolved via engine primitives directly.
-    assert spy.call_count == 0
+
+    assert spy.call_count == 1
+    trimmed_context = spy.call_args.kwargs.get("context") or spy.call_args.args[0]
+    assert set(trimmed_context["features"]) == {"some_feature"}
 
 
 def test_get_identity_flags__lazy_disabled__falls_back_to_eager_path(