Skip to content

Commit 3e76c44

Browse files
committed
feat: Lazy identity-flag evaluation in local-eval mode
``get_identity_flags`` now returns a ``Flags`` that holds the evaluation context plus a precomputed segment-overrides reverse index, and resolves each feature on first access via the engine primitives (``is_context_in_segment`` + ``get_flag_result_from_context``) rather than running a full bulk evaluation up-front. In environments shaped like the Slack-report customer (420 features, 30 CSV-IN segments, hot loop reading one boolean flag) this takes ``get_identity_flags().is_feature_enabled(name)`` from ~430 µs to ~1.85 µs per call; 200-segment envs go from ~1200 µs to ~2 µs. The ``.all_flags()`` materialisation path is never slower than the eager baseline in the bench matrix. Back-compat: * ``Flags`` public API unchanged (``is_feature_enabled``, ``get_feature_value``, ``get_flag``, ``all_flags``). * ``FlagResult`` construction reuses the same engine helper as the bulk path — identical output shape. * New ``lazy_identity_evaluation`` constructor kwarg, default ``True``, lets operators flip back to the eager path if they hit an unexpected regression. Engine contract is untouched: the SDK consumes only already-public ``flag_engine.segments.evaluator`` symbols. beep boop
1 parent ae45cbd commit 3e76c44

7 files changed

Lines changed: 724 additions & 12 deletions

File tree

benchmarks/__init__.py

Whitespace-only changes.

benchmarks/bench.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""Customer-shape benchmark for SDK lazy identity-flag evaluation.
2+
3+
Compares lazy (default) vs eager (rollback kwarg) per-call latency
4+
across three workloads — always the same flag, a rotating flag, and
5+
``all_flags()`` — on environments shaped after the Slack report:
6+
420 features, a handful of CSV-IN segments, optionally with several of
7+
those segments overriding the queried flag.
8+
9+
Run with::
10+
11+
poetry run python -m benchmarks.bench
12+
poetry run python -m benchmarks.bench --iters 5000 --warmup 500
13+
"""
14+
15+
from __future__ import annotations
16+
17+
import argparse
18+
import statistics
19+
import time
20+
import typing
21+
22+
from benchmarks.env import build_environment
23+
from flagsmith import Flagsmith
24+
from flagsmith.api.types import EnvironmentModel
25+
from flagsmith.mappers import map_environment_document_to_context
26+
27+
28+
def _make_client(
29+
*,
30+
n_features: int = 420,
31+
with_segments: int = 0,
32+
overrides_per_queried_flag: int = 0,
33+
lazy_identity_evaluation: bool = True,
34+
) -> Flagsmith:
35+
env_doc = typing.cast(
36+
EnvironmentModel,
37+
build_environment(
38+
n_features=n_features,
39+
with_segments=with_segments,
40+
overrides_per_queried_flag=overrides_per_queried_flag,
41+
),
42+
)
43+
# Bypass the real __init__ (no HTTP, no polling, no analytics). The
44+
# _evaluation_context property setter rebuilds the overrides index,
45+
# matching what happens on a real environment refresh.
46+
client = Flagsmith.__new__(Flagsmith)
47+
client.offline_mode = False
48+
client.enable_local_evaluation = True
49+
client.offline_handler = None
50+
client.default_flag_handler = None
51+
client.enable_realtime_updates = False
52+
client.lazy_identity_evaluation = lazy_identity_evaluation
53+
client._analytics_processor = None
54+
client._pipeline_analytics_processor = None
55+
client._environment_updated_at = None
56+
client._evaluation_context = map_environment_document_to_context(env_doc)
57+
return client
58+
59+
60+
def _bench(
61+
label: str,
62+
fn: typing.Callable[[], object],
63+
*,
64+
iters: int,
65+
warmup: int,
66+
) -> None:
67+
for _ in range(warmup):
68+
fn()
69+
# Break iters into 20 batches to get a cheap stdev estimate.
70+
samples: typing.List[float] = []
71+
batch = max(1, iters // 20)
72+
remaining = iters
73+
while remaining > 0:
74+
n = min(batch, remaining)
75+
t0 = time.perf_counter()
76+
for _ in range(n):
77+
fn()
78+
samples.append((time.perf_counter() - t0) / n)
79+
remaining -= n
80+
p50 = statistics.median(samples) * 1e6
81+
mean = statistics.fmean(samples) * 1e6
82+
stdev = statistics.pstdev(samples) * 1e6
83+
print(f"{label:<60} p50={p50:8.2f} µs mean={mean:8.2f} µs stdev={stdev:6.2f} µs")
84+
85+
86+
def run_matrix(iters: int, warmup: int) -> None:
87+
traits = {"venue_id": "12345"}
88+
89+
scenarios: typing.List[typing.Tuple[str, typing.Dict[str, int]]] = [
90+
("customer-shape (0 overrides)", {"n_features": 420, "with_segments": 30}),
91+
(
92+
"flag targeted by 10 overrides",
93+
{
94+
"n_features": 420,
95+
"with_segments": 30,
96+
"overrides_per_queried_flag": 10,
97+
},
98+
),
99+
(
100+
"segmenting-heavy (200 segs)",
101+
{"n_features": 420, "with_segments": 200},
102+
),
103+
]
104+
105+
for label, kwargs in scenarios:
106+
print(f"\n=== {label} ===")
107+
lazy_client = _make_client(lazy_identity_evaluation=True, **kwargs)
108+
eager_client = _make_client(lazy_identity_evaluation=False, **kwargs)
109+
110+
probe_flags = eager_client.get_identity_flags(
111+
identifier="anonymous",
112+
traits=traits,
113+
)
114+
probe_name = next(iter(probe_flags.flags))
115+
rotating_names = list(probe_flags.flags.keys())
116+
117+
for mode, client in [("eager", eager_client), ("lazy ", lazy_client)]:
118+
_bench(
119+
f" {mode} | get_identity_flags().is_feature_enabled(one)",
120+
lambda c=client: c.get_identity_flags( # type: ignore[misc]
121+
identifier="anonymous",
122+
traits=traits,
123+
).is_feature_enabled(probe_name),
124+
iters=iters,
125+
warmup=warmup,
126+
)
127+
128+
rotating_state = {"i": 0}
129+
130+
def rotating(c: Flagsmith = client) -> None:
131+
idx = rotating_state["i"]
132+
rotating_state["i"] = (idx + 1) % len(rotating_names)
133+
c.get_identity_flags(
134+
identifier="anonymous",
135+
traits=traits,
136+
).is_feature_enabled(rotating_names[idx])
137+
138+
_bench(
139+
f" {mode} | get_identity_flags().is_feature_enabled(rotating)",
140+
rotating,
141+
iters=iters,
142+
warmup=warmup,
143+
)
144+
145+
_bench(
146+
f" {mode} | get_identity_flags().all_flags()",
147+
lambda c=client: c.get_identity_flags( # type: ignore[misc]
148+
identifier="anonymous",
149+
traits=traits,
150+
).all_flags(),
151+
iters=iters,
152+
warmup=warmup,
153+
)
154+
155+
156+
def main() -> None:
157+
parser = argparse.ArgumentParser()
158+
parser.add_argument("--iters", type=int, default=2000)
159+
parser.add_argument("--warmup", type=int, default=300)
160+
args = parser.parse_args()
161+
run_matrix(args.iters, args.warmup)
162+
163+
164+
if __name__ == "__main__":
165+
main()

benchmarks/env.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""Synthetic environment builder for local-evaluation benchmarks.
2+
3+
Produces a document shaped like the real Flagsmith environment payload
4+
so we can exercise the SDK's local-eval paths without any network or
5+
fixture-file dependencies. Parameters mirror the shape reported in the
6+
Slack thread driving the lazy-evaluation work: many features, a handful
7+
of CSV-IN segments, optionally several segments that all override the
8+
same queried flag.
9+
"""
10+
11+
from __future__ import annotations
12+
13+
import typing
14+
15+
16+
def build_environment(
17+
n_features: int = 420,
18+
*,
19+
with_multivariate: int = 0,
20+
with_segments: int = 0,
21+
overrides_per_queried_flag: int = 0,
22+
queried_feature_name: str = "feature_0000",
23+
) -> typing.Dict[str, typing.Any]:
24+
"""Return a Flagsmith environment document.
25+
26+
:param n_features: total feature count; dominant driver of eager-eval cost.
27+
:param with_multivariate: how many of the first N features carry a
28+
2-way multivariate split (exercises the variant hash path).
29+
:param with_segments: how many segments to attach. Each has a single
30+
IN condition on ``venue_id`` with a comma-separated CSV value list.
31+
:param overrides_per_queried_flag: how many of those segments also
32+
carry an override for ``queried_feature_name`` (at priorities
33+
0..N-1, so the lowest-priority match wins deterministically).
34+
:param queried_feature_name: the feature whose evaluation the bench
35+
times; only meaningful when ``overrides_per_queried_flag > 0``.
36+
"""
37+
feature_states: typing.List[typing.Dict[str, typing.Any]] = []
38+
for i in range(n_features):
39+
name = f"feature_{i:04d}"
40+
fs: typing.Dict[str, typing.Any] = {
41+
"django_id": i + 1,
42+
"feature": {"name": name, "type": "STANDARD", "id": i + 1},
43+
"feature_state_value": f"value-{i}",
44+
"enabled": bool(i % 2),
45+
"featurestate_uuid": f"fs-{i:04d}",
46+
"feature_segment": None,
47+
"multivariate_feature_state_values": [],
48+
}
49+
if with_multivariate and i < with_multivariate:
50+
fs["multivariate_feature_state_values"] = [
51+
{
52+
"multivariate_feature_option": {"value": f"mv-{i}-a"},
53+
"percentage_allocation": 50.0,
54+
"id": (i + 1) * 100 + 1,
55+
},
56+
{
57+
"multivariate_feature_option": {"value": f"mv-{i}-b"},
58+
"percentage_allocation": 50.0,
59+
"id": (i + 1) * 100 + 2,
60+
},
61+
]
62+
feature_states.append(fs)
63+
64+
segments: typing.List[typing.Dict[str, typing.Any]] = []
65+
queried_feature_id: typing.Optional[int] = None
66+
for fs in feature_states:
67+
if fs["feature"]["name"] == queried_feature_name:
68+
queried_feature_id = fs["feature"]["id"]
69+
break
70+
for i in range(with_segments):
71+
csv_values = ",".join(str(v) for v in range(i * 100, i * 100 + 50))
72+
segment: typing.Dict[str, typing.Any] = {
73+
"id": 10_000 + i,
74+
"name": f"segment_{i}",
75+
"rules": [
76+
{
77+
"type": "ALL",
78+
"rules": [],
79+
"conditions": [
80+
{
81+
"property_": "venue_id",
82+
"operator": "IN",
83+
"value": csv_values,
84+
}
85+
],
86+
}
87+
],
88+
"feature_states": [],
89+
}
90+
if i < overrides_per_queried_flag and queried_feature_id is not None:
91+
segment["feature_states"] = [
92+
{
93+
"django_id": 90_000 + i,
94+
"feature": {
95+
"name": queried_feature_name,
96+
"type": "STANDARD",
97+
"id": queried_feature_id,
98+
},
99+
"feature_state_value": f"override-{i}",
100+
"enabled": True,
101+
"featurestate_uuid": f"override-{i:04d}",
102+
"feature_segment": {"priority": i},
103+
"multivariate_feature_state_values": [],
104+
}
105+
]
106+
segments.append(segment)
107+
108+
return {
109+
"api_key": "B62qaMZNwfiqT76p38ggrQ",
110+
"name": "Benchmark Environment",
111+
"project": {
112+
"name": "Benchmark project",
113+
"organisation": {
114+
"feature_analytics": False,
115+
"name": "Benchmark Org",
116+
"id": 1,
117+
"persist_trait_data": True,
118+
"stop_serving_flags": False,
119+
},
120+
"id": 1,
121+
"hide_disabled_flags": False,
122+
"segments": segments,
123+
},
124+
"segment_overrides": [],
125+
"id": 1,
126+
"feature_states": feature_states,
127+
"identity_overrides": [],
128+
}

flagsmith/flagsmith.py

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,13 @@
2121
map_segment_results_to_identity_segments,
2222
resolve_trait_values,
2323
)
24-
from flagsmith.models import DefaultFlag, Flags, Segment
24+
from flagsmith.models import (
25+
DefaultFlag,
26+
Flags,
27+
Segment,
28+
SegmentOverridesIndex,
29+
build_segment_overrides_index,
30+
)
2531
from flagsmith.offline_handlers import OfflineHandler
2632
from flagsmith.polling_manager import EnvironmentDataPollingManager
2733
from flagsmith.streaming_manager import EventStreamManager
@@ -77,6 +83,7 @@ def __init__(
7783
offline_handler: typing.Optional[OfflineHandler] = None,
7884
enable_realtime_updates: bool = False,
7985
application_metadata: typing.Optional[ApplicationMetadata] = None,
86+
lazy_identity_evaluation: bool = True,
8087
):
8188
"""
8289
:param environment_key: The environment key obtained from Flagsmith interface.
@@ -105,6 +112,11 @@ def __init__(
105112
default_flag_handler if offline_mode is not set and using remote evaluation.
106113
:param enable_realtime_updates: Use real-time functionality via SSE as opposed to polling the API
107114
:param application_metadata: Optional metadata about the client application.
115+
:param lazy_identity_evaluation: When True (default), ``get_identity_flags``
116+
returns a lazy ``Flags`` that resolves flags on first access using a
117+
precomputed segment-overrides index, rather than evaluating every
118+
feature in the environment up-front. Set to False to opt back into
119+
the legacy eager path if you hit a regression.
108120
"""
109121

110122
self.offline_mode = offline_mode
@@ -113,11 +125,13 @@ def __init__(
113125
self.offline_handler = offline_handler
114126
self.default_flag_handler = default_flag_handler
115127
self.enable_realtime_updates = enable_realtime_updates
128+
self.lazy_identity_evaluation = lazy_identity_evaluation
116129
self._analytics_processor: typing.Optional[AnalyticsProcessor] = None
117130
self._pipeline_analytics_processor: typing.Optional[
118131
PipelineAnalyticsProcessor
119132
] = None
120-
self._evaluation_context: typing.Optional[SDKEvaluationContext] = None
133+
self.__evaluation_context: typing.Optional[SDKEvaluationContext] = None
134+
self._segment_overrides_index: SegmentOverridesIndex = {}
121135
self._environment_updated_at: typing.Optional[datetime] = None
122136

123137
# argument validation
@@ -356,6 +370,26 @@ def update_environment(self) -> None:
356370
except (KeyError, TypeError, ValueError):
357371
logger.exception("Error parsing environment document")
358372

373+
@property
374+
def _evaluation_context(self) -> typing.Optional[SDKEvaluationContext]:
375+
return self.__evaluation_context
376+
377+
@_evaluation_context.setter
378+
def _evaluation_context(
379+
self, context: typing.Optional[SDKEvaluationContext]
380+
) -> None:
381+
"""Swap in a new evaluation context and rebuild the overrides index.
382+
383+
The index maps feature_name -> segments that override it. Built once
384+
per refresh and reused across every subsequent per-identity lazy
385+
resolution; rebuilding here keeps it in sync with the current doc
386+
without any hot-path cost.
387+
"""
388+
self.__evaluation_context = context
389+
self._segment_overrides_index = (
390+
build_segment_overrides_index(context) if context is not None else {}
391+
)
392+
359393
def _get_headers(
360394
self,
361395
environment_key: str,
@@ -407,6 +441,19 @@ def _get_identity_flags_from_document(
407441
identifier=identifier,
408442
traits=traits,
409443
)
444+
if self.lazy_identity_evaluation:
445+
# Lazy path: defer per-feature evaluation until the caller
446+
# actually reads a flag. Hot for callers that only read one
447+
# or a few flags out of a large environment.
448+
return Flags.from_evaluation_context(
449+
context=context,
450+
overrides_index=self._segment_overrides_index,
451+
analytics_processor=self._analytics_processor,
452+
default_flag_handler=self.default_flag_handler,
453+
pipeline_analytics_processor=self._pipeline_analytics_processor,
454+
identity_identifier=identifier,
455+
traits=resolve_trait_values(traits),
456+
)
410457
evaluation_result = engine.get_evaluation_result(
411458
context=context,
412459
)

0 commit comments

Comments
 (0)