HypothesisWorks · ianhi · Mar 9, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -83,6 +83,7 @@ their individual contributions.
 * `Hal Blackburn <https://github.com/h4l>`_
 * `Hugo van Kemenade <https://github.com/hugovk>`_
 * `Humberto Rocha <https://github.com/humrochagf>`_
+* `Ian Hunt-Isaak <https://github.com/ianhi>`_
 * `Ilya Lebedev <https://github.com/melevir>`_ (melevir@gmail.com)
 * `Israel Fruchter <https://github.com/fruch>`_
 * `Ivan Tham <https://github.com/pickfire>`_

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,12 @@
+RELEASE_TYPE: patch
+
+This patch improves :class:`~hypothesis.errors.FlakyStrategyDefinition` error
+messages to describe *what* changed between runs (e.g. different constraints,
+different types, or a different number of draws), making it much easier to
+diagnose flaky data generation. Duplicate errors are no longer raised when a
+single mismatch triggers multiple checks, and when a real test failure is found
+alongside a flaky strategy error, the real failure is now reported cleanly with
+a warning about the flaky issue. Stateful tests also gain context about which
+steps led to the error when observability is enabled.
+
+Thanks to Ian Hunt-Isaak for this contribution!
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -60,6 +60,7 @@
     FailedHealthCheck,
     FlakyFailure,
     FlakyReplay,
+    FlakyStrategyDefinition,
     Found,
     Frozen,
     HypothesisException,
@@ -953,6 +954,12 @@ def __init__(
 
         self._runner: ConjectureRunner | None = None
 
+    @property
+    def has_suppressed_flaky_error(self) -> bool:
+        """True if a runner exists and it recorded a suppressed flaky error."""
+        runner = self._runner
+        return runner is not None and runner.suppressed_flaky_error is not None
+
     @property
     def test_identifier(self) -> str:
         return getattr(
@@ -1239,6 +1246,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
             raise
         except (
             FailedHealthCheck,
+            FlakyStrategyDefinition,
             *skip_exceptions_to_reraise(),
         ):
             # These are fatal errors or control exceptions that should stop the
@@ -1570,6 +1578,13 @@ def run_engine(self):
                         "as a decorator on your test case"
                     )
 
+        if flaky := runner.suppressed_flaky_error:
+            report(
+                "WARNING: a flaky strategy definition error was detected "
+                "during shrinking and suppressed in favor of the real "
+                f"failure above.\n  {flaky}"
+            )
+
         _raise_to_user(
             errors_to_report,
             self.settings,
@@ -2218,7 +2233,10 @@ def wrapped_test(*arguments, **kwargs):
                         wrapped_test._hypothesis_internal_use_generated_seed
                     )
                     with local_settings(settings):
-                        if not (state.failed_normally or generated_seed is None):
+                        if generated_seed is not None and (
+                            not state.failed_normally
+                            or state.has_suppressed_flaky_error
+                        ):
                             if running_under_pytest:
                                 report(
                                     f"You can add @seed({generated_seed}) to this test or "

@@ -60,6 +60,28 @@ class PreviouslyUnseenBehaviour(HypothesisException):
 )
 
 
+def _flaky_strat_msg_with_detail(detail: str) -> str:
+    return f"{_FLAKY_STRAT_MSG}\n\n{detail}"  # generic text, blank line, detail
+
+
+def _mismatch_detail(expected: tuple[str, object], actual: tuple[str, object]) -> str:
+    """Explain how a (choice type, constraints) pair differs between two runs."""
+    first_type, first_constraints = expected
+    second_type, second_constraints = actual
+    if first_type == second_type:
+        # Same kind of draw, so the constraints must be what changed.
+        headline = f"The second run drew {second_type} with different constraints"
+        first, second = first_constraints, second_constraints
+    else:
+        headline = "The second run drew a different type of value"
+        first, second = first_type, second_type
+    return (
+        f"{headline} than the first run.\n"
+        f"  first run:  {first}\n"
+        f"  second run: {second}\n"
+    )
+
+
 EMPTY: frozenset[int] = frozenset()
 
 
@@ -442,7 +464,7 @@ def mark_forced(self, i: int) -> None:
             self.__forced = set()
         self.__forced.add(i)
 
-    def split_at(self, i: int) -> None:
+    def split_at(self, i: int, *, new_value: object = None) -> None:
         """
         Splits the tree so that it can incorporate a decision at the draw call
         corresponding to the node at position i.
@@ -451,7 +473,13 @@ def split_at(self, i: int) -> None:
         """
 
         if i in self.forced:
-            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+            raise FlakyStrategyDefinition(
+                _flaky_strat_msg_with_detail(
+                    f"The {self.choice_types[i]} value was forced to "
+                    f"{self.values[i]!r} in the first run, but the second "
+                    f"run drew {new_value!r}.\n"
+                )
+            )
 
         assert not self.is_exhausted
 
@@ -931,7 +959,7 @@ def _draw_from_cache(
         key: ChoiceT,
         random: Random,
     ) -> ChoiceT:
-        (generator, children, rejected) = self._get_children_cache(
+        generator, children, rejected = self._get_children_cache(
             choice_type, constraints, key=key
         )
         # Keep a stock of 100 potentially-valid children at all times.
@@ -961,7 +989,7 @@ def _reject_child(
         child: ChoiceT,
         key: ChoiceT,
     ) -> None:
-        (_generator, children, rejected) = self._get_children_cache(
+        _generator, children, rejected = self._get_children_cache(
             choice_type, constraints, key=key
         )
         rejected.add(child)
@@ -999,6 +1027,7 @@ def __init__(self, tree: DataTree):
         self._index_in_current_node: int = 0
         self._trail: list[TreeNode] = [self._current_node]
         self.killed: bool = False
+        self.flaky: bool = False
 
     def draw_integer(
         self, value: int, *, was_forced: bool, constraints: IntegerConstraints
@@ -1050,17 +1079,35 @@ def draw_value(
                 choice_type != node.choice_types[i]
                 or constraints != node.constraints[i]
             ):
-                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+                self.flaky = True
+                raise FlakyStrategyDefinition(
+                    _flaky_strat_msg_with_detail(
+                        _mismatch_detail(
+                            (node.choice_types[i], node.constraints[i]),
+                            (choice_type, constraints),
+                        )
+                    )
+                )
             # Note that we don't check whether a previously
             # forced value is now free. That will be caught
             # if we ever split the node there, but otherwise
             # may pass silently. This is acceptable because it
             # means we skip a hash set lookup on every
             # draw and that's a pretty niche failure mode.
-            if was_forced and i not in node.forced:
-                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
-            if value != node.values[i]:
-                node.split_at(i)
+            elif was_forced and i not in node.forced:
+                self.flaky = True
+                raise FlakyStrategyDefinition(
+                    _flaky_strat_msg_with_detail(
+                        f"The {choice_type} value was forced to a specific "
+                        f"value but was not forced on the first run.\n"
+                    )
+                )
+            elif value != node.values[i]:
+                try:
+                    node.split_at(i, new_value=value)
+                except FlakyStrategyDefinition:
+                    self.flaky = True
+                    raise
                 assert i == len(node.values)
                 new_node = TreeNode()
                 assert isinstance(node.transition, Branch)
@@ -1095,19 +1142,32 @@ def draw_value(
                     compute_max_children(choice_type, constraints) == 1
                     and not was_forced
                 ):
-                    node.split_at(i)
+                    node.split_at(i, new_value=value)
                     assert isinstance(node.transition, Branch)
                     self._current_node = node.transition.children[value]
                     self._index_in_current_node = 0
             elif isinstance(trans, Conclusion):
                 assert trans.status != Status.OVERRUN
                 # We tried to draw where history says we should have
                 # stopped
-                raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+                self.flaky = True
+                raise FlakyStrategyDefinition(
+                    _flaky_strat_msg_with_detail(
+                        "The second run drew more data than the first run.\n"
+                    )
+                )
             else:
                 assert isinstance(trans, Branch), trans
                 if choice_type != trans.choice_type or constraints != trans.constraints:
-                    raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+                    self.flaky = True
+                    raise FlakyStrategyDefinition(
+                        _flaky_strat_msg_with_detail(
+                            _mismatch_detail(
+                                (trans.choice_type, trans.constraints),
+                                (choice_type, constraints),
+                            )
+                        )
+                    )
                 try:
                     self._current_node = trans.children[value]
                 except KeyError:
@@ -1127,7 +1187,12 @@ def kill_branch(self) -> None:
             self._current_node.transition is not None
             and not isinstance(self._current_node.transition, Killed)
         ):
-            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+            raise FlakyStrategyDefinition(
+                _flaky_strat_msg_with_detail(
+                    "The second run stopped drawing earlier than the "
+                    "first run, which continued to draw more data.\n"
+                )
+            )
 
         if self._current_node.transition is None:
             self._current_node.transition = Killed(TreeNode())
@@ -1144,11 +1209,18 @@ def conclude_test(
         node if necessary and checks for consistency."""
         if status == Status.OVERRUN:
             return
+        if self.flaky:
+            return
         i = self._index_in_current_node
         node = self._current_node
 
         if i < len(node.values) or isinstance(node.transition, Branch):
-            raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
+            raise FlakyStrategyDefinition(
+                _flaky_strat_msg_with_detail(
+                    "The second run stopped drawing earlier than the "
+                    "first run, which continued to draw more data.\n"
+                )
+            )
 
         new_transition = Conclusion(status, interesting_origin)
 

@@ -27,12 +27,18 @@
 from hypothesis.errors import (
     BackendCannotProceed,
     FlakyBackendFailure,
+    FlakyStrategyDefinition,
     HypothesisException,
     InvalidArgument,
     StopTest,
 )
 from hypothesis.internal.cache import LRUReusedCache
-from hypothesis.internal.compat import NotRequired, TypedDict, ceil, override
+from hypothesis.internal.compat import (
+    NotRequired,
+    TypedDict,
+    ceil,
+    override,
+)
 from hypothesis.internal.conjecture.choice import (
     ChoiceConstraintsT,
     ChoiceKeyT,
@@ -315,6 +321,7 @@ def __init__(
         self.first_bug_found_time: float = math.inf
 
         self.shrunk_examples: set[InterestingOrigin] = set()
+        self.suppressed_flaky_error: FlakyStrategyDefinition | None = None
         self.health_check_state: HealthCheckState | None = None
         self.tree: DataTree = DataTree()
         self.provider: PrimitiveProvider | type[PrimitiveProvider] = _get_provider(
@@ -562,6 +569,23 @@ def _backend_cannot_proceed(
             interrupted = True
             data.freeze()
             return
+        except FlakyStrategyDefinition:
+            data.freeze()
+            # _stateful_repr_parts is None for non-stateful tests, a
+            # non-empty list when steps were recorded, and an empty list
+            # when a stateful test ran without recording its steps.
+            if data._stateful_repr_parts:
+                report(
+                    "Steps leading up to this error:\n"
+                    + "\n".join(f"  {s}" for s in data._stateful_repr_parts)
+                )
+            elif data._stateful_repr_parts is not None:
+                report(
+                    "Tip: to see which steps led to this error, re-run with "
+                    "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY=1"
+                )
+            self.save_choices(data.choices)
+            raise
         except BaseException:
             data.freeze()
             if self.settings.backend != "hypothesis":
@@ -967,6 +991,13 @@ def run(self) -> None:
                 self._run()
             except RunIsComplete:
                 pass
+            except FlakyStrategyDefinition as e:
+                if not self.interesting_examples:
+                    raise
+                self.statistics["stopped-because"] = (
+                    "a flaky strategy was detected during shrinking"
+                )
+                self.suppressed_flaky_error = e
             for v in self.interesting_examples.values():
                 self.debug_data(v)
             self.debug(

diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py
@@ -271,8 +271,10 @@ def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_step
         if flaky_state["selecting_rule"]:
             add_note(
                 err,
-                "while selecting a rule to run. This is usually caused by "
-                "a flaky precondition, or a bundle that was unexpectedly empty.",
+                "This error occurred while selecting a rule to run. This is "
+                "usually caused by a flaky precondition, a bundle that "
+                "was unexpectedly empty, or a rule that depends on external "
+                "state such as time or a global variable.",
             )
         raise
 

diff --git a/hypothesis-python/tests/conjecture/test_data_tree.py b/hypothesis-python/tests/conjecture/test_data_tree.py
@@ -545,10 +545,7 @@ def test_datatree_repr(bool_constraints, int_constraints):
     observer.draw_boolean(False, was_forced=False, constraints=bool_constraints)
     observer.draw_integer(5, was_forced=False, constraints=int_constraints)
 
-    assert (
-        pretty.pretty(tree)
-        == textwrap.dedent(
-            f"""
+    assert pretty.pretty(tree) == textwrap.dedent(f"""
         boolean True {bool_constraints}
           Conclusion (Status.INVALID)
         boolean False {bool_constraints}
@@ -559,9 +556,7 @@ def test_datatree_repr(bool_constraints, int_constraints):
               Conclusion (Status.INTERESTING, {origin})
           integer 5 {int_constraints}
             unknown
-        """
-        ).strip()
-    )
+        """).strip()
 
 
 def _draw(data, node, *, forced=None):
@@ -581,3 +576,15 @@ def test_simulate_forced_floats(node):
     tree.simulate_test_function(data)
     data.freeze()
     assert data.nodes == (node,)
+
+
+def test_type_mismatch_gives_detail():
+    tree = DataTree()
+    data = ConjectureData.for_choices((1,), observer=tree.new_observer())
+    data.draw_integer(0, 1)  # first run: an integer draw observed by the tree
+    with pytest.raises(StopTest):
+        data.conclude_test(Status.INTERESTING)
+    # Second run against the same tree draws a boolean at the same position.
+    data = ConjectureData.for_choices((True,), observer=tree.new_observer())
+    with pytest.raises(Flaky, match="different type"):
+        data.draw_boolean()