Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ their individual contributions.
* `Hal Blackburn <https://github.com/h4l>`_
* `Hugo van Kemenade <https://github.com/hugovk>`_
* `Humberto Rocha <https://github.com/humrochagf>`_
* `Ian Hunt-Isaak <https://github.com/ianhi>`_
* `Ilya Lebedev <https://github.com/melevir>`_ (melevir@gmail.com)
* `Israel Fruchter <https://github.com/fruch>`_
* `Ivan Tham <https://github.com/pickfire>`_
Expand Down
12 changes: 12 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
RELEASE_TYPE: patch

This patch improves :class:`~hypothesis.errors.FlakyStrategyDefinition` error
messages to describe *what* changed between runs (e.g. different constraints,
different types, or a different number of draws), making it much easier to
diagnose flaky data generation. Duplicate errors are no longer raised when a
single mismatch triggers multiple checks, and when a real test failure is found
alongside a flaky strategy error, the real failure is now reported cleanly with
a warning about the flaky issue. Stateful tests also gain context about which
steps led to the error when observability is enabled.

Thanks to Ian Hunt-Isaak for this contribution!
20 changes: 19 additions & 1 deletion hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
FailedHealthCheck,
FlakyFailure,
FlakyReplay,
FlakyStrategyDefinition,
Found,
Frozen,
HypothesisException,
Expand Down Expand Up @@ -953,6 +954,12 @@ def __init__(

self._runner: ConjectureRunner | None = None

@property
def has_suppressed_flaky_error(self) -> bool:
    """Whether the engine runner recorded a suppressed flaky-strategy error.

    Returns ``False`` while no runner has been assigned to ``self._runner``,
    and otherwise reports whether that runner's ``suppressed_flaky_error``
    attribute holds a value other than ``None``.
    """
    runner = self._runner
    if runner is None:
        return False
    return runner.suppressed_flaky_error is not None

@property
def test_identifier(self) -> str:
return getattr(
Expand Down Expand Up @@ -1239,6 +1246,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
raise
except (
FailedHealthCheck,
FlakyStrategyDefinition,
*skip_exceptions_to_reraise(),
):
# These are fatal errors or control exceptions that should stop the
Expand Down Expand Up @@ -1570,6 +1578,13 @@ def run_engine(self):
"as a decorator on your test case"
)

if flaky := runner.suppressed_flaky_error:
report(
"WARNING: a flaky strategy definition error was detected "
"during shrinking and suppressed in favor of the real "
f"failure above.\n {flaky}"
)

_raise_to_user(
errors_to_report,
self.settings,
Expand Down Expand Up @@ -2218,7 +2233,10 @@ def wrapped_test(*arguments, **kwargs):
wrapped_test._hypothesis_internal_use_generated_seed
)
with local_settings(settings):
if not (state.failed_normally or generated_seed is None):
if generated_seed is not None and (
not state.failed_normally
or state.has_suppressed_flaky_error
):
if running_under_pytest:
report(
f"You can add @seed({generated_seed}) to this test or "
Expand Down
100 changes: 86 additions & 14 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,28 @@ class PreviouslyUnseenBehaviour(HypothesisException):
)


def _flaky_strat_msg_with_detail(detail: str) -> str:
    """Return the generic flaky-strategy message followed by *detail*.

    The shared ``_FLAKY_STRAT_MSG`` text comes first, then a blank line,
    then the caller-supplied description of what differed between runs.
    """
    message = f"{_FLAKY_STRAT_MSG}\n\n{detail}"
    return message


def _mismatch_detail(expected: tuple[str, object], actual: tuple[str, object]) -> str:
expected_type, expected_constraints = expected
actual_type, actual_constraints = actual
if actual_type != expected_type:
return (
f"The second run drew a different type of value "
f"than the first run.\n"
f" first run: {expected_type}\n"
f" second run: {actual_type}\n"
)
return (
f"The second run drew {actual_type} with different "
f"constraints than the first run.\n"
f" first run: {expected_constraints}\n"
f" second run: {actual_constraints}\n"
)


EMPTY: frozenset[int] = frozenset()


Expand Down Expand Up @@ -442,7 +464,7 @@ def mark_forced(self, i: int) -> None:
self.__forced = set()
self.__forced.add(i)

def split_at(self, i: int) -> None:
def split_at(self, i: int, *, new_value: object = None) -> None:
"""
Splits the tree so that it can incorporate a decision at the draw call
corresponding to the node at position i.
Expand All @@ -451,7 +473,13 @@ def split_at(self, i: int) -> None:
"""

if i in self.forced:
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
f"The {self.choice_types[i]} value was forced to "
f"{self.values[i]!r} in the first run, but the second "
f"run drew {new_value!r}.\n"
)
)

assert not self.is_exhausted

Expand Down Expand Up @@ -931,7 +959,7 @@ def _draw_from_cache(
key: ChoiceT,
random: Random,
) -> ChoiceT:
(generator, children, rejected) = self._get_children_cache(
generator, children, rejected = self._get_children_cache(
choice_type, constraints, key=key
)
# Keep a stock of 100 potentially-valid children at all times.
Expand Down Expand Up @@ -961,7 +989,7 @@ def _reject_child(
child: ChoiceT,
key: ChoiceT,
) -> None:
(_generator, children, rejected) = self._get_children_cache(
_generator, children, rejected = self._get_children_cache(
choice_type, constraints, key=key
)
rejected.add(child)
Expand Down Expand Up @@ -999,6 +1027,7 @@ def __init__(self, tree: DataTree):
self._index_in_current_node: int = 0
self._trail: list[TreeNode] = [self._current_node]
self.killed: bool = False
self.flaky: bool = False

def draw_integer(
self, value: int, *, was_forced: bool, constraints: IntegerConstraints
Expand Down Expand Up @@ -1050,17 +1079,35 @@ def draw_value(
choice_type != node.choice_types[i]
or constraints != node.constraints[i]
):
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
self.flaky = True
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
_mismatch_detail(
(node.choice_types[i], node.constraints[i]),
(choice_type, constraints),
)
)
)
# Note that we don't check whether a previously
# forced value is now free. That will be caught
# if we ever split the node there, but otherwise
# may pass silently. This is acceptable because it
# means we skip a hash set lookup on every
# draw and that's a pretty niche failure mode.
if was_forced and i not in node.forced:
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
if value != node.values[i]:
node.split_at(i)
elif was_forced and i not in node.forced:
self.flaky = True
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
f"The {choice_type} value was forced to a specific "
f"value but was not forced on the first run.\n"
)
)
elif value != node.values[i]:
try:
node.split_at(i, new_value=value)
except FlakyStrategyDefinition:
self.flaky = True
raise
assert i == len(node.values)
new_node = TreeNode()
assert isinstance(node.transition, Branch)
Expand Down Expand Up @@ -1095,19 +1142,32 @@ def draw_value(
compute_max_children(choice_type, constraints) == 1
and not was_forced
):
node.split_at(i)
node.split_at(i, new_value=value)
assert isinstance(node.transition, Branch)
self._current_node = node.transition.children[value]
self._index_in_current_node = 0
elif isinstance(trans, Conclusion):
assert trans.status != Status.OVERRUN
# We tried to draw where history says we should have
# stopped
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
self.flaky = True
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
"The second run drew more data than the first run.\n"
)
)
else:
assert isinstance(trans, Branch), trans
if choice_type != trans.choice_type or constraints != trans.constraints:
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
self.flaky = True
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
_mismatch_detail(
(trans.choice_type, trans.constraints),
(choice_type, constraints),
)
)
)
try:
self._current_node = trans.children[value]
except KeyError:
Expand All @@ -1127,7 +1187,12 @@ def kill_branch(self) -> None:
self._current_node.transition is not None
and not isinstance(self._current_node.transition, Killed)
):
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
"The second run stopped drawing earlier than the "
"first run, which continued to draw more data.\n"
)
)

if self._current_node.transition is None:
self._current_node.transition = Killed(TreeNode())
Expand All @@ -1144,11 +1209,18 @@ def conclude_test(
node if necessary and checks for consistency."""
if status == Status.OVERRUN:
return
if self.flaky:
return
i = self._index_in_current_node
node = self._current_node

if i < len(node.values) or isinstance(node.transition, Branch):
raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG)
raise FlakyStrategyDefinition(
_flaky_strat_msg_with_detail(
"The second run stopped drawing earlier than the "
"first run, which continued to draw more data.\n"
)
)

new_transition = Conclusion(status, interesting_origin)

Expand Down
33 changes: 32 additions & 1 deletion hypothesis-python/src/hypothesis/internal/conjecture/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,18 @@
from hypothesis.errors import (
BackendCannotProceed,
FlakyBackendFailure,
FlakyStrategyDefinition,
HypothesisException,
InvalidArgument,
StopTest,
)
from hypothesis.internal.cache import LRUReusedCache
from hypothesis.internal.compat import NotRequired, TypedDict, ceil, override
from hypothesis.internal.compat import (
NotRequired,
TypedDict,
ceil,
override,
)
from hypothesis.internal.conjecture.choice import (
ChoiceConstraintsT,
ChoiceKeyT,
Expand Down Expand Up @@ -315,6 +321,7 @@ def __init__(
self.first_bug_found_time: float = math.inf

self.shrunk_examples: set[InterestingOrigin] = set()
self.suppressed_flaky_error: FlakyStrategyDefinition | None = None
self.health_check_state: HealthCheckState | None = None
self.tree: DataTree = DataTree()
self.provider: PrimitiveProvider | type[PrimitiveProvider] = _get_provider(
Expand Down Expand Up @@ -562,6 +569,23 @@ def _backend_cannot_proceed(
interrupted = True
data.freeze()
return
except FlakyStrategyDefinition:
data.freeze()
# _stateful_repr_parts is:
# None for non-stateful tests
# a list when steps were recorded
if data._stateful_repr_parts:
report(
"Steps leading up to this error:\n"
+ "\n".join(f" {s}" for s in data._stateful_repr_parts)
)
elif data._stateful_repr_parts is not None:
report(
"Tip: to see which steps led to this error, re-run with "
"HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY=1"
)
self.save_choices(data.choices)
raise
except BaseException:
data.freeze()
if self.settings.backend != "hypothesis":
Expand Down Expand Up @@ -967,6 +991,13 @@ def run(self) -> None:
self._run()
except RunIsComplete:
pass
except FlakyStrategyDefinition as e:
if not self.interesting_examples:
raise
self.statistics["stopped-because"] = (
"a flaky strategy was detected during shrinking"
)
self.suppressed_flaky_error = e
for v in self.interesting_examples.values():
self.debug_data(v)
self.debug(
Expand Down
6 changes: 4 additions & 2 deletions hypothesis-python/src/hypothesis/stateful.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,10 @@ def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_step
if flaky_state["selecting_rule"]:
add_note(
err,
"while selecting a rule to run. This is usually caused by "
"a flaky precondition, or a bundle that was unexpectedly empty.",
"This error occurred while selecting a rule to run. This is "
"usually caused by a flaky precondition, a bundle that "
"was unexpectedly empty, or a rule that depends on external "
"state such as time or a global variable.",
)
raise

Expand Down
21 changes: 14 additions & 7 deletions hypothesis-python/tests/conjecture/test_data_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,10 +545,7 @@ def test_datatree_repr(bool_constraints, int_constraints):
observer.draw_boolean(False, was_forced=False, constraints=bool_constraints)
observer.draw_integer(5, was_forced=False, constraints=int_constraints)

assert (
pretty.pretty(tree)
== textwrap.dedent(
f"""
assert pretty.pretty(tree) == textwrap.dedent(f"""
boolean True {bool_constraints}
Conclusion (Status.INVALID)
boolean False {bool_constraints}
Expand All @@ -559,9 +556,7 @@ def test_datatree_repr(bool_constraints, int_constraints):
Conclusion (Status.INTERESTING, {origin})
integer 5 {int_constraints}
unknown
"""
).strip()
)
""").strip()


def _draw(data, node, *, forced=None):
Expand All @@ -581,3 +576,15 @@ def test_simulate_forced_floats(node):
tree.simulate_test_function(data)
data.freeze()
assert data.nodes == (node,)


def test_type_mismatch_gives_detail():
    """A draw of a different choice type on replay reports "different type"."""
    tree = DataTree()

    # First run: record an integer draw in the tree and conclude the test.
    first_run = ConjectureData.for_choices((1,), observer=tree.new_observer())
    first_run.draw_integer(0, 1)
    with pytest.raises(StopTest):
        first_run.conclude_test(Status.INTERESTING)

    # Second run: observe the same tree but draw a boolean at that position.
    second_run = ConjectureData.for_choices((True,), observer=tree.new_observer())
    with pytest.raises(Flaky, match="different type"):
        second_run.draw_boolean()
Loading
Loading