Skip to content

Commit 14a3bba

Browse files
committed
feat(expressions): add truthiness semantics for constant boolean expressions
`AlwaysTrue`/`AlwaysFalse` extend `IcebergRootModel[bool]` but defined no `__bool__`, so they used default object truthiness and were both always truthy. That made `bool(AlwaysFalse())` return `True`, so intuitive control-flow patterns (`if not visit_starts_with(...)`, `expr or AlwaysTrue()`) behaved incorrectly for the FALSE constant. Define `__bool__` on the two constants (`True`/`False`) and make the base `BooleanExpression.__bool__` raise `TypeError`, so truthiness is only defined for the constants and an accidental `if predicate:` is an explicit error rather than a silently-true no-op. `not expr` returns a Python bool; `~expr` still returns the negated expression. Per the audit kevinjqliu requested on the issue, fix the one optional-expression check that relied on truthiness instead of presence: REST scan-planning residual handling (`FileScanTask.from_rest_response`) now uses `is not None`, so an explicit residual filter is preserved and not treated as missing. Also update the `expr or AlwaysTrue()` default in the pyarrow `project` test helper. Tests cover constant truthiness, control-flow branching, `TypeError` for non-constant expressions, and the REST residual path. Closes #3543
1 parent 9d36e23 commit 14a3bba

5 files changed

Lines changed: 70 additions & 2 deletions

File tree

pyiceberg/expressions/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,20 @@ def __or__(self, other: BooleanExpression) -> BooleanExpression:
7070

7171
return Or(self, other)
7272

73+
def __bool__(self) -> bool:
74+
"""Reject truthiness checks on non-constant expressions.
75+
76+
Truthiness is only defined for the constant expressions ``AlwaysTrue`` and
77+
``AlwaysFalse``, which override this method. Evaluating a predicate such as
78+
``if EqualTo("x", 1):`` is almost always a mistake; use ``~expr`` to negate
79+
an expression or compare explicitly against ``AlwaysTrue()``/``AlwaysFalse()``.
80+
"""
81+
raise TypeError(
82+
f"The truth value of {type(self).__name__} is ambiguous. "
83+
"Truthiness is only defined for AlwaysTrue() and AlwaysFalse(); "
84+
"use ~expr to negate an expression or compare against AlwaysTrue()/AlwaysFalse()."
85+
)
86+
7387
@model_validator(mode="wrap")
7488
@classmethod
7589
def handle_primitive_type(cls, v: Any, handler: ValidatorFunctionWrapHandler) -> BooleanExpression:
@@ -455,6 +469,10 @@ def __invert__(self) -> AlwaysFalse:
455469
"""Transform the Expression into its negated version."""
456470
return AlwaysFalse()
457471

472+
def __bool__(self) -> bool:
473+
"""Return True, the constant value of this expression."""
474+
return True
475+
458476
def __str__(self) -> str:
459477
"""Return the string representation of the AlwaysTrue class."""
460478
return "AlwaysTrue()"
@@ -473,6 +491,10 @@ def __invert__(self) -> AlwaysTrue:
473491
"""Transform the Expression into its negated version."""
474492
return AlwaysTrue()
475493

494+
def __bool__(self) -> bool:
495+
"""Return False, the constant value of this expression."""
496+
return False
497+
476498
def __str__(self) -> str:
477499
"""Return the string representation of the AlwaysFalse class."""
478500
return "AlwaysFalse()"

pyiceberg/table/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2103,7 +2103,7 @@ def from_rest_response(
21032103
return FileScanTask(
21042104
data_file=data_file,
21052105
delete_files=resolved_deletes,
2106-
residual=rest_task.residual_filter if rest_task.residual_filter else ALWAYS_TRUE,
2106+
residual=rest_task.residual_filter if rest_task.residual_filter is not None else ALWAYS_TRUE,
21072107
)
21082108

21092109

tests/catalog/test_scan_planning_models.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
)
4040
from pyiceberg.expressions import AlwaysTrue, EqualTo, Reference
4141
from pyiceberg.manifest import FileFormat
42+
from pyiceberg.table import FileScanTask
4243

4344
TEST_URI = "https://iceberg-test-catalog/"
4445

@@ -242,6 +243,24 @@ def test_scan_task_with_residual_filter_true() -> None:
242243
assert isinstance(task.residual_filter, AlwaysTrue)
243244

244245

246+
def test_from_rest_response_preserves_non_constant_residual_filter() -> None:
247+
data = {
248+
"data-file": _rest_data_file(),
249+
"residual-filter": {"type": "eq", "term": "x", "value": 1},
250+
}
251+
rest_task = RESTFileScanTask.model_validate(data)
252+
task = FileScanTask.from_rest_response(rest_task, [])
253+
assert task.residual == EqualTo(Reference("x"), 1)
254+
255+
256+
def test_from_rest_response_defaults_missing_residual_filter_to_always_true() -> None:
257+
data = {"data-file": _rest_data_file()}
258+
rest_task = RESTFileScanTask.model_validate(data)
259+
assert rest_task.residual_filter is None
260+
task = FileScanTask.from_rest_response(rest_task, [])
261+
assert task.residual == AlwaysTrue()
262+
263+
245264
def test_empty_scan_tasks() -> None:
246265
data: dict[str, Any] = {
247266
"delete-files": [],

tests/expressions/test_expressions.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,33 @@ def test_invert_always() -> None:
639639
assert ~AlwaysTrue() == AlwaysFalse()
640640

641641

642+
def test_always_bool() -> None:
643+
assert bool(AlwaysTrue()) is True
644+
assert bool(AlwaysFalse()) is False
645+
646+
647+
def test_always_bool_control_flow() -> None:
648+
assert (1 if AlwaysTrue() else 0) == 1
649+
assert (1 if AlwaysFalse() else 0) == 0
650+
assert not AlwaysFalse()
651+
assert not (not AlwaysTrue())
652+
653+
654+
@pytest.mark.parametrize(
655+
"expression",
656+
[
657+
EqualTo("x", 1),
658+
IsNull("x"),
659+
And(EqualTo("x", 1), IsNull("y")),
660+
Or(EqualTo("x", 1), IsNull("y")),
661+
Not(EqualTo("x", 1)),
662+
],
663+
)
664+
def test_non_constant_expression_bool_raises(expression: BooleanExpression) -> None:
665+
with pytest.raises(TypeError, match="truth value"):
666+
bool(expression)
667+
668+
642669
def test_accessor_base_class() -> None:
643670
"""Test retrieving a value at a position of a container using an accessor"""
644671

tests/io/test_pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1137,7 +1137,7 @@ def _set_spec_id(datafile: DataFile) -> DataFile:
11371137
),
11381138
io=PyArrowFileIO(),
11391139
projected_schema=schema,
1140-
row_filter=expr or AlwaysTrue(),
1140+
row_filter=expr if expr is not None else AlwaysTrue(),
11411141
case_sensitive=True,
11421142
).to_table(
11431143
tasks=[

0 commit comments

Comments
 (0)