From 8a4c2c6a450747beb99f3571792bd7a329e715be Mon Sep 17 00:00:00 2001 From: Sung Yun <107272191+syun64@users.noreply.github.com> Date: Wed, 24 Jul 2024 00:42:21 +0000 Subject: [PATCH] fix type issues --- pyiceberg/io/pyarrow.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 87dfe3b7ba..3a6ce06edc 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -73,7 +73,7 @@ from pyiceberg.conversions import to_bytes from pyiceberg.exceptions import ResolveError -from pyiceberg.expressions import AlwaysTrue, BooleanExpression, BoundIsNaN, BoundIsNull, BoundReference, BoundTerm, Not, Or +from pyiceberg.expressions import AlwaysTrue, BooleanExpression, BoundIsNaN, BoundIsNull, BoundTerm, Not, Or from pyiceberg.expressions.literals import Literal from pyiceberg.expressions.visitors import ( BoundBooleanExpressionVisitor, @@ -764,31 +764,18 @@ def _expression_to_complementary_pyarrow(expr: BooleanExpression) -> pc.Expressi collector = _NullNaNUnmentionedTermsCollector() collector.collect(expr) - def _downcast_term_to_reference(bound_terms: Set[BoundTerm[Any]]) -> Set[BoundReference[Any]]: - """Handle mypy check for BoundTerm -> BoundReference.""" - bound_refs: Set[BoundReference[Any]] = set() - for t in bound_terms: - if not isinstance(t, BoundReference): - raise ValueError("Collected Bound Term that is not reference.") - else: - bound_refs.add(t) - return bound_refs - - null_unmentioned_bound_refs: Set[BoundReference[Any]] = _downcast_term_to_reference(collector.null_unmentioned_bound_terms) - nan_unmentioned_bound_refs: Set[BoundReference[Any]] = _downcast_term_to_reference(collector.nan_unmentioned_bound_terms) - - # Convert the set of references to a sorted list so that layout of the expression to build is deterministic. - null_unmentioned_bound_refs_sorted: List[BoundReference[Any]] = sorted( - null_unmentioned_bound_refs, key=lambda ref: ref.field.name + # Convert the set of terms to a sorted list so that layout of the expression to build is deterministic. + null_unmentioned_bound_terms_sorted: List[BoundTerm[Any]] = sorted( + collector.null_unmentioned_bound_terms, key=lambda term: term.ref().field.name ) - nan_unmentioned_bound_refs_sorted: List[BoundReference[Any]] = sorted( - nan_unmentioned_bound_refs, key=lambda ref: ref.field.name + nan_unmentioned_bound_terms_sorted: List[BoundTerm[Any]] = sorted( + collector.nan_unmentioned_bound_terms, key=lambda term: term.ref().field.name ) preserve_expr: BooleanExpression = Not(expr) - for term in null_unmentioned_bound_refs_sorted: + for term in null_unmentioned_bound_terms_sorted: preserve_expr = Or(preserve_expr, BoundIsNull(term=term)) - for term in nan_unmentioned_bound_refs_sorted: + for term in nan_unmentioned_bound_terms_sorted: preserve_expr = Or(preserve_expr, BoundIsNaN(term=term)) return expression_to_pyarrow(preserve_expr)