Skip to content

Commit 2262b51

Browse files
committed
Document null field handling for PyArrow
1 parent b647f72 commit 2262b51

2 files changed

Lines changed: 24 additions & 2 deletions

File tree

pyiceberg/io/pyarrow.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,9 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType:
779779
return pa.uuid()
780780

781781
def visit_unknown(self, _: UnknownType) -> pa.DataType:
    """Return the PyArrow null type for an Iceberg UnknownType.

    UnknownType can be promoted to any primitive type in V3+ tables per the
    Iceberg spec.
    """
    null_type = pa.null()
    return null_type
783786

784787
def visit_binary(self, _: BinaryType) -> pa.DataType:
@@ -1358,6 +1361,8 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
13581361
primitive = cast(pa.FixedSizeBinaryType, primitive)
13591362
return FixedType(primitive.byte_width)
13601363
elif pa.types.is_null(primitive):
1364+
# PyArrow null type (pa.null()) is converted to Iceberg UnknownType
1365+
# UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
13611366
return UnknownType()
13621367
elif isinstance(primitive, pa.UuidType):
13631368
return UUIDType()

pyiceberg/schema.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1692,6 +1692,15 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType:
16921692
raise ResolveError(f"Cannot promote {file_type} to {read_type}")
16931693

16941694

1695+
@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
    """Promote an UnknownType file field to the requested read type.

    Per the V3 spec, "Unknown" can be promoted to any primitive type, so a
    primitive read type is returned unchanged.

    Args:
        file_type: The UnknownType found in the file schema.
        read_type: The type requested by the read schema.

    Returns:
        The read type, when it is a PrimitiveType.

    Raises:
        ResolveError: If the read type is not a PrimitiveType.
    """
    # Guard clause: anything that is not a primitive cannot be a promotion target.
    if not isinstance(read_type, PrimitiveType):
        raise ResolveError(f"Cannot promote {file_type} to {read_type}")
    return read_type
1702+
1703+
16951704
def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None:
16961705
"""
16971706
Check if the `provided_schema` is compatible with `requested_schema`.
@@ -1760,8 +1769,16 @@ def _is_field_compatible(self, lhs: NestedField) -> bool:
17601769
promote(rhs.field_type, lhs.field_type)
17611770
self.rich_table.add_row("✅", str(lhs), str(rhs))
17621771
return True
1763-
except ResolveError:
1764-
self.rich_table.add_row("❌", str(lhs), str(rhs))
1772+
except ResolveError as e:
1773+
# UnknownType can only be promoted to Primitive types
1774+
if isinstance(rhs.field_type, UnknownType):
1775+
if isinstance(lhs.field_type, (ListType, MapType, StructType)):
1776+
error_msg = f"PyArrow null type (UnknownType) cannot be promoted to non-primitive type {lhs.field_type}. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables."
1777+
else:
1778+
error_msg = f"PyArrow null type (UnknownType) cannot be promoted to {lhs.field_type}. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion)."
1779+
self.rich_table.add_row("❌", str(lhs), f"{str(rhs)} - {error_msg}")
1780+
else:
1781+
self.rich_table.add_row("❌", str(lhs), str(rhs))
17651782
return False
17661783

17671784
def schema(self, schema: Schema, struct_result: Callable[[], bool]) -> bool:

0 commit comments

Comments
 (0)