Skip to content

Commit be4e2a7

Browse files
committed
fix: preserve HMS table properties during commits
This change fixes HMS-specific table properties being lost during commits by merging parameters instead of replacing them. Fixes: #2926
1 parent 2b84bf5 commit be4e2a7

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

pyiceberg/catalog/hive.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -551,11 +551,22 @@ def commit_table(
551551

552552
if hive_table and current_table:
553553
# Table exists, update it.
554-
hive_table.parameters = _construct_parameters(
554+
new_parameters = _construct_parameters(
555555
metadata_location=updated_staged_table.metadata_location,
556556
previous_metadata_location=current_table.metadata_location,
557557
metadata_properties=updated_staged_table.properties,
558558
)
559+
560+
# Detect properties that were removed from Iceberg metadata
561+
old_iceberg_keys = set(current_table.properties.keys())
562+
new_iceberg_keys = set(updated_staged_table.properties.keys())
563+
removed_keys = old_iceberg_keys - new_iceberg_keys
564+
565+
# Start with current HMS parameters, remove deleted Iceberg properties, then update with new ones
566+
updated_parameters = {k: v for k, v in hive_table.parameters.items() if k not in removed_keys}
567+
updated_parameters.update(new_parameters)
568+
hive_table.parameters = updated_parameters
569+
559570
# Update hive's schema and properties
560571
hive_table.sd = _construct_hive_storage_descriptor(
561572
updated_staged_table.schema(),

tests/integration/test_reads.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,44 @@ def test_hive_properties(catalog: Catalog) -> None:
135135
assert hive_table.parameters.get("abc") is None
136136

137137

138+
@pytest.mark.integration
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
def test_hive_preserves_hms_specific_properties(catalog: Catalog) -> None:
    """Check that an Iceberg commit keeps table parameters written directly to HMS.

    The test proceeds in three steps:

    1. Two extra parameters are written to the Hive table through the metastore
       client, outside of any Iceberg transaction.
    2. An Iceberg transaction sets an unrelated table property and commits.
    3. The Hive table is read back and must carry both the extra parameters
       and the newly committed Iceberg property.

    Regression test for: https://github.com/apache/iceberg-python/issues/2926
    """
    iceberg_table = create_table(catalog)
    metastore: _HiveClient = _HiveClient(catalog.properties["uri"])

    # Step 1: attach parameters on the HMS side that Iceberg does not manage.
    with metastore as client:
        hms_table = client.get_table(*TABLE_NAME)
        hms_table.parameters.update(
            {
                "table_category": "production",
                "data_owner": "data_team",
            }
        )
        client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hms_table)

    # Sanity check: the metastore reports the parameters before any commit.
    with metastore as client:
        params_before_commit = client.get_table(*TABLE_NAME).parameters
        assert params_before_commit.get("table_category") == "production"
        assert params_before_commit.get("data_owner") == "data_team"

    # Step 2: commit an Iceberg-side property change.
    pending = iceberg_table.transaction()
    pending.set_properties({"iceberg_property": "new_value"}).commit_transaction()

    # Step 3: both kinds of parameters must be visible after the commit.
    with metastore as client:
        params_after_commit = client.get_table(*TABLE_NAME).parameters

        category = params_after_commit.get("table_category")
        assert category == "production", "HMS property 'table_category' was lost during commit!"

        owner = params_after_commit.get("data_owner")
        assert owner == "data_team", "HMS property 'data_owner' was lost during commit!"

        # The property set through the Iceberg transaction is present as well.
        assert params_after_commit.get("iceberg_property") == "new_value"
174+
175+
138176
@pytest.mark.integration
139177
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
140178
def test_table_properties_dict(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)