Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions core/src/main/java/org/apache/iceberg/CatalogUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.iceberg.catalog.Catalog;
Expand Down Expand Up @@ -597,12 +598,18 @@ public static void deleteRemovedMetadataFiles(
// the log, thus we don't include metadata.previousFiles() for deletion - everything else can
// be removed
removedPreviousMetadataFiles.removeAll(metadata.previousFiles());
deleteFiles(
io,

Set<String> metadataFilesToDelete =
removedPreviousMetadataFiles.stream()
.map(TableMetadata.MetadataLogEntry::file)
.collect(Collectors.toSet()),
"metadata");
.collect(Collectors.toCollection(Sets::newHashSet));
// delete base's metadata file too if log is empty
if (metadata.previousFiles().isEmpty()
&& !Objects.equals(base.metadataFileLocation(), metadata.metadataFileLocation())) {
metadataFilesToDelete.add(base.metadataFileLocation());
}

deleteFiles(io, metadataFilesToDelete, "metadata");
}
}
}
6 changes: 5 additions & 1 deletion core/src/main/java/org/apache/iceberg/TableMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -1800,12 +1800,16 @@ private static List<MetadataLogEntry> addPreviousFile(

int maxSize =
Math.max(
1,
0,

@smaheshwar-pltr smaheshwar-pltr Jun 13, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pointing out that a negative value for this property now corresponds to an empty log - previously, it was a log of maximum size one. This is technically a break, but that felt fine to me?

PropertyUtil.propertyAsInt(
properties,
TableProperties.METADATA_PREVIOUS_VERSIONS_MAX,
TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT));

if (maxSize == 0) {
return Lists.newArrayList();
}

List<MetadataLogEntry> newMetadataLog;
if (previousFiles.size() >= maxSize) {
int removeIndex = previousFiles.size() - maxSize + 1;
Expand Down
75 changes: 75 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestCatalogUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.anyBoolean;
import static org.mockito.Mockito.anyString;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.withSettings;

Expand All @@ -49,6 +53,7 @@
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;

public class TestCatalogUtil {

Expand Down Expand Up @@ -320,6 +325,76 @@ public void noFailureWhenBulkDeletingMetadataFiles() {
.doesNotThrowAnyException();
}

@Test
public void deletesSupersededMetadataFileWhenPreviousVersionsMaxIsZero() {
FileIO io = mock(FileIO.class);

TableMetadata.MetadataLogEntry entry1 =
new TableMetadata.MetadataLogEntry(
System.currentTimeMillis(), "s3://bucket/metadata/v1.json");

TableMetadata base = mock(TableMetadata.class);
TableMetadata metadata = mock(TableMetadata.class);

when(metadata.propertyAsBoolean(
eq(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED), anyBoolean()))
.thenReturn(true);
when(base.previousFiles()).thenReturn(ImmutableList.of(entry1));
when(base.metadataFileLocation()).thenReturn("s3://bucket/metadata/v2.json");
when(metadata.previousFiles()).thenReturn(ImmutableList.of());

CatalogUtil.deleteRemovedMetadataFiles(io, base, metadata);

ArgumentCaptor<String> deleted = ArgumentCaptor.forClass(String.class);
verify(io, times(2)).deleteFile(deleted.capture());
assertThat(deleted.getAllValues())
.containsExactlyInAnyOrder("s3://bucket/metadata/v1.json", "s3://bucket/metadata/v2.json");
}

@Test
public void keepsSupersededMetadataFileWhenStillTracked() {
FileIO io = mock(FileIO.class);

TableMetadata.MetadataLogEntry entry1 =
new TableMetadata.MetadataLogEntry(
System.currentTimeMillis(), "s3://bucket/metadata/v1.json");
TableMetadata.MetadataLogEntry entry2 =
new TableMetadata.MetadataLogEntry(
System.currentTimeMillis(), "s3://bucket/metadata/v2.json");

TableMetadata base = mock(TableMetadata.class);
TableMetadata metadata = mock(TableMetadata.class);

when(metadata.propertyAsBoolean(
eq(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED), anyBoolean()))
.thenReturn(true);
when(base.previousFiles()).thenReturn(ImmutableList.of(entry1));
when(base.metadataFileLocation()).thenReturn("s3://bucket/metadata/v2.json");
when(metadata.previousFiles()).thenReturn(ImmutableList.of(entry2));

CatalogUtil.deleteRemovedMetadataFiles(io, base, metadata);

ArgumentCaptor<String> deleted = ArgumentCaptor.forClass(String.class);
verify(io, times(1)).deleteFile(deleted.capture());
assertThat(deleted.getAllValues()).containsExactly("s3://bucket/metadata/v1.json");
}

@Test
public void doesNotDeleteCurrentMetadataFileWhenBaseEqualsMetadata() {
FileIO io = mock(FileIO.class);

TableMetadata metadata = mock(TableMetadata.class);
when(metadata.propertyAsBoolean(
eq(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED), anyBoolean()))
.thenReturn(true);
when(metadata.previousFiles()).thenReturn(ImmutableList.of());
when(metadata.metadataFileLocation()).thenReturn("s3://bucket/metadata/v1.json");

CatalogUtil.deleteRemovedMetadataFiles(io, metadata, metadata);

verify(io, never()).deleteFile(anyString());
}

public static class TestCatalog extends BaseMetastoreCatalog {

private String catalogName;
Expand Down
95 changes: 95 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTableMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,101 @@ public void testAddPreviousMetadataRemoveMultiple() throws IOException {
.isEqualTo(previousMetadataLog.subList(0, 4));
}

@Test
public void testAddPreviousMetadataRemoveAllWhenZero() throws IOException {
long previousSnapshotId = System.currentTimeMillis() - new Random(1234).nextInt(3600);

String manifestList =
createManifestListWithManifestFile(previousSnapshotId, null, "file:/tmp/manifest1.avro");
Snapshot previousSnapshot =
new BaseSnapshot(
0,
previousSnapshotId,
null,
previousSnapshotId,
null,
null,
null,
manifestList,
null,
null,
null);

long currentSnapshotId = System.currentTimeMillis();
manifestList =
createManifestListWithManifestFile(
currentSnapshotId, previousSnapshotId, "file:/tmp/manifest2.avro");
Snapshot currentSnapshot =
new BaseSnapshot(
0,
currentSnapshotId,
previousSnapshotId,
currentSnapshotId,
null,
null,
null,
manifestList,
null,
null,
null);

List<HistoryEntry> reversedSnapshotLog = Lists.newArrayList();
reversedSnapshotLog.add(
new SnapshotLogEntry(previousSnapshot.timestampMillis(), previousSnapshotId));
reversedSnapshotLog.add(
new SnapshotLogEntry(currentSnapshot.timestampMillis(), currentSnapshotId));
long currentTimestamp = System.currentTimeMillis();
List<MetadataLogEntry> previousMetadataLog = Lists.newArrayList();
previousMetadataLog.add(
new MetadataLogEntry(
currentTimestamp - 100, "/tmp/000001-" + UUID.randomUUID() + ".metadata.json"));
previousMetadataLog.add(
new MetadataLogEntry(
currentTimestamp - 90, "/tmp/000002-" + UUID.randomUUID() + ".metadata.json"));
previousMetadataLog.add(
new MetadataLogEntry(
currentTimestamp - 80, "/tmp/000003-" + UUID.randomUUID() + ".metadata.json"));

MetadataLogEntry latestPreviousMetadata =
new MetadataLogEntry(
currentTimestamp - 50, "/tmp/000004-" + UUID.randomUUID() + ".metadata.json");

TableMetadata base =
new TableMetadata(
latestPreviousMetadata.file(),
1,
UUID.randomUUID().toString(),
TEST_LOCATION,
0,
currentTimestamp - 50,
3,
7,
ImmutableList.of(TEST_SCHEMA),
SPEC_5.specId(),
ImmutableList.of(SPEC_5),
SPEC_5.lastAssignedFieldId(),
SortOrder.unsorted().orderId(),
ImmutableList.of(SortOrder.unsorted()),
ImmutableMap.of("property", "value"),
currentSnapshotId,
Arrays.asList(previousSnapshot, currentSnapshot),
null,
reversedSnapshotLog,
ImmutableList.copyOf(previousMetadataLog),
ImmutableMap.of(),
ImmutableList.of(),
ImmutableList.of(),
0L,
ImmutableList.of(),
ImmutableList.of());

TableMetadata metadata =
base.replaceProperties(
ImmutableMap.of(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "0"));

assertThat(metadata.previousFiles()).isEmpty();
}

@Test
public void testV2UUIDValidation() {
assertThatThrownBy(
Expand Down
10 changes: 10 additions & 0 deletions core/src/test/java/org/apache/iceberg/catalog/CatalogTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -3231,6 +3231,16 @@ public void testMetadataFileLocationsRemovalAfterCommit() {
metadataFileLocations = ReachableFileUtil.metadataFileLocations(table, false);
assertThat(metadataFileLocations).hasSize(maxPreviousVersionsToKeep + 1);
}

// with previous-versions-max=0 no previous metadata files are tracked, so each commit deletes
// the superseded metadata file and only the current one remains
table.updateProperties().set(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, "0").commit();

for (int i = 1; i <= 5; i++) {
table.updateSchema().addColumn("f" + i, Types.LongType.get()).commit();
metadataFileLocations = ReachableFileUtil.metadataFileLocations(table, false);
assertThat(metadataFileLocations).hasSize(1);
}
}

@Test
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Iceberg tables support table properties to configure table behavior, like the de
| write.wap.enabled | false | Enables write-audit-publish writes |
| write.summary.partition-limit | 0 | Includes partition-level summary stats in snapshot summaries if the changed partition count is less than this limit |
| write.metadata.delete-after-commit.enabled | false | Controls whether to delete the oldest **tracked** version metadata files after each table commit. See the [Remove old metadata files](maintenance.md#remove-old-metadata-files) section for additional details |
| write.metadata.previous-versions-max | 100 | The max number of previous version metadata files to track |
| write.metadata.previous-versions-max | 100 | The max number of previous version metadata files to track. A value of `0` keeps no previous metadata files (an empty `metadata-log`) |

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pointing out that I'm not including a spec change in this PR because I kind of think

A list (optional) of timestamp and metadata file location pairs that encodes changes to the previous metadata files for the table. Each time a new metadata file is created, a new entry of the previous metadata file location should be added to the list. Tables can be configured to remove oldest metadata log entries and keep a fixed-size log of the most recent entries after a commit.

from https://iceberg.apache.org/spec/#table-metadata is worded consistently with a value of 0?

| write.spark.fanout.enabled | false | Enables the fanout writer in Spark that does not require data to be clustered; uses more memory |
| write.object-storage.enabled | false | Enables the object storage location provider that adds a hash component to file paths |
| write.object-storage.partitioned-paths | true | Includes the partition values in the file path |
Expand Down
2 changes: 2 additions & 0 deletions docs/docs/maintenance.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ Untracked metadata files are also deleted as part of [orphan file deletion](#del
| write.metadata.delete-after-commit.enabled | false | Controls whether to delete the oldest **tracked** version metadata files after each table commit |
| write.metadata.previous-versions-max | 100 | The max number of previous version metadata files to track |

Setting `write.metadata.previous-versions-max=0` keeps no previous metadata files, so the `metadata-log` is always empty. With `write.metadata.delete-after-commit.enabled=true`, the superseded metadata file is deleted on every commit, leaving only the current metadata file; lowering the property to `0` also deletes any files still tracked in the existing `metadata-log` on the next commit.

Examples:

* With `write.metadata.delete-after-commit.enabled=false` and `write.metadata.previous-versions-max=10`, after 100 commits, one will have 10 tracked metadata files and 90 orphaned metadata files. These 90 orphaned metadata files cannot be deleted by setting `write.metadata.delete-after-commit.enabled=true` because they are already untracked. They can only be cleaned with an orphan file deletion procedure.
Expand Down