From 02e415048823ede1737431361e22a9ad580df427 Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Mon, 12 May 2025 08:16:54 -0700 Subject: [PATCH 1/3] Add additional properties when doing conversion for hudi --- .../xtable/service/ConversionService.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java index 1d4ad32e3..ab3a1a4ad 100644 --- a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java +++ b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java @@ -19,6 +19,7 @@ package org.apache.xtable.service; import static org.apache.xtable.conversion.ConversionUtils.convertToSourceTable; +import static org.apache.xtable.hudi.HudiSourceConfig.PARTITION_FIELD_SPEC_CONFIG; import static org.apache.xtable.model.storage.TableFormat.DELTA; import static org.apache.xtable.model.storage.TableFormat.HUDI; import static org.apache.xtable.model.storage.TableFormat.ICEBERG; @@ -27,6 +28,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import lombok.extern.log4j.Log4j2; @@ -186,11 +188,24 @@ public ConversionService( * @return a ConvertTableResponse containing details of the converted target tables */ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest) { + + Properties sourceProperties = new Properties(); + if (convertTableRequest.getConfigurations() != null) { + String partitionSpec = + convertTableRequest.getConfigurations().getOrDefault("partition-spec", null); + if (partitionSpec != null + && (HUDI.equals(convertTableRequest.getSourceFormat()) + || convertTableRequest.getTargetFormats().contains(HUDI))) { + sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionSpec); + } + } + SourceTable sourceTable = SourceTable.builder() .name(convertTableRequest.getSourceTableName()) .basePath(convertTableRequest.getSourceTablePath()) .formatName(convertTableRequest.getSourceFormat()) + .additionalProperties(sourceProperties) .build(); List targetTables = new ArrayList<>(); @@ -200,6 +215,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest .name(convertTableRequest.getSourceTableName()) .basePath(convertTableRequest.getSourceTablePath()) .formatName(targetFormat) + .additionalProperties(sourceProperties) .build(); targetTables.add(targetTable); } @@ -220,7 +236,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest String schemaString = extractSchemaString(targetTable, internalTable); convertedTables.add( ConvertedTable.builder() - .targetFormat(internalTable.getName()) + .targetFormat(internalTable.getTableFormat()) .targetSchema(schemaString) .targetMetadataPath(internalTable.getLatestMetdataPath()) .build()); From 2e680eec2e2ac8c3b1c463767426a289929f1226 Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Thu, 29 May 2025 00:59:38 -0700 Subject: [PATCH 2/3] fix other issue with hudi partitiong when iceberg is source, and fix UT --- .../org/apache/xtable/service/ConversionService.java | 10 +++++----- .../xtable/service/models/ConvertTableRequest.java | 5 +++++ .../apache/xtable/service/TestConversionService.java | 10 +++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java index ab3a1a4ad..675ae5a50 100644 --- a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java +++ b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java @@ -192,10 +192,8 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest Properties sourceProperties = new Properties(); if (convertTableRequest.getConfigurations() != null) { String partitionSpec = - convertTableRequest.getConfigurations().getOrDefault("partition-spec", null); - if (partitionSpec != null - && (HUDI.equals(convertTableRequest.getSourceFormat()) - || convertTableRequest.getTargetFormats().contains(HUDI))) { + convertTableRequest.getConfigurations().getOrDefault("partition-spec", null); + if (partitionSpec != null) { sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionSpec); } } @@ -204,6 +202,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest SourceTable.builder() .name(convertTableRequest.getSourceTableName()) .basePath(convertTableRequest.getSourceTablePath()) + .dataPath(convertTableRequest.getSourceDataPath()) .formatName(convertTableRequest.getSourceFormat()) .additionalProperties(sourceProperties) .build(); @@ -213,7 +212,8 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest TargetTable targetTable = TargetTable.builder() .name(convertTableRequest.getSourceTableName()) - .basePath(convertTableRequest.getSourceTablePath()) + // set the metadata path to the data path as the default (required by Hudi) + .basePath(convertTableRequest.getSourceDataPath()) .formatName(targetFormat) .additionalProperties(sourceProperties) .build(); diff --git a/xtable-service/src/main/java/org/apache/xtable/service/models/ConvertTableRequest.java b/xtable-service/src/main/java/org/apache/xtable/service/models/ConvertTableRequest.java index 465c3c0c0..1e151300c 100644 --- a/xtable-service/src/main/java/org/apache/xtable/service/models/ConvertTableRequest.java +++ b/xtable-service/src/main/java/org/apache/xtable/service/models/ConvertTableRequest.java @@ -39,6 +39,9 @@ public class ConvertTableRequest { @JsonProperty("source-table-path") private String sourceTablePath; + @JsonProperty("source-data-path") + private String sourceDataPath; + @JsonProperty("target-formats") private List targetFormats; @@ -52,12 +55,14 @@ public ConvertTableRequest( @JsonProperty("source-format") String sourceFormat, @JsonProperty("source-table-name") String sourceTableName, @JsonProperty("source-table-path") String sourceTablePath, + @JsonProperty("source-data-path") String sourceDataPath, @JsonProperty("target-format") List targetFormat, @JsonProperty("configurations") Map configurations) { this.sourceFormat = sourceFormat; this.sourceTableName = sourceTableName; this.sourceTablePath = sourceTablePath; + this.sourceDataPath = sourceDataPath; this.targetFormats = targetFormat; this.configurations = configurations; } diff --git a/xtable-service/src/test/java/org/apache/xtable/service/TestConversionService.java b/xtable-service/src/test/java/org/apache/xtable/service/TestConversionService.java index d22b561cf..465b1c4ce 100644 --- a/xtable-service/src/test/java/org/apache/xtable/service/TestConversionService.java +++ b/xtable-service/src/test/java/org/apache/xtable/service/TestConversionService.java @@ -59,6 +59,7 @@ class TestConversionService { private static final String SOURCE_NAME = "users"; private static final String SOURCE_PATH = "s3://bucket/tables/users"; + private static final String SOURCE_DATA_PATH = "s3://bucket/tables/users/data"; private static final String HUDI_META_PATH = "s3://bucket/tables/users/.hoodie"; private static final String ICEBERG_META_PATH = "s3://bucket/tables/users/metadata/v1.metadata.json"; @@ -111,6 +112,7 @@ void convertToTargetHudi() { .sourceFormat(TableFormat.DELTA) .sourceTableName(SOURCE_NAME) .sourceTablePath(SOURCE_PATH) + .sourceDataPath(SOURCE_DATA_PATH) .targetFormats(Collections.singletonList(TableFormat.HUDI)) .build(); @@ -120,7 +122,7 @@ void convertToTargetHudi() { when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc); when(conversionSrc.getCurrentTable()).thenReturn(internalTbl); - when(internalTbl.getName()).thenReturn(TableFormat.HUDI); + when(internalTbl.getTableFormat()).thenReturn(TableFormat.HUDI); when(internalTbl.getLatestMetdataPath()).thenReturn(HUDI_META_PATH); when(internalTbl.getReadSchema()).thenReturn(internalSchema); @@ -146,6 +148,7 @@ void convertToTargetIceberg() { .sourceFormat(TableFormat.DELTA) .sourceTableName(SOURCE_NAME) .sourceTablePath(SOURCE_PATH) + .sourceDataPath(SOURCE_DATA_PATH) .targetFormats(Collections.singletonList(TableFormat.ICEBERG)) .build(); @@ -157,7 +160,7 @@ void convertToTargetIceberg() { when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc); when(conversionSrc.getCurrentTable()).thenReturn(internalTbl); - when(internalTbl.getName()).thenReturn(TableFormat.ICEBERG); + when(internalTbl.getTableFormat()).thenReturn(TableFormat.ICEBERG); when(internalTbl.getLatestMetdataPath()).thenReturn(ICEBERG_META_PATH); when(internalTbl.getReadSchema()).thenReturn(internalSchema); @@ -185,6 +188,7 @@ void convertToTargetDelta() { .sourceFormat(TableFormat.ICEBERG) .sourceTableName(SOURCE_NAME) .sourceTablePath(SOURCE_PATH) + .sourceDataPath(SOURCE_DATA_PATH) .targetFormats(Collections.singletonList(TableFormat.DELTA)) .build(); @@ -194,7 +198,7 @@ void convertToTargetDelta() { when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc); when(conversionSrc.getCurrentTable()).thenReturn(internalTbl); - when(internalTbl.getName()).thenReturn(TableFormat.DELTA); + when(internalTbl.getTableFormat()).thenReturn(TableFormat.DELTA); when(internalTbl.getLatestMetdataPath()).thenReturn(DELTA_META_PATH); when(internalTbl.getReadSchema()).thenReturn(internalSchema); From 44ac6e570d393f1d216c42914b715f090f0c8c8d Mon Sep 17 00:00:00 2001 From: Rahil Chertara Date: Thu, 29 May 2025 01:21:56 -0700 Subject: [PATCH 3/3] spotless apply --- .../main/java/org/apache/xtable/service/ConversionService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java index 675ae5a50..7d19512a2 100644 --- a/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java +++ b/xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java @@ -192,7 +192,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest Properties sourceProperties = new Properties(); if (convertTableRequest.getConfigurations() != null) { String partitionSpec = - convertTableRequest.getConfigurations().getOrDefault("partition-spec", null); + convertTableRequest.getConfigurations().getOrDefault("partition-spec", null); if (partitionSpec != null) { sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionSpec); }