From 8a21601e7fded5fafae3bcba60dfae996fd7f1fc Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Wed, 15 Apr 2026 19:56:32 -0700 Subject: [PATCH 1/6] =?UTF-8?q?API,=20Core:=20Add=20UDF=20leaf=20types=20?= =?UTF-8?q?=E2=80=94=20Representation=20and=20Parameter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../iceberg/udf/SQLUdfRepresentation.java | 34 +++ .../org/apache/iceberg/udf/UdfParameter.java | 38 ++++ .../apache/iceberg/udf/UdfRepresentation.java | 31 +++ .../iceberg/udf/BaseSQLUdfRepresentation.java | 30 +++ .../apache/iceberg/udf/BaseUdfParameter.java | 34 +++ .../udf/SQLUdfRepresentationParser.java | 61 ++++++ .../iceberg/udf/UdfParameterParser.java | 70 ++++++ .../iceberg/udf/UdfRepresentationParser.java | 68 ++++++ .../org/apache/iceberg/udf/UdfTypeUtil.java | 93 ++++++++ .../iceberg/udf/UnknownUdfRepresentation.java | 24 +++ .../udf/TestSQLUdfRepresentationParser.java | 97 +++++++++ .../iceberg/udf/TestUdfParameterParser.java | 149 +++++++++++++ .../udf/TestUdfRepresentationParser.java | 53 +++++ .../apache/iceberg/udf/TestUdfTypeUtil.java | 199 ++++++++++++++++++ 14 files changed, 981 insertions(+) create mode 100644 api/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentation.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfParameter.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/BaseSQLUdfRepresentation.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/BaseUdfParameter.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentationParser.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/UdfParameterParser.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java create mode 100644 core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java create mode 100644 
core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java
 create mode 100644 core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java
 create mode 100644 core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java
 create mode 100644 core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java
 create mode 100644 core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java

diff --git a/api/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentation.java b/api/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentation.java
new file mode 100644
index 000000000000..00a90bf1a07b
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentation.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+/** A SQL representation of a UDF, containing the SQL expression and dialect. */
+public interface SQLUdfRepresentation extends UdfRepresentation {
+
+  /** Returns {@link Type#SQL}; implementations inherit this fixed type tag by default. */
+  @Override
+  default String type() {
+    return Type.SQL;
+  }
+
+  /** The SQL expression text that defines the function body. */
+  String sql();
+
+  /** The SQL dialect identifier (e.g., "spark", "trino"). */
+  String dialect();
+}
diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java
new file mode 100644
index 000000000000..2e08e92447f7
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import javax.annotation.Nullable;
+
+/** A parameter declared in a UDF definition: a name, a data type, and an optional doc string. */
+public interface UdfParameter {
+
+  /** The parameter name. */
+  String name();
+
+  /**
+   * The parameter data type, encoded as a type string for primitives/semi-structured types or as a
+   * JSON object for nested types (struct, list, map).
+   *
+   * <p>The return type is {@code Object} because the value has one of those two shapes; callers
+   * have to inspect the runtime type before using it.
+   */
+  Object type();
+
+  /** Optional documentation string; {@code null} when the parameter has no documentation. */
+  @Nullable
+  String doc();
+}
diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java b/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java
new file mode 100644
index 000000000000..d1884639604a
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+/** A representation of a UDF implementation, identified by a string type tag. */
+public interface UdfRepresentation {
+
+  /** Holder for the known representation type tags; it has a private constructor only. */
+  class Type {
+    private Type() {}
+
+    /** Type tag used by SQL representations. */
+    public static final String SQL = "sql";
+  }
+
+  /** The type tag of this representation, for example {@link Type#SQL}. */
+  String type();
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/BaseSQLUdfRepresentation.java b/core/src/main/java/org/apache/iceberg/udf/BaseSQLUdfRepresentation.java
new file mode 100644
index 000000000000..43df1e243fd5
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/BaseSQLUdfRepresentation.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import org.immutables.value.Value;
+
+/**
+ * Immutables definition for {@link SQLUdfRepresentation}.
+ *
+ * <p>The style below names the generated class {@code ImmutableSQLUdfRepresentation} and requests
+ * public visibility for it and for its builder, although this interface itself is
+ * package-private.
+ */
+@Value.Immutable
+@Value.Include(value = SQLUdfRepresentation.class)
+@SuppressWarnings("ImmutablesStyle")
+@Value.Style(
+    typeImmutable = "ImmutableSQLUdfRepresentation",
+    visibilityString = "PUBLIC",
+    builderVisibilityString = "PUBLIC")
+interface BaseSQLUdfRepresentation extends SQLUdfRepresentation {}
diff --git a/core/src/main/java/org/apache/iceberg/udf/BaseUdfParameter.java b/core/src/main/java/org/apache/iceberg/udf/BaseUdfParameter.java
new file mode 100644
index 000000000000..8531a92cbc15
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/BaseUdfParameter.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import javax.annotation.Nullable;
+import org.immutables.value.Value;
+
+/**
+ * Immutables definition for {@link UdfParameter}.
+ *
+ * <p>The style below names the generated class {@code ImmutableUdfParameter} and requests public
+ * visibility for it and for its builder, although this interface itself is package-private.
+ */
+@Value.Immutable
+@SuppressWarnings("ImmutablesStyle")
+@Value.Style(
+    typeImmutable = "ImmutableUdfParameter",
+    visibilityString = "PUBLIC",
+    builderVisibilityString = "PUBLIC")
+interface BaseUdfParameter extends UdfParameter {
+  // NOTE(review): re-declared with @Nullable directly on this type, presumably so the generated
+  // builder treats doc as optional -- confirm against the Immutables processor behaviour.
+  @Override
+  @Nullable
+  String doc();
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentationParser.java b/core/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentationParser.java
new file mode 100644
index 000000000000..7174c8e6de7c
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/SQLUdfRepresentationParser.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * Converts {@link SQLUdfRepresentation} instances to and from JSON objects with the fields {@code
+ * type}, {@code sql} and {@code dialect}.
+ */
+class SQLUdfRepresentationParser {
+  private static final String SQL = "sql";
+  private static final String DIALECT = "dialect";
+
+  private SQLUdfRepresentationParser() {}
+
+  /** Serializes the representation to a JSON string. */
+  static String toJson(SQLUdfRepresentation representation) {
+    return JsonUtil.generate(jsonGenerator -> toJson(representation, jsonGenerator), false);
+  }
+
+  /**
+   * Writes the representation as one JSON object: type first, then sql, then dialect.
+   *
+   * @throws IllegalArgumentException if {@code representation} is null
+   */
+  static void toJson(SQLUdfRepresentation representation, JsonGenerator generator)
+      throws IOException {
+    Preconditions.checkArgument(null != representation, "Invalid SQL UDF representation: null");
+
+    generator.writeStartObject();
+    generator.writeStringField(UdfRepresentationParser.TYPE, representation.type());
+    generator.writeStringField(SQL, representation.sql());
+    generator.writeStringField(DIALECT, representation.dialect());
+    generator.writeEndObject();
+  }
+
+  /** Parses a JSON string into a SQL representation. */
+  static SQLUdfRepresentation fromJson(String json) {
+    return JsonUtil.parse(json, SQLUdfRepresentationParser::fromJson);
+  }
+
+  /**
+   * Builds a SQL representation from the {@code sql} and {@code dialect} fields of a JSON object.
+   * The {@code type} field is not read here.
+   *
+   * @throws IllegalArgumentException if {@code node} is null or is not a JSON object
+   */
+  static SQLUdfRepresentation fromJson(JsonNode node) {
+    Preconditions.checkArgument(
+        null != node, "Cannot parse SQL UDF representation from null object");
+    Preconditions.checkArgument(
+        node.isObject(), "Cannot parse SQL UDF representation from non-object: %s", node);
+
+    // read in the same order as before so a document missing both fields reports "sql" first
+    String sqlText = JsonUtil.getString(SQL, node);
+    String dialectName = JsonUtil.getString(DIALECT, node);
+    return ImmutableSQLUdfRepresentation.builder().sql(sqlText).dialect(dialectName).build();
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfParameterParser.java b/core/src/main/java/org/apache/iceberg/udf/UdfParameterParser.java
new file mode 100644
index 000000000000..bd9df272c950
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/UdfParameterParser.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under
one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * Converts {@link UdfParameter} instances to and from JSON objects with the fields {@code name},
+ * {@code type} and the optional {@code doc}.
+ */
+class UdfParameterParser {
+  private static final String NAME = "name";
+  private static final String TYPE = "type";
+  private static final String DOC = "doc";
+
+  private UdfParameterParser() {}
+
+  /** Serializes the parameter to a JSON string. */
+  static String toJson(UdfParameter parameter) {
+    return JsonUtil.generate(gen -> toJson(parameter, gen), false);
+  }
+
+  /**
+   * Writes the parameter as one JSON object. The {@code doc} field is omitted when the parameter
+   * has no documentation.
+   *
+   * @throws IllegalArgumentException if {@code parameter} is null
+   */
+  static void toJson(UdfParameter parameter, JsonGenerator generator) throws IOException {
+    Preconditions.checkArgument(parameter != null, "Invalid UDF parameter: null");
+    generator.writeStartObject();
+    generator.writeStringField(NAME, parameter.name());
+    UdfTypeUtil.writeType(TYPE, parameter.type(), generator);
+
+    // read the accessor once so the null check and the write see the same value
+    String doc = parameter.doc();
+    if (doc != null) {
+      generator.writeStringField(DOC, doc);
+    }
+
+    generator.writeEndObject();
+  }
+
+  /** Parses a JSON string into a parameter. */
+  static UdfParameter fromJson(String json) {
+    return JsonUtil.parse(json, UdfParameterParser::fromJson);
+  }
+
+  /**
+   * Builds a parameter from a JSON object.
+   *
+   * <p>{@code doc} is optional: a missing field and an explicit JSON {@code null} both yield a
+   * parameter without documentation. A present, non-null {@code doc} must be a string.
+   *
+   * @throws IllegalArgumentException if {@code node} is null, is not a JSON object, or a required
+   *     field is missing or malformed
+   */
+  static UdfParameter fromJson(JsonNode node) {
+    Preconditions.checkArgument(node != null, "Cannot parse UDF parameter from null object");
+    Preconditions.checkArgument(
+        node.isObject(), "Cannot parse UDF parameter from non-object: %s", node);
+
+    return ImmutableUdfParameter.builder()
+        .name(JsonUtil.getString(NAME, node))
+        .type(UdfTypeUtil.readType(node.get(TYPE)))
+        // getStringOrNull tolerates both an absent field and "doc": null
+        .doc(JsonUtil.getStringOrNull(DOC, node))
+        .build();
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java
new file mode 100644
index 000000000000..fab2d800a873
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.util.Locale;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * Dispatches JSON conversion of {@link UdfRepresentation} instances on their type tag, compared
+ * case-insensitively.
+ */
+class UdfRepresentationParser {
+  static final String TYPE = "type";
+
+  private UdfRepresentationParser() {}
+
+  /**
+   * Writes the representation with the parser that matches its type tag.
+   *
+   * @throws IllegalArgumentException if {@code representation} is null
+   * @throws UnsupportedOperationException if the type tag is not {@code sql}
+   */
+  static void toJson(UdfRepresentation representation, JsonGenerator generator) throws IOException {
+    Preconditions.checkArgument(representation != null, "Invalid UDF representation: null");
+
+    String normalizedType = representation.type().toLowerCase(Locale.ENGLISH);
+    if (UdfRepresentation.Type.SQL.equals(normalizedType)) {
+      SQLUdfRepresentationParser.toJson((SQLUdfRepresentation) representation, generator);
+      return;
+    }
+
+    // the message reports the type exactly as the representation returned it
+    throw new UnsupportedOperationException(
+        String.format(
+            "Cannot serialize unsupported UDF representation: %s", representation.type()));
+  }
+
+  /** Serializes the representation to a JSON string. */
+  static String toJson(UdfRepresentation entry) {
+    return JsonUtil.generate(jsonGenerator -> toJson(entry, jsonGenerator), false);
+  }
+
+  /** Parses a JSON string into a representation. */
+  static UdfRepresentation fromJson(String json) {
+    return JsonUtil.parse(json, UdfRepresentationParser::fromJson);
+  }
+
+  /**
+   * Builds a representation from a JSON object. A {@code sql} type is delegated to {@link
+   * SQLUdfRepresentationParser}; any other type yields an unknown representation that keeps only
+   * the lower-cased type tag.
+   *
+   * @throws IllegalArgumentException if {@code node} is null or is not a JSON object
+   */
+  static UdfRepresentation fromJson(JsonNode node) {
+    Preconditions.checkArgument(node != null, "Cannot parse UDF representation from null object");
+    Preconditions.checkArgument(
+        node.isObject(), "Cannot parse UDF representation from non-object: %s", node);
+
+    String normalizedType = JsonUtil.getString(TYPE, node).toLowerCase(Locale.ENGLISH);
+    if (UdfRepresentation.Type.SQL.equals(normalizedType)) {
+      return SQLUdfRepresentationParser.fromJson(node);
+    }
+
+    return ImmutableUnknownUdfRepresentation.builder().type(normalizedType).build();
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java
new file mode 100644
index 000000000000..ef0ccd770fb1
--- /dev/null
+++
b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.util.JsonUtil;
+
+/**
+ * Utility for reading and writing UDF types. Types can be either a primitive type string (e.g.,
+ * "int", "string", "variant") or a JSON object for nested types (struct, list, map).
+ */
+class UdfTypeUtil {
+
+  private UdfTypeUtil() {}
+
+  /**
+   * Reads a UDF type from a JSON node. Returns a String for primitive types or a Map for nested
+   * types.
+   *
+   * @throws IllegalArgumentException if {@code node} is null or is neither textual nor an object
+   */
+  static Object readType(JsonNode node) {
+    Preconditions.checkArgument(node != null, "Cannot read type from null node");
+
+    if (node.isTextual()) {
+      return node.asText();
+    } else if (node.isObject()) {
+      return JsonUtil.mapper().convertValue(node, Map.class);
+    } else {
+      throw new IllegalArgumentException(
+          String.format("Cannot parse UDF type from node: %s", node));
+    }
+  }
+
+  /**
+   * Writes a UDF type to a JSON generator under the given field name. The type can be a String
+   * (primitive) or a Map (nested type).
+   *
+   * @throws IllegalArgumentException if {@code type} is null or is neither a String nor a Map;
+   *     in that case nothing has been written to the generator
+   */
+  static void writeType(String fieldName, Object type, JsonGenerator generator) throws IOException {
+    // validate before emitting the field name so an unsupported type leaves no dangling field
+    checkSerializable(type);
+    generator.writeFieldName(fieldName);
+    writeTypeValue(type, generator);
+  }
+
+  /**
+   * Writes a UDF type value (without a field name) to a JSON generator. Used when writing array
+   * elements.
+   *
+   * @throws IllegalArgumentException if {@code type} is null or is neither a String nor a Map
+   */
+  static void writeTypeValue(Object type, JsonGenerator generator) throws IOException {
+    checkSerializable(type);
+
+    if (type instanceof String) {
+      generator.writeString((String) type);
+    } else {
+      // checkSerializable guarantees a Map here; a wildcard cast needs no unchecked suppression
+      ObjectNode objectNode = JsonUtil.mapper().convertValue((Map<?, ?>) type, ObjectNode.class);
+      generator.writeTree(objectNode);
+    }
+  }
+
+  /** Rejects null and any value that is neither a String nor a Map. */
+  private static void checkSerializable(Object type) {
+    Preconditions.checkArgument(type != null, "Invalid type: null");
+
+    if (!(type instanceof String) && !(type instanceof Map)) {
+      throw new IllegalArgumentException(
+          String.format("Cannot serialize UDF type: %s (%s)", type, type.getClass().getName()));
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java
new file mode 100644
index 000000000000..b6031ab034d6
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import org.immutables.value.Value;
+
+/**
+ * A {@link UdfRepresentation} that declares nothing beyond the inherited type tag.
+ *
+ * <p>{@code UdfRepresentationParser.fromJson} returns the generated immutable implementation of
+ * this interface for any type tag other than {@code sql}.
+ */
+@Value.Immutable
+public interface UnknownUdfRepresentation extends UdfRepresentation {}
diff --git a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java
new file mode 100644
index 000000000000..1fc960c11d89
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import org.junit.jupiter.api.Test;
+
+/** Tests JSON parsing and serialization of {@link SQLUdfRepresentation}. */
+public class TestSQLUdfRepresentationParser {
+
+  /** A well-formed SQL document parses into the equivalent immutable value. */
+  @Test
+  public void testParseSqlUdfRepresentation() {
+    SQLUdfRepresentation expected =
+        ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build();
+    String input = "{\"type\":\"sql\", \"sql\": \"x + 1\", \"dialect\": \"spark\"}";
+
+    assertThat(SQLUdfRepresentationParser.fromJson(input))
+        .as("Should be able to parse valid SQL UDF representation")
+        .isEqualTo(expected);
+  }
+
+  /** Each document omits exactly one required field and must be rejected naming that field. */
+  @Test
+  public void testParseMissingRequiredFields() {
+    assertThatThrownBy(
+            () -> UdfRepresentationParser.fromJson("{\"type\":\"sql\", \"sql\": \"x + 1\"}"))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse missing string: dialect");
+
+    assertThatThrownBy(
+            () -> UdfRepresentationParser.fromJson("{\"type\":\"sql\", \"dialect\": \"spark\"}"))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse missing string: sql");
+
+    assertThatThrownBy(
+            () -> UdfRepresentationParser.fromJson("{\"sql\":\"x + 1\",\"dialect\":\"spark\"}"))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse missing string: type");
+  }
+
+  /** Serialization yields the exact compact JSON, and parsing it back yields an equal value. */
+  @Test
+  public void testRoundTripSerialization() {
+    SQLUdfRepresentation original =
+        ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build();
+    String expectedJson = "{\"type\":\"sql\",\"sql\":\"x + 1\",\"dialect\":\"spark\"}";
+
+    assertThat(UdfRepresentationParser.toJson(original))
+        .as("Should be able to serialize valid SQL UDF representation")
+        .isEqualTo(expectedJson);
+
+    String serialized = UdfRepresentationParser.toJson(original);
+    assertThat(UdfRepresentationParser.fromJson(serialized)).isEqualTo(original);
+  }
+
+  /** A non-spark dialect survives a round trip through the generic representation parser. */
+  @Test
+  public void testRoundTripWithTrinoDialect() {
+    SQLUdfRepresentation original =
+        ImmutableSQLUdfRepresentation.builder().sql("x + 1.0").dialect("trino").build();
+
+    UdfRepresentation parsed =
+        UdfRepresentationParser.fromJson(UdfRepresentationParser.toJson(original));
+
+    assertThat(parsed).isInstanceOf(SQLUdfRepresentation.class);
+    SQLUdfRepresentation sqlRepresentation = (SQLUdfRepresentation) parsed;
+    assertThat(sqlRepresentation.sql()).isEqualTo("x + 1.0");
+    assertThat(sqlRepresentation.dialect()).isEqualTo("trino");
+    assertThat(sqlRepresentation.type()).isEqualTo("sql");
+  }
+
+  /** Null inputs are rejected with descriptive messages in both directions. */
+  @Test
+  public void testNullSqlUdfRepresentation() {
+    assertThatThrownBy(() -> SQLUdfRepresentationParser.toJson(null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid SQL UDF representation: null");
+
+    assertThatThrownBy(() -> SQLUdfRepresentationParser.fromJson((JsonNode) null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse SQL UDF representation from null object");
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java
new file mode 100644
index 000000000000..64f971768b48
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.udf;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.Map;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.junit.jupiter.api.Test;
+
+/** Tests JSON parsing and serialization of {@link UdfParameter} through UdfParameterParser. */
+public class TestUdfParameterParser {
+
+  /** A string-typed parameter without doc parses with a null doc. */
+  @Test
+  public void testParsePrimitiveTypeParameter() {
+    String json = "{\"name\":\"x\",\"type\":\"int\"}";
+    UdfParameter parameter = ImmutableUdfParameter.builder().name("x").type("int").build();
+
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("x");
+    assertThat(parsed.type()).isEqualTo("int");
+    assertThat(parsed.doc()).isNull();
+    assertThat(parsed).isEqualTo(parameter);
+  }
+
+  /** The optional doc field is carried through when present. */
+  @Test
+  public void testParseParameterWithDoc() {
+    String json = "{\"name\":\"x\",\"type\":\"int\",\"doc\":\"Input integer\"}";
+    UdfParameter parameter =
+        ImmutableUdfParameter.builder().name("x").type("int").doc("Input integer").build();
+
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("x");
+    assertThat(parsed.type()).isEqualTo("int");
+    assertThat(parsed.doc()).isEqualTo("Input integer");
+    assertThat(parsed).isEqualTo(parameter);
+  }
+
+  /** A parameterized type string is kept verbatim, not interpreted. */
+  @Test
+  public void testParseDecimalTypeParameter() {
+    String json = "{\"name\":\"amount\",\"type\":\"decimal(9,2)\"}";
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("amount");
+    assertThat(parsed.type()).isEqualTo("decimal(9,2)");
+  }
+
+  /** The "variant" type is handled as a plain type string. */
+  @Test
+  public void testParseVariantTypeParameter() {
+    String json = "{\"name\":\"data\",\"type\":\"variant\"}";
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("data");
+    assertThat(parsed.type()).isEqualTo("variant");
+  }
+
+  /** A nested list type given as a JSON object is exposed as a Map of its fields. */
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testParseListTypeParameter() {
+    String json = "{\"name\":\"items\",\"type\":{\"type\":\"list\",\"element\":\"string\"}}";
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("items");
+    assertThat(parsed.type()).isInstanceOf(Map.class);
+
+    Map typeMap = (Map) parsed.type();
+    assertThat(typeMap).containsEntry("type", "list");
+    assertThat(typeMap).containsEntry("element", "string");
+  }
+
+  /** A nested map type given as a JSON object is exposed as a Map of its fields. */
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testParseMapTypeParameter() {
+    String json =
+        "{\"name\":\"lookup\",\"type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}";
+    UdfParameter parsed = UdfParameterParser.fromJson(json);
+    assertThat(parsed.name()).isEqualTo("lookup");
+    assertThat(parsed.type()).isInstanceOf(Map.class);
+
+    Map typeMap = (Map) parsed.type();
+    assertThat(typeMap).containsEntry("type", "map");
+    assertThat(typeMap).containsEntry("key", "string");
+    assertThat(typeMap).containsEntry("value", "int");
+  }
+
+  /** Serializing and re-parsing a string-typed parameter yields an equal value. */
+  @Test
+  public void testRoundTripPrimitiveType() {
+    UdfParameter parameter =
+        ImmutableUdfParameter.builder().name("x").type("int").doc("Input integer").build();
+
+    String serialized = UdfParameterParser.toJson(parameter);
+    UdfParameter deserialized = UdfParameterParser.fromJson(serialized);
+
+    assertThat(deserialized).isEqualTo(parameter);
+  }
+
+  /**
+   * Serializing and re-parsing a Map-typed parameter preserves the nested entries. The check is
+   * entry-wise instead of using equals on the whole parameter.
+   */
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testRoundTripNestedType() {
+    Map listType = ImmutableMap.of("type", "list", "element", "string");
+    UdfParameter parameter = ImmutableUdfParameter.builder().name("items").type(listType).build();
+
+    String serialized = UdfParameterParser.toJson(parameter);
+    UdfParameter deserialized = UdfParameterParser.fromJson(serialized);
+
+    assertThat(deserialized.name()).isEqualTo("items");
+    Map roundTrippedType = (Map) deserialized.type();
+    assertThat(roundTrippedType).containsEntry("type", "list");
+    assertThat(roundTrippedType).containsEntry("element", "string");
+  }
+
+  /** Null inputs are rejected with descriptive messages in both directions. */
+  @Test
+  public void testNullParameter() {
+    assertThatThrownBy(() -> UdfParameterParser.toJson(null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid UDF parameter: null");
+
+    assertThatThrownBy(() -> UdfParameterParser.fromJson((JsonNode) null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse UDF parameter from null object");
+  }
+
+  /**
+   * Missing name and missing type are both rejected. The two messages differ in wording, which
+   * this test pins.
+   */
+  @Test
+  public void testMissingRequiredFields() {
+    String missingName = "{\"type\":\"int\"}";
+    assertThatThrownBy(() -> UdfParameterParser.fromJson(missingName))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot parse missing string: name");
+
+    String missingType = "{\"name\":\"x\"}";
+    assertThatThrownBy(() -> UdfParameterParser.fromJson(missingType))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Cannot read type from null node");
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java
new file mode 100644
index 000000000000..aba831a1c753
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.junit.jupiter.api.Test; + +public class TestUdfRepresentationParser { + + @Test + public void testParseUnknownRepresentation() { + String json = "{\"type\":\"python\"}"; + UdfRepresentation unknownRepresentation = UdfRepresentationParser.fromJson(json); + assertThat(unknownRepresentation) + .isEqualTo(ImmutableUnknownUdfRepresentation.builder().type("python").build()); + + assertThatThrownBy(() -> UdfRepresentationParser.toJson(unknownRepresentation)) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Cannot serialize unsupported UDF representation: python"); + } + + @Test + public void testNullRepresentation() { + assertThatThrownBy(() -> UdfRepresentationParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid UDF representation: null"); + } + + @Test + public void testMissingType() { + assertThatThrownBy(() -> UdfRepresentationParser.fromJson("{\"sql\":\"x + 1\"}")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: type"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java new file mode 100644 index 000000000000..322ff984476c --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.fasterxml.jackson.databind.JsonNode; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.JsonUtil; +import org.junit.jupiter.api.Test; + +public class TestUdfTypeUtil { + + @Test + public void testReadPrimitiveType() { + JsonNode node = JsonUtil.mapper().valueToTree("int"); + Object type = UdfTypeUtil.readType(node); + assertThat(type).isInstanceOf(String.class); + assertThat(type).isEqualTo("int"); + } + + @Test + public void testReadDecimalType() { + JsonNode node = JsonUtil.mapper().valueToTree("decimal(9,2)"); + Object type = UdfTypeUtil.readType(node); + assertThat(type).isEqualTo("decimal(9,2)"); + } + + @Test + public void testReadVariantType() { + JsonNode node = JsonUtil.mapper().valueToTree("variant"); + Object type = UdfTypeUtil.readType(node); + assertThat(type).isEqualTo("variant"); + } + + @Test + @SuppressWarnings("unchecked") + public void testReadListType() { + Map listType = ImmutableMap.of("type", "list", "element", "string"); + JsonNode node = 
JsonUtil.mapper().valueToTree(listType); + Object type = UdfTypeUtil.readType(node); + assertThat(type).isInstanceOf(Map.class); + + Map typeMap = (Map) type; + assertThat(typeMap).containsEntry("type", "list"); + assertThat(typeMap).containsEntry("element", "string"); + } + + @Test + @SuppressWarnings("unchecked") + public void testReadMapType() { + Map mapType = ImmutableMap.of("type", "map", "key", "string", "value", "int"); + JsonNode node = JsonUtil.mapper().valueToTree(mapType); + Object type = UdfTypeUtil.readType(node); + + Map typeMap = (Map) type; + assertThat(typeMap).containsEntry("type", "map"); + assertThat(typeMap).containsEntry("key", "string"); + assertThat(typeMap).containsEntry("value", "int"); + } + + @Test + @SuppressWarnings("unchecked") + public void testReadStructType() { + String structJson = + "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"name\",\"type\":\"string\"}]}"; + JsonNode node = JsonUtil.parse(structJson, n -> n); + Object type = UdfTypeUtil.readType(node); + assertThat(type).isInstanceOf(Map.class); + + Map typeMap = (Map) type; + assertThat(typeMap).containsEntry("type", "struct"); + assertThat(typeMap).containsKey("fields"); + } + + @Test + public void testReadNullNode() { + assertThatThrownBy(() -> UdfTypeUtil.readType(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot read type from null node"); + } + + @Test + public void testReadArrayNode() { + JsonNode node = JsonUtil.mapper().valueToTree(new int[] {1, 2, 3}); + assertThatThrownBy(() -> UdfTypeUtil.readType(node)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Cannot parse UDF type from node:"); + } + + @Test + public void testWritePrimitiveType() { + String json = + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("return-type", "int", gen); + gen.writeEndObject(); + }, + false); + + assertThat(json).isEqualTo("{\"return-type\":\"int\"}"); + } + + @Test 
+ public void testWriteNestedType() { + Map listType = ImmutableMap.of("type", "list", "element", "string"); + String json = + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("return-type", listType, gen); + gen.writeEndObject(); + }, + false); + + assertThat(json).contains("\"return-type\""); + assertThat(json).contains("\"type\":\"list\""); + assertThat(json).contains("\"element\":\"string\""); + } + + @Test + public void testWriteNullType() { + assertThatThrownBy( + () -> + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("type", null, gen); + gen.writeEndObject(); + }, + false)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid type: null"); + } + + @Test + public void testWriteUnsupportedType() { + assertThatThrownBy( + () -> + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("type", 42, gen); + gen.writeEndObject(); + }, + false)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Cannot serialize UDF type:"); + } + + @Test + public void testWriteTypeValue() { + String json = + JsonUtil.generate( + gen -> { + gen.writeStartArray(); + UdfTypeUtil.writeTypeValue("int", gen); + gen.writeEndArray(); + }, + false); + + assertThat(json).isEqualTo("[\"int\"]"); + } + + @Test + public void testWriteNestedTypeValue() { + Map listType = ImmutableMap.of("type", "list", "element", "string"); + String json = + JsonUtil.generate( + gen -> { + gen.writeStartArray(); + UdfTypeUtil.writeTypeValue(listType, gen); + gen.writeEndArray(); + }, + false); + + assertThat(json).contains("\"type\":\"list\""); + assertThat(json).contains("\"element\":\"string\""); + } +} From 5afb02f67200b8e44249afbba821dd4215c90dd4 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 20 Apr 2026 16:13:53 -0700 Subject: [PATCH 2/6] Add UdfType --- .../org/apache/iceberg/udf/UdfParameter.java | 7 +- .../java/org/apache/iceberg/udf/UdfType.java | 104 
++++++++++++++++++ .../org/apache/iceberg/udf/UdfTypeUtil.java | 49 +++------ .../iceberg/udf/UnknownUdfRepresentation.java | 2 +- .../iceberg/udf/TestUdfParameterParser.java | 39 ++++--- .../apache/iceberg/udf/TestUdfTypeUtil.java | 59 ++++------ 6 files changed, 170 insertions(+), 90 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfType.java diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java index 2e08e92447f7..15e2b112cc81 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java @@ -26,11 +26,8 @@ public interface UdfParameter { /** The parameter name. */ String name(); - /** - * The parameter data type, encoded as a type string for primitives/semi-structured types or as a - * JSON object for nested types (struct, list, map). - */ - Object type(); + /** The parameter data type. */ + UdfType type(); /** Optional documentation string. */ @Nullable diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfType.java b/api/src/main/java/org/apache/iceberg/udf/UdfType.java new file mode 100644 index 000000000000..2b2cf90c82c1 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfType.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Map; +import java.util.Objects; + +/** + * Represents a UDF data type as defined in the UDF spec. UDF types are based on Iceberg types but + * intentionally omit field IDs and element nullability. Primitive and semi-structured types (e.g., + * "int", "string", "decimal(9,2)", "variant") are represented as type strings. Nested types + * (struct, list, map) are represented as structured JSON objects. + */ +public class UdfType { + private final String primitiveType; + private final Map nestedType; + + private UdfType(String primitiveType, Map nestedType) { + this.primitiveType = primitiveType; + this.nestedType = nestedType; + } + + /** Creates a UdfType for a primitive or semi-structured type (e.g., "int", "decimal(9,2)"). */ + public static UdfType primitive(String type) { + if (type == null) { + throw new IllegalArgumentException("Primitive type string must not be null"); + } + + return new UdfType(type, null); + } + + /** Creates a UdfType for a nested type (struct, list, or map). */ + public static UdfType nested(Map type) { + if (type == null) { + throw new IllegalArgumentException("Nested type map must not be null"); + } + + return new UdfType(null, type); + } + + /** Returns true if this is a primitive or semi-structured type. */ + public boolean isPrimitive() { + return primitiveType != null; + } + + /** Returns the primitive type string, or throws if this is a nested type. 
*/ + public String asPrimitive() { + if (primitiveType == null) { + throw new IllegalStateException("Not a primitive type: " + nestedType); + } + + return primitiveType; + } + + /** Returns the nested type structure, or throws if this is a primitive type. */ + public Map asNested() { + if (nestedType == null) { + throw new IllegalStateException("Not a nested type: " + primitiveType); + } + + return nestedType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfType)) { + return false; + } + + UdfType that = (UdfType) o; + return Objects.equals(primitiveType, that.primitiveType) + && Objects.equals(nestedType, that.nestedType); + } + + @Override + public int hashCode() { + return Objects.hash(primitiveType, nestedType); + } + + @Override + public String toString() { + return isPrimitive() ? primitiveType : nestedType.toString(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java index ef0ccd770fb1..9de37668e32c 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; +import java.util.Map; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.util.JsonUtil; @@ -33,41 +34,32 @@ class UdfTypeUtil { private UdfTypeUtil() {} - /** - * Reads a UDF type from a JSON node. Returns a String for primitive types or a Map for nested - * types. - */ - static Object readType(JsonNode node) { + /** Reads a UDF type from a JSON node. 
*/ + static UdfType readType(JsonNode node) { Preconditions.checkArgument(node != null, "Cannot read type from null node"); if (node.isTextual()) { - return node.asText(); + return UdfType.primitive(node.asText()); } else if (node.isObject()) { - return JsonUtil.mapper().convertValue(node, java.util.Map.class); + Map nested = JsonUtil.mapper().convertValue(node, Map.class); + return UdfType.nested(nested); } else { throw new IllegalArgumentException( String.format("Cannot parse UDF type from node: %s", node)); } } - /** - * Writes a UDF type to a JSON generator under the given field name. The type can be a String - * (primitive) or a Map (nested type). - */ - @SuppressWarnings("unchecked") - static void writeType(String fieldName, Object type, JsonGenerator generator) throws IOException { + /** Writes a UDF type to a JSON generator under the given field name. */ + static void writeType(String fieldName, UdfType type, JsonGenerator generator) + throws IOException { Preconditions.checkArgument(type != null, "Invalid type: null"); - if (type instanceof String) { - generator.writeStringField(fieldName, (String) type); - } else if (type instanceof java.util.Map) { + if (type.isPrimitive()) { + generator.writeStringField(fieldName, type.asPrimitive()); + } else { generator.writeFieldName(fieldName); - ObjectNode objectNode = - JsonUtil.mapper().convertValue((java.util.Map) type, ObjectNode.class); + ObjectNode objectNode = JsonUtil.mapper().convertValue(type.asNested(), ObjectNode.class); generator.writeTree(objectNode); - } else { - throw new IllegalArgumentException( - String.format("Cannot serialize UDF type: %s (%s)", type, type.getClass().getName())); } } @@ -75,19 +67,14 @@ static void writeType(String fieldName, Object type, JsonGenerator generator) th * Writes a UDF type value (without a field name) to a JSON generator. Used when writing array * elements. 
*/ - @SuppressWarnings("unchecked") - static void writeTypeValue(Object type, JsonGenerator generator) throws IOException { + static void writeTypeValue(UdfType type, JsonGenerator generator) throws IOException { Preconditions.checkArgument(type != null, "Invalid type: null"); - if (type instanceof String) { - generator.writeString((String) type); - } else if (type instanceof java.util.Map) { - ObjectNode objectNode = - JsonUtil.mapper().convertValue((java.util.Map) type, ObjectNode.class); - generator.writeTree(objectNode); + if (type.isPrimitive()) { + generator.writeString(type.asPrimitive()); } else { - throw new IllegalArgumentException( - String.format("Cannot serialize UDF type: %s (%s)", type, type.getClass().getName())); + ObjectNode objectNode = JsonUtil.mapper().convertValue(type.asNested(), ObjectNode.class); + generator.writeTree(objectNode); } } } diff --git a/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java index b6031ab034d6..78b62ecfecc1 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java +++ b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java @@ -21,4 +21,4 @@ import org.immutables.value.Value; @Value.Immutable -public interface UnknownUdfRepresentation extends UdfRepresentation {} +interface UnknownUdfRepresentation extends UdfRepresentation {} diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java index 64f971768b48..7d16c9c6cc21 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java @@ -31,11 +31,12 @@ public class TestUdfParameterParser { @Test public void testParsePrimitiveTypeParameter() { String json = "{\"name\":\"x\",\"type\":\"int\"}"; - UdfParameter parameter = 
ImmutableUdfParameter.builder().name("x").type("int").build(); + UdfParameter parameter = + ImmutableUdfParameter.builder().name("x").type(UdfType.primitive("int")).build(); UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("x"); - assertThat(parsed.type()).isEqualTo("int"); + assertThat(parsed.type()).isEqualTo(UdfType.primitive("int")); assertThat(parsed.doc()).isNull(); assertThat(parsed).isEqualTo(parameter); } @@ -44,11 +45,15 @@ public void testParsePrimitiveTypeParameter() { public void testParseParameterWithDoc() { String json = "{\"name\":\"x\",\"type\":\"int\",\"doc\":\"Input integer\"}"; UdfParameter parameter = - ImmutableUdfParameter.builder().name("x").type("int").doc("Input integer").build(); + ImmutableUdfParameter.builder() + .name("x") + .type(UdfType.primitive("int")) + .doc("Input integer") + .build(); UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("x"); - assertThat(parsed.type()).isEqualTo("int"); + assertThat(parsed.type()).isEqualTo(UdfType.primitive("int")); assertThat(parsed.doc()).isEqualTo("Input integer"); assertThat(parsed).isEqualTo(parameter); } @@ -58,7 +63,7 @@ public void testParseDecimalTypeParameter() { String json = "{\"name\":\"amount\",\"type\":\"decimal(9,2)\"}"; UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("amount"); - assertThat(parsed.type()).isEqualTo("decimal(9,2)"); + assertThat(parsed.type()).isEqualTo(UdfType.primitive("decimal(9,2)")); } @Test @@ -66,32 +71,30 @@ public void testParseVariantTypeParameter() { String json = "{\"name\":\"data\",\"type\":\"variant\"}"; UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("data"); - assertThat(parsed.type()).isEqualTo("variant"); + assertThat(parsed.type()).isEqualTo(UdfType.primitive("variant")); } @Test - @SuppressWarnings("unchecked") public void testParseListTypeParameter() { String json = 
"{\"name\":\"items\",\"type\":{\"type\":\"list\",\"element\":\"string\"}}"; UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("items"); - assertThat(parsed.type()).isInstanceOf(Map.class); + assertThat(parsed.type().isPrimitive()).isFalse(); - Map typeMap = (Map) parsed.type(); + Map typeMap = parsed.type().asNested(); assertThat(typeMap).containsEntry("type", "list"); assertThat(typeMap).containsEntry("element", "string"); } @Test - @SuppressWarnings("unchecked") public void testParseMapTypeParameter() { String json = "{\"name\":\"lookup\",\"type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"; UdfParameter parsed = UdfParameterParser.fromJson(json); assertThat(parsed.name()).isEqualTo("lookup"); - assertThat(parsed.type()).isInstanceOf(Map.class); + assertThat(parsed.type().isPrimitive()).isFalse(); - Map typeMap = (Map) parsed.type(); + Map typeMap = parsed.type().asNested(); assertThat(typeMap).containsEntry("type", "map"); assertThat(typeMap).containsEntry("key", "string"); assertThat(typeMap).containsEntry("value", "int"); @@ -100,7 +103,11 @@ public void testParseMapTypeParameter() { @Test public void testRoundTripPrimitiveType() { UdfParameter parameter = - ImmutableUdfParameter.builder().name("x").type("int").doc("Input integer").build(); + ImmutableUdfParameter.builder() + .name("x") + .type(UdfType.primitive("int")) + .doc("Input integer") + .build(); String serialized = UdfParameterParser.toJson(parameter); UdfParameter deserialized = UdfParameterParser.fromJson(serialized); @@ -109,16 +116,16 @@ public void testRoundTripPrimitiveType() { } @Test - @SuppressWarnings("unchecked") public void testRoundTripNestedType() { Map listType = ImmutableMap.of("type", "list", "element", "string"); - UdfParameter parameter = ImmutableUdfParameter.builder().name("items").type(listType).build(); + UdfParameter parameter = + ImmutableUdfParameter.builder().name("items").type(UdfType.nested(listType)).build(); 
String serialized = UdfParameterParser.toJson(parameter); UdfParameter deserialized = UdfParameterParser.fromJson(serialized); assertThat(deserialized.name()).isEqualTo("items"); - Map roundTrippedType = (Map) deserialized.type(); + Map roundTrippedType = deserialized.type().asNested(); assertThat(roundTrippedType).containsEntry("type", "list"); assertThat(roundTrippedType).containsEntry("element", "string"); } diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java index 322ff984476c..81bfad065738 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ -32,61 +32,61 @@ public class TestUdfTypeUtil { @Test public void testReadPrimitiveType() { JsonNode node = JsonUtil.mapper().valueToTree("int"); - Object type = UdfTypeUtil.readType(node); - assertThat(type).isInstanceOf(String.class); - assertThat(type).isEqualTo("int"); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isTrue(); + assertThat(type.asPrimitive()).isEqualTo("int"); } @Test public void testReadDecimalType() { JsonNode node = JsonUtil.mapper().valueToTree("decimal(9,2)"); - Object type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo("decimal(9,2)"); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isTrue(); + assertThat(type.asPrimitive()).isEqualTo("decimal(9,2)"); } @Test public void testReadVariantType() { JsonNode node = JsonUtil.mapper().valueToTree("variant"); - Object type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo("variant"); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isTrue(); + assertThat(type.asPrimitive()).isEqualTo("variant"); } @Test - @SuppressWarnings("unchecked") public void testReadListType() { Map listType = ImmutableMap.of("type", "list", "element", "string"); JsonNode node = 
JsonUtil.mapper().valueToTree(listType); - Object type = UdfTypeUtil.readType(node); - assertThat(type).isInstanceOf(Map.class); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isFalse(); - Map typeMap = (Map) type; + Map typeMap = type.asNested(); assertThat(typeMap).containsEntry("type", "list"); assertThat(typeMap).containsEntry("element", "string"); } @Test - @SuppressWarnings("unchecked") public void testReadMapType() { Map mapType = ImmutableMap.of("type", "map", "key", "string", "value", "int"); JsonNode node = JsonUtil.mapper().valueToTree(mapType); - Object type = UdfTypeUtil.readType(node); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isFalse(); - Map typeMap = (Map) type; + Map typeMap = type.asNested(); assertThat(typeMap).containsEntry("type", "map"); assertThat(typeMap).containsEntry("key", "string"); assertThat(typeMap).containsEntry("value", "int"); } @Test - @SuppressWarnings("unchecked") public void testReadStructType() { String structJson = "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"name\",\"type\":\"string\"}]}"; JsonNode node = JsonUtil.parse(structJson, n -> n); - Object type = UdfTypeUtil.readType(node); - assertThat(type).isInstanceOf(Map.class); + UdfType type = UdfTypeUtil.readType(node); + assertThat(type.isPrimitive()).isFalse(); - Map typeMap = (Map) type; + Map typeMap = type.asNested(); assertThat(typeMap).containsEntry("type", "struct"); assertThat(typeMap).containsKey("fields"); } @@ -112,7 +112,7 @@ public void testWritePrimitiveType() { JsonUtil.generate( gen -> { gen.writeStartObject(); - UdfTypeUtil.writeType("return-type", "int", gen); + UdfTypeUtil.writeType("return-type", UdfType.primitive("int"), gen); gen.writeEndObject(); }, false); @@ -127,7 +127,7 @@ public void testWriteNestedType() { JsonUtil.generate( gen -> { gen.writeStartObject(); - UdfTypeUtil.writeType("return-type", listType, gen); + 
UdfTypeUtil.writeType("return-type", UdfType.nested(listType), gen); gen.writeEndObject(); }, false); @@ -152,28 +152,13 @@ public void testWriteNullType() { .hasMessage("Invalid type: null"); } - @Test - public void testWriteUnsupportedType() { - assertThatThrownBy( - () -> - JsonUtil.generate( - gen -> { - gen.writeStartObject(); - UdfTypeUtil.writeType("type", 42, gen); - gen.writeEndObject(); - }, - false)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageStartingWith("Cannot serialize UDF type:"); - } - @Test public void testWriteTypeValue() { String json = JsonUtil.generate( gen -> { gen.writeStartArray(); - UdfTypeUtil.writeTypeValue("int", gen); + UdfTypeUtil.writeTypeValue(UdfType.primitive("int"), gen); gen.writeEndArray(); }, false); @@ -188,7 +173,7 @@ public void testWriteNestedTypeValue() { JsonUtil.generate( gen -> { gen.writeStartArray(); - UdfTypeUtil.writeTypeValue(listType, gen); + UdfTypeUtil.writeTypeValue(UdfType.nested(listType), gen); gen.writeEndArray(); }, false); From b96a2fe9a857ad1e05beb8674d4bf8da283af198 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 1 May 2026 20:13:50 -0700 Subject: [PATCH 3/6] address comments --- .../org/apache/iceberg/udf/UdfFieldType.java | 72 ++++++++ .../org/apache/iceberg/udf/UdfListType.java | 74 ++++++++ .../org/apache/iceberg/udf/UdfMapType.java | 82 +++++++++ .../org/apache/iceberg/udf/UdfParameter.java | 2 +- .../apache/iceberg/udf/UdfPrimitiveType.java | 78 ++++++++ .../apache/iceberg/udf/UdfRepresentation.java | 9 +- .../org/apache/iceberg/udf/UdfStructType.java | 88 +++++++++ .../java/org/apache/iceberg/udf/UdfType.java | 87 +++------ .../iceberg/udf/UdfRepresentationParser.java | 8 +- .../org/apache/iceberg/udf/UdfTypeUtil.java | 109 ++++++++--- .../iceberg/udf/UnknownUdfRepresentation.java | 4 + .../udf/TestSQLUdfRepresentationParser.java | 12 +- .../iceberg/udf/TestUdfParameterParser.java | 163 +++++++++++------ .../udf/TestUdfRepresentationParser.java | 8 +- 
.../apache/iceberg/udf/TestUdfTypeUtil.java | 171 +++++++++++------- 15 files changed, 739 insertions(+), 228 deletions(-) create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfListType.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfMapType.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfStructType.java diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java b/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java new file mode 100644 index 000000000000..4bad7671ecce --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** A field within a {@link UdfStructType}, with a name and a type. 
*/ +public final class UdfFieldType { + + private final String name; + private final UdfType type; + + public static UdfFieldType of(String name, UdfType type) { + Preconditions.checkArgument(name != null, "Invalid field name: null"); + Preconditions.checkArgument(type != null, "Invalid field type: null"); + return new UdfFieldType(name, type); + } + + private UdfFieldType(String name, UdfType type) { + this.name = name; + this.type = type; + } + + public String name() { + return name; + } + + public UdfType type() { + return type; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfFieldType)) { + return false; + } + + UdfFieldType that = (UdfFieldType) o; + return Objects.equals(name, that.name) && Objects.equals(type, that.type); + } + + @Override + public int hashCode() { + return Objects.hash(name, type); + } + + @Override + public String toString() { + return String.format("%s:%s", name, type); + } +} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfListType.java b/api/src/main/java/org/apache/iceberg/udf/UdfListType.java new file mode 100644 index 000000000000..b6d7c06346c8 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfListType.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** A UDF list type with an element type. */ +public final class UdfListType implements UdfType { + + private final UdfType elementType; + + public static UdfListType of(UdfType elementType) { + Preconditions.checkArgument(elementType != null, "Invalid element type: null"); + return new UdfListType(elementType); + } + + private UdfListType(UdfType elementType) { + this.elementType = elementType; + } + + @Override + public TypeId typeId() { + return TypeId.LIST; + } + + @Override + public UdfListType asListType() { + return this; + } + + public UdfType elementType() { + return elementType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfListType)) { + return false; + } + + return Objects.equals(elementType, ((UdfListType) o).elementType); + } + + @Override + public int hashCode() { + return Objects.hash(UdfListType.class, elementType); + } + + @Override + public String toString() { + return String.format("list<%s>", elementType); + } +} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java b/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java new file mode 100644 index 000000000000..dc0b6d07188d --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** A UDF map type with key and value types. */ +public final class UdfMapType implements UdfType { + + private final UdfType keyType; + private final UdfType valueType; + + public static UdfMapType of(UdfType keyType, UdfType valueType) { + Preconditions.checkArgument(keyType != null, "Invalid key type: null"); + Preconditions.checkArgument(valueType != null, "Invalid value type: null"); + return new UdfMapType(keyType, valueType); + } + + private UdfMapType(UdfType keyType, UdfType valueType) { + this.keyType = keyType; + this.valueType = valueType; + } + + @Override + public TypeId typeId() { + return TypeId.MAP; + } + + @Override + public UdfMapType asMapType() { + return this; + } + + public UdfType keyType() { + return keyType; + } + + public UdfType valueType() { + return valueType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfMapType)) { + return false; + } + + UdfMapType that = (UdfMapType) o; + return Objects.equals(keyType, that.keyType) && Objects.equals(valueType, that.valueType); + } + + @Override + public int hashCode() { + return Objects.hash(UdfMapType.class, keyType, valueType); + } + + @Override + public String toString() { + return String.format("map<%s,%s>", keyType, valueType); + } +} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java 
b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java index 15e2b112cc81..f79ab59a345a 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfParameter.java @@ -29,7 +29,7 @@ public interface UdfParameter { /** The parameter data type. */ UdfType type(); - /** Optional documentation string. */ + /** A documentation string for the parameter. */ @Nullable String doc(); } diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java b/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java new file mode 100644 index 000000000000..7ff73b46af3d --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Objects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +/** + * A UDF primitive or semi-structured type, encoded as a type string (e.g., {@code int}, {@code + * string}, {@code decimal(9,2)}, {@code variant}). 
+ */ +public final class UdfPrimitiveType implements UdfType { + + private final String typeString; + + public static UdfPrimitiveType of(String typeString) { + Preconditions.checkArgument(typeString != null, "Invalid primitive type: null"); + return new UdfPrimitiveType(typeString); + } + + private UdfPrimitiveType(String typeString) { + this.typeString = typeString; + } + + @Override + public TypeId typeId() { + return TypeId.PRIMITIVE; + } + + @Override + public UdfPrimitiveType asPrimitive() { + return this; + } + + /** The primitive or semi-structured type string. */ + public String typeString() { + return typeString; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfPrimitiveType)) { + return false; + } + + return Objects.equals(typeString, ((UdfPrimitiveType) o).typeString); + } + + @Override + public int hashCode() { + return Objects.hashCode(typeString); + } + + @Override + public String toString() { + return typeString; + } +} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java b/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java index d1884639604a..e58210f5f205 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfRepresentation.java @@ -18,14 +18,21 @@ */ package org.apache.iceberg.udf; -/** A representation of a UDF implementation. */ +/** + * Describes how a UDF's logic is expressed, for example as a SQL body with a specific dialect. A + * UDF definition version may carry one or more representations so that engines can pick a form they + * understand. + */ public interface UdfRepresentation { + /** Standard representation type names used in UDF metadata. */ class Type { private Type() {} + /** A SQL body representation, see {@link SQLUdfRepresentation}. */ public static final String SQL = "sql"; } + /** Returns the representation type, e.g., {@link Type#SQL}. 
*/ String type(); } diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java b/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java new file mode 100644 index 000000000000..4eff9a05a01a --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; + +/** + * A UDF struct type with an ordered list of named fields. Unlike Iceberg struct types, UDF struct + * fields do not have field IDs. + */ +public final class UdfStructType implements UdfType { + + private final List fields; + + public static UdfStructType of(UdfFieldType... 
fields) { + Preconditions.checkArgument(fields != null, "Invalid fields: null"); + return of(Arrays.asList(fields)); + } + + public static UdfStructType of(List fields) { + Preconditions.checkArgument(fields != null, "Invalid fields: null"); + return new UdfStructType(ImmutableList.copyOf(fields)); + } + + private UdfStructType(List fields) { + this.fields = fields; + } + + @Override + public TypeId typeId() { + return TypeId.STRUCT; + } + + @Override + public UdfStructType asStructType() { + return this; + } + + public List fields() { + return fields; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof UdfStructType)) { + return false; + } + + return Objects.equals(fields, ((UdfStructType) o).fields); + } + + @Override + public int hashCode() { + return Objects.hash(UdfStructType.class, fields); + } + + @Override + public String toString() { + return fields.stream() + .map(UdfFieldType::toString) + .collect(Collectors.joining(",", "struct<", ">")); + } +} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfType.java b/api/src/main/java/org/apache/iceberg/udf/UdfType.java index 2b2cf90c82c1..4a6d70954381 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfType.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfType.java @@ -18,87 +18,52 @@ */ package org.apache.iceberg.udf; -import java.util.Map; -import java.util.Objects; - /** * Represents a UDF data type as defined in the UDF spec. UDF types are based on Iceberg types but - * intentionally omit field IDs and element nullability. Primitive and semi-structured types (e.g., - * "int", "string", "decimal(9,2)", "variant") are represented as type strings. Nested types - * (struct, list, map) are represented as structured JSON objects. + * intentionally omit field IDs and element nullability. 
Implementations include {@link + * UdfPrimitiveType} for primitive and semi-structured types, and the nested types {@link + * UdfListType}, {@link UdfMapType}, and {@link UdfStructType}. */ -public class UdfType { - private final String primitiveType; - private final Map nestedType; +public interface UdfType { - private UdfType(String primitiveType, Map nestedType) { - this.primitiveType = primitiveType; - this.nestedType = nestedType; + enum TypeId { + PRIMITIVE, + LIST, + MAP, + STRUCT } - /** Creates a UdfType for a primitive or semi-structured type (e.g., "int", "decimal(9,2)"). */ - public static UdfType primitive(String type) { - if (type == null) { - throw new IllegalArgumentException("Primitive type string must not be null"); - } + TypeId typeId(); - return new UdfType(type, null); + default boolean isPrimitive() { + return typeId() == TypeId.PRIMITIVE; } - /** Creates a UdfType for a nested type (struct, list, or map). */ - public static UdfType nested(Map type) { - if (type == null) { - throw new IllegalArgumentException("Nested type map must not be null"); - } - - return new UdfType(null, type); + default boolean isListType() { + return typeId() == TypeId.LIST; } - /** Returns true if this is a primitive or semi-structured type. */ - public boolean isPrimitive() { - return primitiveType != null; + default boolean isMapType() { + return typeId() == TypeId.MAP; } - /** Returns the primitive type string, or throws if this is a nested type. */ - public String asPrimitive() { - if (primitiveType == null) { - throw new IllegalStateException("Not a primitive type: " + nestedType); - } - - return primitiveType; + default boolean isStructType() { + return typeId() == TypeId.STRUCT; } - /** Returns the nested type structure, or throws if this is a primitive type. 
*/ - public Map asNested() { - if (nestedType == null) { - throw new IllegalStateException("Not a nested type: " + primitiveType); - } - - return nestedType; + default UdfPrimitiveType asPrimitive() { + throw new IllegalArgumentException("Not a primitive type: " + this); } - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfType)) { - return false; - } - - UdfType that = (UdfType) o; - return Objects.equals(primitiveType, that.primitiveType) - && Objects.equals(nestedType, that.nestedType); + default UdfListType asListType() { + throw new IllegalArgumentException("Not a list type: " + this); } - @Override - public int hashCode() { - return Objects.hash(primitiveType, nestedType); + default UdfMapType asMapType() { + throw new IllegalArgumentException("Not a map type: " + this); } - @Override - public String toString() { - return isPrimitive() ? primitiveType : nestedType.toString(); + default UdfStructType asStructType() { + throw new IllegalArgumentException("Not a struct type: " + this); } } diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java index fab2d800a873..4128d64e16d8 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java @@ -32,7 +32,7 @@ private UdfRepresentationParser() {} static void toJson(UdfRepresentation representation, JsonGenerator generator) throws IOException { Preconditions.checkArgument(representation != null, "Invalid UDF representation: null"); - switch (representation.type().toLowerCase(Locale.ENGLISH)) { + switch (representation.type().toLowerCase(Locale.ROOT)) { case UdfRepresentation.Type.SQL: SQLUdfRepresentationParser.toJson((SQLUdfRepresentation) representation, generator); break; @@ -44,8 +44,8 @@ static void toJson(UdfRepresentation representation, JsonGenerator 
generator) th } } - static String toJson(UdfRepresentation entry) { - return JsonUtil.generate(gen -> toJson(entry, gen), false); + static String toJson(UdfRepresentation representation) { + return JsonUtil.generate(gen -> toJson(representation, gen), false); } static UdfRepresentation fromJson(String json) { @@ -56,7 +56,7 @@ static UdfRepresentation fromJson(JsonNode node) { Preconditions.checkArgument(node != null, "Cannot parse UDF representation from null object"); Preconditions.checkArgument( node.isObject(), "Cannot parse UDF representation from non-object: %s", node); - String type = JsonUtil.getString(TYPE, node).toLowerCase(Locale.ENGLISH); + String type = JsonUtil.getString(TYPE, node).toLowerCase(Locale.ROOT); switch (type) { case UdfRepresentation.Type.SQL: return SQLUdfRepresentationParser.fromJson(node); diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java index 9de37668e32c..b3595047e373 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java @@ -20,18 +20,28 @@ import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; -import java.util.Map; +import java.util.List; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.util.JsonUtil; /** - * Utility for reading and writing UDF types. Types can be either a primitive type string (e.g., - * "int", "string", "variant") or a JSON object for nested types (struct, list, map). + * Utility for reading and writing UDF types. A type is either a primitive type string (e.g., "int", + * "string", "variant") or a JSON object for nested types (struct, list, map). 
*/ class UdfTypeUtil { + private static final String TYPE = "type"; + private static final String LIST = "list"; + private static final String MAP = "map"; + private static final String STRUCT = "struct"; + private static final String ELEMENT = "element"; + private static final String KEY = "key"; + private static final String VALUE = "value"; + private static final String FIELDS = "fields"; + private static final String NAME = "name"; + private UdfTypeUtil() {} /** Reads a UDF type from a JSON node. */ @@ -39,42 +49,87 @@ static UdfType readType(JsonNode node) { Preconditions.checkArgument(node != null, "Cannot read type from null node"); if (node.isTextual()) { - return UdfType.primitive(node.asText()); + return UdfPrimitiveType.of(node.asText()); } else if (node.isObject()) { - Map nested = JsonUtil.mapper().convertValue(node, Map.class); - return UdfType.nested(nested); + String typeName = JsonUtil.getString(TYPE, node); + switch (typeName) { + case LIST: + return UdfListType.of(readType(node.get(ELEMENT))); + case MAP: + return UdfMapType.of(readType(node.get(KEY)), readType(node.get(VALUE))); + case STRUCT: + return readStruct(node); + default: + throw new IllegalArgumentException( + String.format("Cannot parse UDF type from object with type: %s", typeName)); + } } else { throw new IllegalArgumentException( String.format("Cannot parse UDF type from node: %s", node)); } } - /** Writes a UDF type to a JSON generator under the given field name. 
*/ - static void writeType(String fieldName, UdfType type, JsonGenerator generator) - throws IOException { - Preconditions.checkArgument(type != null, "Invalid type: null"); + private static UdfStructType readStruct(JsonNode node) { + JsonNode fieldsNode = node.get(FIELDS); + Preconditions.checkArgument( + fieldsNode != null && fieldsNode.isArray(), + "Cannot parse struct type from non-array fields: %s", + fieldsNode); - if (type.isPrimitive()) { - generator.writeStringField(fieldName, type.asPrimitive()); - } else { - generator.writeFieldName(fieldName); - ObjectNode objectNode = JsonUtil.mapper().convertValue(type.asNested(), ObjectNode.class); - generator.writeTree(objectNode); + ImmutableList.Builder fields = ImmutableList.builder(); + for (JsonNode fieldNode : fieldsNode) { + Preconditions.checkArgument( + fieldNode.isObject(), "Cannot parse struct field from non-object: %s", fieldNode); + fields.add( + UdfFieldType.of(JsonUtil.getString(NAME, fieldNode), readType(fieldNode.get(TYPE)))); } + + return UdfStructType.of(fields.build()); } - /** - * Writes a UDF type value (without a field name) to a JSON generator. Used when writing array - * elements. - */ - static void writeTypeValue(UdfType type, JsonGenerator generator) throws IOException { + /** Writes a UDF type to a JSON generator under the given field name. 
*/ + static void writeType(String fieldName, UdfType type, JsonGenerator generator) + throws IOException { Preconditions.checkArgument(type != null, "Invalid type: null"); + generator.writeFieldName(fieldName); + writeTypeValue(type, generator); + } - if (type.isPrimitive()) { - generator.writeString(type.asPrimitive()); - } else { - ObjectNode objectNode = JsonUtil.mapper().convertValue(type.asNested(), ObjectNode.class); - generator.writeTree(objectNode); + private static void writeTypeValue(UdfType type, JsonGenerator generator) throws IOException { + switch (type.typeId()) { + case PRIMITIVE: + generator.writeString(type.asPrimitive().typeString()); + return; + case LIST: + generator.writeStartObject(); + generator.writeStringField(TYPE, LIST); + writeType(ELEMENT, type.asListType().elementType(), generator); + generator.writeEndObject(); + return; + case MAP: + UdfMapType mapType = type.asMapType(); + generator.writeStartObject(); + generator.writeStringField(TYPE, MAP); + writeType(KEY, mapType.keyType(), generator); + writeType(VALUE, mapType.valueType(), generator); + generator.writeEndObject(); + return; + case STRUCT: + List fields = type.asStructType().fields(); + generator.writeStartObject(); + generator.writeStringField(TYPE, STRUCT); + generator.writeArrayFieldStart(FIELDS); + for (UdfFieldType field : fields) { + generator.writeStartObject(); + generator.writeStringField(NAME, field.name()); + writeType(TYPE, field.type(), generator); + generator.writeEndObject(); + } + generator.writeEndArray(); + generator.writeEndObject(); + return; + default: + throw new IllegalArgumentException("Unknown UDF type: " + type); } } } diff --git a/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java index 78b62ecfecc1..f8d9fd28fe4d 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java +++ 
b/core/src/main/java/org/apache/iceberg/udf/UnknownUdfRepresentation.java @@ -21,4 +21,8 @@ import org.immutables.value.Value; @Value.Immutable +@Value.Style( + typeImmutable = "ImmutableUnknownUdfRepresentation", + visibilityString = "PACKAGE", + builderVisibilityString = "PACKAGE") interface UnknownUdfRepresentation extends UdfRepresentation {} diff --git a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java index 1fc960c11d89..ebbc846063de 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java @@ -24,10 +24,10 @@ import com.fasterxml.jackson.databind.JsonNode; import org.junit.jupiter.api.Test; -public class TestSQLUdfRepresentationParser { +class TestSQLUdfRepresentationParser { @Test - public void testParseSqlUdfRepresentation() { + void parseSqlUdfRepresentation() { String json = "{\"type\":\"sql\", \"sql\": \"x + 1\", \"dialect\": \"spark\"}"; SQLUdfRepresentation representation = ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build(); @@ -38,7 +38,7 @@ public void testParseSqlUdfRepresentation() { } @Test - public void testParseMissingRequiredFields() { + void parseMissingRequiredFields() { String missingDialect = "{\"type\":\"sql\", \"sql\": \"x + 1\"}"; assertThatThrownBy(() -> UdfRepresentationParser.fromJson(missingDialect)) .isInstanceOf(IllegalArgumentException.class) @@ -56,7 +56,7 @@ public void testParseMissingRequiredFields() { } @Test - public void testRoundTripSerialization() { + void roundTripSerialization() { String expectedJson = "{\"type\":\"sql\",\"sql\":\"x + 1\",\"dialect\":\"spark\"}"; SQLUdfRepresentation representation = ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build(); @@ -70,7 +70,7 @@ public void testRoundTripSerialization() { } @Test - public void 
testRoundTripWithTrinoDialect() { + void roundTripWithTrinoDialect() { SQLUdfRepresentation representation = ImmutableSQLUdfRepresentation.builder().sql("x + 1.0").dialect("trino").build(); @@ -85,7 +85,7 @@ public void testRoundTripWithTrinoDialect() { } @Test - public void testNullSqlUdfRepresentation() { + void nullSqlUdfRepresentation() { assertThatThrownBy(() -> SQLUdfRepresentationParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid SQL UDF representation: null"); diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java index 7d16c9c6cc21..10a6f5f8dfb9 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java @@ -22,116 +22,165 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import com.fasterxml.jackson.databind.JsonNode; -import java.util.Map; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.Test; -public class TestUdfParameterParser { +class TestUdfParameterParser { @Test - public void testParsePrimitiveTypeParameter() { + void parsePrimitiveTypeParameter() { String json = "{\"name\":\"x\",\"type\":\"int\"}"; - UdfParameter parameter = - ImmutableUdfParameter.builder().name("x").type(UdfType.primitive("int")).build(); + UdfParameter expected = + ImmutableUdfParameter.builder().name("x").type(UdfPrimitiveType.of("int")).build(); - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("x"); - assertThat(parsed.type()).isEqualTo(UdfType.primitive("int")); - assertThat(parsed.doc()).isNull(); - assertThat(parsed).isEqualTo(parameter); + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testParseParameterWithDoc() { + void parseParameterWithDoc() { String json = 
"{\"name\":\"x\",\"type\":\"int\",\"doc\":\"Input integer\"}"; - UdfParameter parameter = + UdfParameter expected = ImmutableUdfParameter.builder() .name("x") - .type(UdfType.primitive("int")) + .type(UdfPrimitiveType.of("int")) .doc("Input integer") .build(); - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("x"); - assertThat(parsed.type()).isEqualTo(UdfType.primitive("int")); - assertThat(parsed.doc()).isEqualTo("Input integer"); - assertThat(parsed).isEqualTo(parameter); + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testParseDecimalTypeParameter() { + void parseDecimalTypeParameter() { String json = "{\"name\":\"amount\",\"type\":\"decimal(9,2)\"}"; - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("amount"); - assertThat(parsed.type()).isEqualTo(UdfType.primitive("decimal(9,2)")); + UdfParameter expected = + ImmutableUdfParameter.builder() + .name("amount") + .type(UdfPrimitiveType.of("decimal(9,2)")) + .build(); + + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testParseVariantTypeParameter() { + void parseVariantTypeParameter() { String json = "{\"name\":\"data\",\"type\":\"variant\"}"; - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("data"); - assertThat(parsed.type()).isEqualTo(UdfType.primitive("variant")); + UdfParameter expected = + ImmutableUdfParameter.builder().name("data").type(UdfPrimitiveType.of("variant")).build(); + + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testParseListTypeParameter() { + void parseListTypeParameter() { String json = "{\"name\":\"items\",\"type\":{\"type\":\"list\",\"element\":\"string\"}}"; - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("items"); - assertThat(parsed.type().isPrimitive()).isFalse(); + 
UdfParameter expected = + ImmutableUdfParameter.builder() + .name("items") + .type(UdfListType.of(UdfPrimitiveType.of("string"))) + .build(); - Map typeMap = parsed.type().asNested(); - assertThat(typeMap).containsEntry("type", "list"); - assertThat(typeMap).containsEntry("element", "string"); + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testParseMapTypeParameter() { + void parseMapTypeParameter() { String json = "{\"name\":\"lookup\",\"type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"; - UdfParameter parsed = UdfParameterParser.fromJson(json); - assertThat(parsed.name()).isEqualTo("lookup"); - assertThat(parsed.type().isPrimitive()).isFalse(); - - Map typeMap = parsed.type().asNested(); - assertThat(typeMap).containsEntry("type", "map"); - assertThat(typeMap).containsEntry("key", "string"); - assertThat(typeMap).containsEntry("value", "int"); + UdfParameter expected = + ImmutableUdfParameter.builder() + .name("lookup") + .type(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))) + .build(); + + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @Test - public void testRoundTripPrimitiveType() { + void parseStructTypeParameter() { + String json = + "{\"name\":\"row\",\"type\":{\"type\":\"struct\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"int\"}," + + "{\"name\":\"label\",\"type\":\"string\"}]}}"; + UdfParameter expected = + ImmutableUdfParameter.builder() + .name("row") + .type( + UdfStructType.of( + UdfFieldType.of("id", UdfPrimitiveType.of("int")), + UdfFieldType.of("label", UdfPrimitiveType.of("string")))) + .build(); + + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); + } + + @Test + void parseNestedListOfStruct() { + String json = + "{\"name\":\"records\",\"type\":{\"type\":\"list\",\"element\":" + + "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}}}"; + UdfParameter expected = + ImmutableUdfParameter.builder() + 
.name("records") + .type( + UdfListType.of(UdfStructType.of(UdfFieldType.of("id", UdfPrimitiveType.of("int"))))) + .build(); + + assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); + } + + @Test + void roundTripPrimitiveType() { UdfParameter parameter = ImmutableUdfParameter.builder() .name("x") - .type(UdfType.primitive("int")) + .type(UdfPrimitiveType.of("int")) .doc("Input integer") .build(); String serialized = UdfParameterParser.toJson(parameter); - UdfParameter deserialized = UdfParameterParser.fromJson(serialized); + assertThat(UdfParameterParser.fromJson(serialized)).isEqualTo(parameter); + } + + @Test + void roundTripListType() { + UdfParameter parameter = + ImmutableUdfParameter.builder() + .name("items") + .type(UdfListType.of(UdfPrimitiveType.of("string"))) + .build(); - assertThat(deserialized).isEqualTo(parameter); + String serialized = UdfParameterParser.toJson(parameter); + assertThat(UdfParameterParser.fromJson(serialized)).isEqualTo(parameter); } @Test - public void testRoundTripNestedType() { - Map listType = ImmutableMap.of("type", "list", "element", "string"); + void roundTripMapType() { UdfParameter parameter = - ImmutableUdfParameter.builder().name("items").type(UdfType.nested(listType)).build(); + ImmutableUdfParameter.builder() + .name("lookup") + .type(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))) + .build(); String serialized = UdfParameterParser.toJson(parameter); - UdfParameter deserialized = UdfParameterParser.fromJson(serialized); + assertThat(UdfParameterParser.fromJson(serialized)).isEqualTo(parameter); + } + + @Test + void roundTripStructType() { + UdfParameter parameter = + ImmutableUdfParameter.builder() + .name("row") + .type( + UdfStructType.of( + UdfFieldType.of("id", UdfPrimitiveType.of("int")), + UdfFieldType.of("label", UdfPrimitiveType.of("string")))) + .build(); - assertThat(deserialized.name()).isEqualTo("items"); - Map roundTrippedType = deserialized.type().asNested(); - 
assertThat(roundTrippedType).containsEntry("type", "list"); - assertThat(roundTrippedType).containsEntry("element", "string"); + String serialized = UdfParameterParser.toJson(parameter); + assertThat(UdfParameterParser.fromJson(serialized)).isEqualTo(parameter); } @Test - public void testNullParameter() { + void nullParameter() { assertThatThrownBy(() -> UdfParameterParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid UDF parameter: null"); @@ -142,7 +191,7 @@ public void testNullParameter() { } @Test - public void testMissingRequiredFields() { + void missingRequiredFields() { String missingName = "{\"type\":\"int\"}"; assertThatThrownBy(() -> UdfParameterParser.fromJson(missingName)) .isInstanceOf(IllegalArgumentException.class) diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java index aba831a1c753..9b26ccfb9216 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java @@ -23,10 +23,10 @@ import org.junit.jupiter.api.Test; -public class TestUdfRepresentationParser { +class TestUdfRepresentationParser { @Test - public void testParseUnknownRepresentation() { + void parseUnknownRepresentation() { String json = "{\"type\":\"python\"}"; UdfRepresentation unknownRepresentation = UdfRepresentationParser.fromJson(json); assertThat(unknownRepresentation) @@ -38,14 +38,14 @@ public void testParseUnknownRepresentation() { } @Test - public void testNullRepresentation() { + void nullRepresentation() { assertThatThrownBy(() -> UdfRepresentationParser.toJson(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid UDF representation: null"); } @Test - public void testMissingType() { + void missingType() { assertThatThrownBy(() -> UdfRepresentationParser.fromJson("{\"sql\":\"x + 1\"}")) 
.isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: type"); diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java index 81bfad065738..41726eb10e53 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ -22,84 +22,84 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import com.fasterxml.jackson.databind.JsonNode; -import java.util.Map; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.util.JsonUtil; import org.junit.jupiter.api.Test; -public class TestUdfTypeUtil { +class TestUdfTypeUtil { @Test - public void testReadPrimitiveType() { + void readPrimitiveType() { JsonNode node = JsonUtil.mapper().valueToTree("int"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isTrue(); - assertThat(type.asPrimitive()).isEqualTo("int"); + assertThat(type).isEqualTo(UdfPrimitiveType.of("int")); } @Test - public void testReadDecimalType() { + void readDecimalType() { JsonNode node = JsonUtil.mapper().valueToTree("decimal(9,2)"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isTrue(); - assertThat(type.asPrimitive()).isEqualTo("decimal(9,2)"); + assertThat(type).isEqualTo(UdfPrimitiveType.of("decimal(9,2)")); } @Test - public void testReadVariantType() { + void readVariantType() { JsonNode node = JsonUtil.mapper().valueToTree("variant"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isTrue(); - assertThat(type.asPrimitive()).isEqualTo("variant"); + assertThat(type).isEqualTo(UdfPrimitiveType.of("variant")); } @Test - public void testReadListType() { - Map listType = ImmutableMap.of("type", "list", "element", "string"); - JsonNode node = JsonUtil.mapper().valueToTree(listType); + void readListType() { + JsonNode 
node = JsonUtil.parse("{\"type\":\"list\",\"element\":\"string\"}", n -> n); UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isFalse(); - - Map typeMap = type.asNested(); - assertThat(typeMap).containsEntry("type", "list"); - assertThat(typeMap).containsEntry("element", "string"); + assertThat(type).isEqualTo(UdfListType.of(UdfPrimitiveType.of("string"))); } @Test - public void testReadMapType() { - Map mapType = ImmutableMap.of("type", "map", "key", "string", "value", "int"); - JsonNode node = JsonUtil.mapper().valueToTree(mapType); + void readMapType() { + JsonNode node = + JsonUtil.parse("{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}", n -> n); UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isFalse(); - - Map typeMap = type.asNested(); - assertThat(typeMap).containsEntry("type", "map"); - assertThat(typeMap).containsEntry("key", "string"); - assertThat(typeMap).containsEntry("value", "int"); + assertThat(type) + .isEqualTo(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); } @Test - public void testReadStructType() { + void readStructType() { String structJson = - "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"name\",\"type\":\"string\"}]}"; + "{\"type\":\"struct\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"int\"}," + + "{\"name\":\"name\",\"type\":\"string\"}]}"; JsonNode node = JsonUtil.parse(structJson, n -> n); - UdfType type = UdfTypeUtil.readType(node); - assertThat(type.isPrimitive()).isFalse(); + UdfType expected = + UdfStructType.of( + UdfFieldType.of("id", UdfPrimitiveType.of("int")), + UdfFieldType.of("name", UdfPrimitiveType.of("string"))); + + assertThat(UdfTypeUtil.readType(node)).isEqualTo(expected); + } + + @Test + void readNestedListOfMap() { + String json = + "{\"type\":\"list\",\"element\":" + + "{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"; + JsonNode node = JsonUtil.parse(json, n -> n); + UdfType 
expected = + UdfListType.of(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); - Map typeMap = type.asNested(); - assertThat(typeMap).containsEntry("type", "struct"); - assertThat(typeMap).containsKey("fields"); + assertThat(UdfTypeUtil.readType(node)).isEqualTo(expected); } @Test - public void testReadNullNode() { + void readNullNode() { assertThatThrownBy(() -> UdfTypeUtil.readType(null)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot read type from null node"); } @Test - public void testReadArrayNode() { + void readArrayNode() { JsonNode node = JsonUtil.mapper().valueToTree(new int[] {1, 2, 3}); assertThatThrownBy(() -> UdfTypeUtil.readType(node)) .isInstanceOf(IllegalArgumentException.class) @@ -107,12 +107,20 @@ public void testReadArrayNode() { } @Test - public void testWritePrimitiveType() { + void readUnknownNestedType() { + JsonNode node = JsonUtil.parse("{\"type\":\"set\"}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(node)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Cannot parse UDF type from object with type: set"); + } + + @Test + void writePrimitiveType() { String json = JsonUtil.generate( gen -> { gen.writeStartObject(); - UdfTypeUtil.writeType("return-type", UdfType.primitive("int"), gen); + UdfTypeUtil.writeType("return-type", UdfPrimitiveType.of("int"), gen); gen.writeEndObject(); }, false); @@ -121,24 +129,60 @@ public void testWritePrimitiveType() { } @Test - public void testWriteNestedType() { - Map listType = ImmutableMap.of("type", "list", "element", "string"); + void writeListType() { + UdfType listType = UdfListType.of(UdfPrimitiveType.of("string")); String json = JsonUtil.generate( gen -> { gen.writeStartObject(); - UdfTypeUtil.writeType("return-type", UdfType.nested(listType), gen); + UdfTypeUtil.writeType("return-type", listType, gen); gen.writeEndObject(); }, false); - assertThat(json).contains("\"return-type\""); - 
assertThat(json).contains("\"type\":\"list\""); - assertThat(json).contains("\"element\":\"string\""); + assertThat(json).isEqualTo("{\"return-type\":{\"type\":\"list\",\"element\":\"string\"}}"); } @Test - public void testWriteNullType() { + void writeMapType() { + UdfType mapType = UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int")); + String json = + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("return-type", mapType, gen); + gen.writeEndObject(); + }, + false); + + assertThat(json) + .isEqualTo("{\"return-type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"); + } + + @Test + void writeStructType() { + UdfType structType = + UdfStructType.of( + UdfFieldType.of("id", UdfPrimitiveType.of("int")), + UdfFieldType.of("name", UdfPrimitiveType.of("string"))); + String json = + JsonUtil.generate( + gen -> { + gen.writeStartObject(); + UdfTypeUtil.writeType("return-type", structType, gen); + gen.writeEndObject(); + }, + false); + + assertThat(json) + .isEqualTo( + "{\"return-type\":{\"type\":\"struct\",\"fields\":[" + + "{\"name\":\"id\",\"type\":\"int\"}," + + "{\"name\":\"name\",\"type\":\"string\"}]}}"); + } + + @Test + void writeNullType() { assertThatThrownBy( () -> JsonUtil.generate( @@ -153,32 +197,25 @@ public void testWriteNullType() { } @Test - public void testWriteTypeValue() { - String json = - JsonUtil.generate( - gen -> { - gen.writeStartArray(); - UdfTypeUtil.writeTypeValue(UdfType.primitive("int"), gen); - gen.writeEndArray(); - }, - false); + void roundTripStructWithListAndMap() { + UdfType structType = + UdfStructType.of( + UdfFieldType.of("id", UdfPrimitiveType.of("int")), + UdfFieldType.of("tags", UdfListType.of(UdfPrimitiveType.of("string"))), + UdfFieldType.of( + "props", UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int")))); - assertThat(json).isEqualTo("[\"int\"]"); - } - - @Test - public void testWriteNestedTypeValue() { - Map listType = 
ImmutableMap.of("type", "list", "element", "string"); String json = JsonUtil.generate( gen -> { - gen.writeStartArray(); - UdfTypeUtil.writeTypeValue(UdfType.nested(listType), gen); - gen.writeEndArray(); + gen.writeStartObject(); + UdfTypeUtil.writeType("type", structType, gen); + gen.writeEndObject(); }, false); - assertThat(json).contains("\"type\":\"list\""); - assertThat(json).contains("\"element\":\"string\""); + JsonNode node = JsonUtil.parse(json, n -> n); + UdfType deserialized = UdfTypeUtil.readType(node.get("type")); + assertThat(deserialized).isEqualTo(structType); } } From eed9f234ca86e4f0ecd39fcc6c263bfe09834a92 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 4 May 2026 10:55:20 -0700 Subject: [PATCH 4/6] address comments --- .../apache/iceberg/udf/UdfPrimitiveType.java | 2 +- .../org/apache/iceberg/udf/UdfStructType.java | 4 +- .../iceberg/udf/UdfRepresentationParser.java | 25 +++---- .../org/apache/iceberg/udf/UdfTypeUtil.java | 39 +++++----- .../udf/TestSQLUdfRepresentationParser.java | 25 ++++--- .../iceberg/udf/TestUdfParameterParser.java | 73 +++++++++++++++---- .../udf/TestUdfRepresentationParser.java | 10 ++- .../apache/iceberg/udf/TestUdfTypeUtil.java | 61 ++++++++++++---- 8 files changed, 157 insertions(+), 82 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java b/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java index 7ff73b46af3d..32d51f7132d8 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java @@ -68,7 +68,7 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hashCode(typeString); + return Objects.hash(UdfPrimitiveType.class, typeString); } @Override diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java b/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java index 4eff9a05a01a..1f6cbaaff493 100644 --- 
a/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java @@ -26,8 +26,8 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; /** - * A UDF struct type with an ordered list of named fields. Unlike Iceberg struct types, UDF struct - * fields do not have field IDs. + * A UDF struct type with an ordered list of named fields. Based on Iceberg struct types but + * intentionally omits field IDs and element nullability. */ public final class UdfStructType implements UdfType { diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java index 4128d64e16d8..54252732212b 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfRepresentationParser.java @@ -33,14 +33,12 @@ private UdfRepresentationParser() {} static void toJson(UdfRepresentation representation, JsonGenerator generator) throws IOException { Preconditions.checkArgument(representation != null, "Invalid UDF representation: null"); switch (representation.type().toLowerCase(Locale.ROOT)) { - case UdfRepresentation.Type.SQL: - SQLUdfRepresentationParser.toJson((SQLUdfRepresentation) representation, generator); - break; - - default: - throw new UnsupportedOperationException( - String.format( - "Cannot serialize unsupported UDF representation: %s", representation.type())); + case UdfRepresentation.Type.SQL -> + SQLUdfRepresentationParser.toJson((SQLUdfRepresentation) representation, generator); + default -> + throw new UnsupportedOperationException( + String.format( + "Cannot serialize unsupported UDF representation: %s", representation.type())); } } @@ -57,12 +55,9 @@ static UdfRepresentation fromJson(JsonNode node) { Preconditions.checkArgument( node.isObject(), "Cannot parse UDF representation from non-object: %s", node); String type = 
JsonUtil.getString(TYPE, node).toLowerCase(Locale.ROOT); - switch (type) { - case UdfRepresentation.Type.SQL: - return SQLUdfRepresentationParser.fromJson(node); - - default: - return ImmutableUnknownUdfRepresentation.builder().type(type).build(); - } + return switch (type) { + case UdfRepresentation.Type.SQL -> SQLUdfRepresentationParser.fromJson(node); + default -> ImmutableUnknownUdfRepresentation.builder().type(type).build(); + }; } } diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java index b3595047e373..9742cb38f014 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java @@ -52,17 +52,15 @@ static UdfType readType(JsonNode node) { return UdfPrimitiveType.of(node.asText()); } else if (node.isObject()) { String typeName = JsonUtil.getString(TYPE, node); - switch (typeName) { - case LIST: - return UdfListType.of(readType(node.get(ELEMENT))); - case MAP: - return UdfMapType.of(readType(node.get(KEY)), readType(node.get(VALUE))); - case STRUCT: - return readStruct(node); - default: - throw new IllegalArgumentException( - String.format("Cannot parse UDF type from object with type: %s", typeName)); - } + return switch (typeName) { + case LIST -> UdfListType.of(readType(node.get(ELEMENT))); + case MAP -> UdfMapType.of(readType(node.get(KEY)), readType(node.get(VALUE))); + case STRUCT -> readStruct(node); + default -> + throw new IllegalArgumentException( + String.format( + "Cannot parse UDF type from object with unknown type %s: %s", typeName, node)); + }; } else { throw new IllegalArgumentException( String.format("Cannot parse UDF type from node: %s", node)); @@ -97,24 +95,22 @@ static void writeType(String fieldName, UdfType type, JsonGenerator generator) private static void writeTypeValue(UdfType type, JsonGenerator generator) throws IOException { switch (type.typeId()) { - case PRIMITIVE: - 
generator.writeString(type.asPrimitive().typeString()); - return; - case LIST: + case PRIMITIVE -> generator.writeString(type.asPrimitive().typeString()); + case LIST -> { generator.writeStartObject(); generator.writeStringField(TYPE, LIST); writeType(ELEMENT, type.asListType().elementType(), generator); generator.writeEndObject(); - return; - case MAP: + } + case MAP -> { UdfMapType mapType = type.asMapType(); generator.writeStartObject(); generator.writeStringField(TYPE, MAP); writeType(KEY, mapType.keyType(), generator); writeType(VALUE, mapType.valueType(), generator); generator.writeEndObject(); - return; - case STRUCT: + } + case STRUCT -> { List fields = type.asStructType().fields(); generator.writeStartObject(); generator.writeStringField(TYPE, STRUCT); @@ -127,9 +123,8 @@ private static void writeTypeValue(UdfType type, JsonGenerator generator) throws } generator.writeEndArray(); generator.writeEndObject(); - return; - default: - throw new IllegalArgumentException("Unknown UDF type: " + type); + } + default -> throw new IllegalArgumentException("Unknown UDF type: " + type); } } } diff --git a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java index ebbc846063de..bca7baae3264 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java @@ -28,7 +28,9 @@ class TestSQLUdfRepresentationParser { @Test void parseSqlUdfRepresentation() { - String json = "{\"type\":\"sql\", \"sql\": \"x + 1\", \"dialect\": \"spark\"}"; + String json = + """ + {"type":"sql", "sql": "x + 1", "dialect": "spark"}"""; SQLUdfRepresentation representation = ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build(); @@ -39,17 +41,23 @@ void parseSqlUdfRepresentation() { @Test void parseMissingRequiredFields() { - String missingDialect = 
"{\"type\":\"sql\", \"sql\": \"x + 1\"}"; + String missingDialect = + """ + {"type":"sql", "sql": "x + 1"}"""; assertThatThrownBy(() -> UdfRepresentationParser.fromJson(missingDialect)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: dialect"); - String missingSql = "{\"type\":\"sql\", \"dialect\": \"spark\"}"; + String missingSql = + """ + {"type":"sql", "dialect": "spark"}"""; assertThatThrownBy(() -> UdfRepresentationParser.fromJson(missingSql)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: sql"); - String missingType = "{\"sql\":\"x + 1\",\"dialect\":\"spark\"}"; + String missingType = + """ + {"sql":"x + 1","dialect":"spark"}"""; assertThatThrownBy(() -> UdfRepresentationParser.fromJson(missingType)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: type"); @@ -57,16 +65,11 @@ void parseMissingRequiredFields() { @Test void roundTripSerialization() { - String expectedJson = "{\"type\":\"sql\",\"sql\":\"x + 1\",\"dialect\":\"spark\"}"; SQLUdfRepresentation representation = ImmutableSQLUdfRepresentation.builder().sql("x + 1").dialect("spark").build(); - assertThat(UdfRepresentationParser.toJson(representation)) - .as("Should be able to serialize valid SQL UDF representation") - .isEqualTo(expectedJson); - - assertThat(UdfRepresentationParser.fromJson(UdfRepresentationParser.toJson(representation))) - .isEqualTo(representation); + String serialized = UdfRepresentationParser.toJson(representation); + assertThat(UdfRepresentationParser.fromJson(serialized)).isEqualTo(representation); } @Test diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java index 10a6f5f8dfb9..211710f17bbc 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java @@ -28,7 +28,9 
@@ class TestUdfParameterParser { @Test void parsePrimitiveTypeParameter() { - String json = "{\"name\":\"x\",\"type\":\"int\"}"; + String json = + """ + {"name":"x","type":"int"}"""; UdfParameter expected = ImmutableUdfParameter.builder().name("x").type(UdfPrimitiveType.of("int")).build(); @@ -37,7 +39,9 @@ void parsePrimitiveTypeParameter() { @Test void parseParameterWithDoc() { - String json = "{\"name\":\"x\",\"type\":\"int\",\"doc\":\"Input integer\"}"; + String json = + """ + {"name":"x","type":"int","doc":"Input integer"}"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("x") @@ -50,7 +54,9 @@ void parseParameterWithDoc() { @Test void parseDecimalTypeParameter() { - String json = "{\"name\":\"amount\",\"type\":\"decimal(9,2)\"}"; + String json = + """ + {"name":"amount","type":"decimal(9,2)"}"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("amount") @@ -62,7 +68,9 @@ void parseDecimalTypeParameter() { @Test void parseVariantTypeParameter() { - String json = "{\"name\":\"data\",\"type\":\"variant\"}"; + String json = + """ + {"name":"data","type":"variant"}"""; UdfParameter expected = ImmutableUdfParameter.builder().name("data").type(UdfPrimitiveType.of("variant")).build(); @@ -71,7 +79,15 @@ void parseVariantTypeParameter() { @Test void parseListTypeParameter() { - String json = "{\"name\":\"items\",\"type\":{\"type\":\"list\",\"element\":\"string\"}}"; + String json = + """ + { + "name": "items", + "type": { + "type": "list", + "element": "string" + } + }"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("items") @@ -84,7 +100,15 @@ void parseListTypeParameter() { @Test void parseMapTypeParameter() { String json = - "{\"name\":\"lookup\",\"type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"; + """ + { + "name": "lookup", + "type": { + "type": "map", + "key": "string", + "value": "int" + } + }"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("lookup") @@ -97,9 +121,17 @@ void 
parseMapTypeParameter() { @Test void parseStructTypeParameter() { String json = - "{\"name\":\"row\",\"type\":{\"type\":\"struct\",\"fields\":[" - + "{\"name\":\"id\",\"type\":\"int\"}," - + "{\"name\":\"label\",\"type\":\"string\"}]}}"; + """ + { + "name": "row", + "type": { + "type": "struct", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "label", "type": "string"} + ] + } + }"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("row") @@ -115,8 +147,19 @@ void parseStructTypeParameter() { @Test void parseNestedListOfStruct() { String json = - "{\"name\":\"records\",\"type\":{\"type\":\"list\",\"element\":" - + "{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}}}"; + """ + { + "name": "records", + "type": { + "type": "list", + "element": { + "type": "struct", + "fields": [ + {"name": "id", "type": "int"} + ] + } + } + }"""; UdfParameter expected = ImmutableUdfParameter.builder() .name("records") @@ -192,12 +235,16 @@ void nullParameter() { @Test void missingRequiredFields() { - String missingName = "{\"type\":\"int\"}"; + String missingName = + """ + {"type":"int"}"""; assertThatThrownBy(() -> UdfParameterParser.fromJson(missingName)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: name"); - String missingType = "{\"name\":\"x\"}"; + String missingType = + """ + {"name":"x"}"""; assertThatThrownBy(() -> UdfParameterParser.fromJson(missingType)) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot read type from null node"); diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java index 9b26ccfb9216..0536cc2ad942 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfRepresentationParser.java @@ -27,7 +27,9 @@ class TestUdfRepresentationParser { @Test void 
parseUnknownRepresentation() { - String json = "{\"type\":\"python\"}"; + String json = + """ + {"type":"python"}"""; UdfRepresentation unknownRepresentation = UdfRepresentationParser.fromJson(json); assertThat(unknownRepresentation) .isEqualTo(ImmutableUnknownUdfRepresentation.builder().type("python").build()); @@ -46,7 +48,11 @@ void nullRepresentation() { @Test void missingType() { - assertThatThrownBy(() -> UdfRepresentationParser.fromJson("{\"sql\":\"x + 1\"}")) + assertThatThrownBy( + () -> + UdfRepresentationParser.fromJson( + """ + {"sql":"x + 1"}""")) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot parse missing string: type"); } diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java index 41726eb10e53..e233a024a27a 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ -50,15 +50,20 @@ void readVariantType() { @Test void readListType() { - JsonNode node = JsonUtil.parse("{\"type\":\"list\",\"element\":\"string\"}", n -> n); + String json = + """ + {"type":"list","element":"string"}"""; + JsonNode node = JsonUtil.parse(json, n -> n); UdfType type = UdfTypeUtil.readType(node); assertThat(type).isEqualTo(UdfListType.of(UdfPrimitiveType.of("string"))); } @Test void readMapType() { - JsonNode node = - JsonUtil.parse("{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}", n -> n); + String json = + """ + {"type":"map","key":"string","value":"int"}"""; + JsonNode node = JsonUtil.parse(json, n -> n); UdfType type = UdfTypeUtil.readType(node); assertThat(type) .isEqualTo(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); @@ -67,9 +72,14 @@ void readMapType() { @Test void readStructType() { String structJson = - "{\"type\":\"struct\",\"fields\":[" - + "{\"name\":\"id\",\"type\":\"int\"}," - + "{\"name\":\"name\",\"type\":\"string\"}]}"; + """ + 
{ + "type": "struct", + "fields": [ + {"name": "id", "type": "int"}, + {"name": "name", "type": "string"} + ] + }"""; JsonNode node = JsonUtil.parse(structJson, n -> n); UdfType expected = UdfStructType.of( @@ -82,8 +92,15 @@ void readStructType() { @Test void readNestedListOfMap() { String json = - "{\"type\":\"list\",\"element\":" - + "{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"; + """ + { + "type": "list", + "element": { + "type": "map", + "key": "string", + "value": "int" + } + }"""; JsonNode node = JsonUtil.parse(json, n -> n); UdfType expected = UdfListType.of(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); @@ -108,10 +125,13 @@ void readArrayNode() { @Test void readUnknownNestedType() { - JsonNode node = JsonUtil.parse("{\"type\":\"set\"}", n -> n); + String json = + """ + {"type":"set"}"""; + JsonNode node = JsonUtil.parse(json, n -> n); assertThatThrownBy(() -> UdfTypeUtil.readType(node)) .isInstanceOf(IllegalArgumentException.class) - .hasMessageStartingWith("Cannot parse UDF type from object with type: set"); + .hasMessage("Cannot parse UDF type from object with unknown type set: {\"type\":\"set\"}"); } @Test @@ -125,7 +145,10 @@ void writePrimitiveType() { }, false); - assertThat(json).isEqualTo("{\"return-type\":\"int\"}"); + assertThat(json) + .isEqualTo( + """ + {"return-type":"int"}"""); } @Test @@ -140,7 +163,10 @@ void writeListType() { }, false); - assertThat(json).isEqualTo("{\"return-type\":{\"type\":\"list\",\"element\":\"string\"}}"); + assertThat(json) + .isEqualTo( + """ + {"return-type":{"type":"list","element":"string"}}"""); } @Test @@ -156,7 +182,9 @@ void writeMapType() { false); assertThat(json) - .isEqualTo("{\"return-type\":{\"type\":\"map\",\"key\":\"string\",\"value\":\"int\"}}"); + .isEqualTo( + """ + {"return-type":{"type":"map","key":"string","value":"int"}}"""); } @Test @@ -176,9 +204,10 @@ void writeStructType() { assertThat(json) .isEqualTo( - 
"{\"return-type\":{\"type\":\"struct\",\"fields\":[" - + "{\"name\":\"id\",\"type\":\"int\"}," - + "{\"name\":\"name\",\"type\":\"string\"}]}}"); + """ + {"return-type":{"type":"struct","fields":[\ + {"name":"id","type":"int"},\ + {"name":"name","type":"string"}]}}"""); } @Test From 0db77f56f25e70e8e86ab9dbfb0033c88c2ad8e5 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sun, 10 May 2026 19:24:22 -0700 Subject: [PATCH 5/6] Address comments --- .../org/apache/iceberg/udf/UdfFieldType.java | 72 ---- .../org/apache/iceberg/udf/UdfListType.java | 74 ---- .../org/apache/iceberg/udf/UdfMapType.java | 82 ----- .../apache/iceberg/udf/UdfPrimitiveType.java | 78 ----- .../org/apache/iceberg/udf/UdfStructType.java | 88 ----- .../java/org/apache/iceberg/udf/UdfType.java | 29 +- .../java/org/apache/iceberg/udf/UdfTypes.java | 326 ++++++++++++++++++ .../org/apache/iceberg/udf/UdfTypeUtil.java | 34 +- .../udf/TestSQLUdfRepresentationParser.java | 8 +- .../iceberg/udf/TestUdfParameterParser.java | 59 ++-- .../apache/iceberg/udf/TestUdfTypeUtil.java | 135 ++++++-- 11 files changed, 495 insertions(+), 490 deletions(-) delete mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java delete mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfListType.java delete mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfMapType.java delete mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java delete mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfStructType.java create mode 100644 api/src/main/java/org/apache/iceberg/udf/UdfTypes.java diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java b/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java deleted file mode 100644 index 4bad7671ecce..000000000000 --- a/api/src/main/java/org/apache/iceberg/udf/UdfFieldType.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.udf; - -import java.util.Objects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -/** A field within a {@link UdfStructType}, with a name and a type. */ -public final class UdfFieldType { - - private final String name; - private final UdfType type; - - public static UdfFieldType of(String name, UdfType type) { - Preconditions.checkArgument(name != null, "Invalid field name: null"); - Preconditions.checkArgument(type != null, "Invalid field type: null"); - return new UdfFieldType(name, type); - } - - private UdfFieldType(String name, UdfType type) { - this.name = name; - this.type = type; - } - - public String name() { - return name; - } - - public UdfType type() { - return type; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfFieldType)) { - return false; - } - - UdfFieldType that = (UdfFieldType) o; - return Objects.equals(name, that.name) && Objects.equals(type, that.type); - } - - @Override - public int hashCode() { - return Objects.hash(name, type); - } - - @Override - public String toString() { - return String.format("%s:%s", name, type); - } -} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfListType.java 
b/api/src/main/java/org/apache/iceberg/udf/UdfListType.java deleted file mode 100644 index b6d7c06346c8..000000000000 --- a/api/src/main/java/org/apache/iceberg/udf/UdfListType.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.udf; - -import java.util.Objects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -/** A UDF list type with an element type. 
*/ -public final class UdfListType implements UdfType { - - private final UdfType elementType; - - public static UdfListType of(UdfType elementType) { - Preconditions.checkArgument(elementType != null, "Invalid element type: null"); - return new UdfListType(elementType); - } - - private UdfListType(UdfType elementType) { - this.elementType = elementType; - } - - @Override - public TypeId typeId() { - return TypeId.LIST; - } - - @Override - public UdfListType asListType() { - return this; - } - - public UdfType elementType() { - return elementType; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfListType)) { - return false; - } - - return Objects.equals(elementType, ((UdfListType) o).elementType); - } - - @Override - public int hashCode() { - return Objects.hash(UdfListType.class, elementType); - } - - @Override - public String toString() { - return String.format("list<%s>", elementType); - } -} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java b/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java deleted file mode 100644 index dc0b6d07188d..000000000000 --- a/api/src/main/java/org/apache/iceberg/udf/UdfMapType.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.udf; - -import java.util.Objects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -/** A UDF map type with key and value types. */ -public final class UdfMapType implements UdfType { - - private final UdfType keyType; - private final UdfType valueType; - - public static UdfMapType of(UdfType keyType, UdfType valueType) { - Preconditions.checkArgument(keyType != null, "Invalid key type: null"); - Preconditions.checkArgument(valueType != null, "Invalid value type: null"); - return new UdfMapType(keyType, valueType); - } - - private UdfMapType(UdfType keyType, UdfType valueType) { - this.keyType = keyType; - this.valueType = valueType; - } - - @Override - public TypeId typeId() { - return TypeId.MAP; - } - - @Override - public UdfMapType asMapType() { - return this; - } - - public UdfType keyType() { - return keyType; - } - - public UdfType valueType() { - return valueType; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfMapType)) { - return false; - } - - UdfMapType that = (UdfMapType) o; - return Objects.equals(keyType, that.keyType) && Objects.equals(valueType, that.valueType); - } - - @Override - public int hashCode() { - return Objects.hash(UdfMapType.class, keyType, valueType); - } - - @Override - public String toString() { - return String.format("map<%s,%s>", keyType, valueType); - } -} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java b/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java deleted file mode 100644 index 32d51f7132d8..000000000000 --- a/api/src/main/java/org/apache/iceberg/udf/UdfPrimitiveType.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.udf; - -import java.util.Objects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -/** - * A UDF primitive or semi-structured type, encoded as a type string (e.g., {@code int}, {@code - * string}, {@code decimal(9,2)}, {@code variant}). - */ -public final class UdfPrimitiveType implements UdfType { - - private final String typeString; - - public static UdfPrimitiveType of(String typeString) { - Preconditions.checkArgument(typeString != null, "Invalid primitive type: null"); - return new UdfPrimitiveType(typeString); - } - - private UdfPrimitiveType(String typeString) { - this.typeString = typeString; - } - - @Override - public TypeId typeId() { - return TypeId.PRIMITIVE; - } - - @Override - public UdfPrimitiveType asPrimitive() { - return this; - } - - /** The primitive or semi-structured type string. 
*/ - public String typeString() { - return typeString; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfPrimitiveType)) { - return false; - } - - return Objects.equals(typeString, ((UdfPrimitiveType) o).typeString); - } - - @Override - public int hashCode() { - return Objects.hash(UdfPrimitiveType.class, typeString); - } - - @Override - public String toString() { - return typeString; - } -} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java b/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java deleted file mode 100644 index 1f6cbaaff493..000000000000 --- a/api/src/main/java/org/apache/iceberg/udf/UdfStructType.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.udf; - -import java.util.Arrays; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; - -/** - * A UDF struct type with an ordered list of named fields. 
Based on Iceberg struct types but - * intentionally omits field IDs and element nullability. - */ -public final class UdfStructType implements UdfType { - - private final List fields; - - public static UdfStructType of(UdfFieldType... fields) { - Preconditions.checkArgument(fields != null, "Invalid fields: null"); - return of(Arrays.asList(fields)); - } - - public static UdfStructType of(List fields) { - Preconditions.checkArgument(fields != null, "Invalid fields: null"); - return new UdfStructType(ImmutableList.copyOf(fields)); - } - - private UdfStructType(List fields) { - this.fields = fields; - } - - @Override - public TypeId typeId() { - return TypeId.STRUCT; - } - - @Override - public UdfStructType asStructType() { - return this; - } - - public List fields() { - return fields; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (!(o instanceof UdfStructType)) { - return false; - } - - return Objects.equals(fields, ((UdfStructType) o).fields); - } - - @Override - public int hashCode() { - return Objects.hash(UdfStructType.class, fields); - } - - @Override - public String toString() { - return fields.stream() - .map(UdfFieldType::toString) - .collect(Collectors.joining(",", "struct<", ">")); - } -} diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfType.java b/api/src/main/java/org/apache/iceberg/udf/UdfType.java index 4a6d70954381..60442f09e110 100644 --- a/api/src/main/java/org/apache/iceberg/udf/UdfType.java +++ b/api/src/main/java/org/apache/iceberg/udf/UdfType.java @@ -20,50 +20,51 @@ /** * Represents a UDF data type as defined in the UDF spec. UDF types are based on Iceberg types but - * intentionally omit field IDs and element nullability. Implementations include {@link - * UdfPrimitiveType} for primitive and semi-structured types, and the nested types {@link - * UdfListType}, {@link UdfMapType}, and {@link UdfStructType}. + * intentionally omit field IDs and element nullability. 
Concrete implementations live as static + * nested classes on {@link UdfTypes}: {@link UdfTypes.PrimitiveType} for primitive and + * semi-structured types, and the nested types {@link UdfTypes.ListType}, {@link UdfTypes.MapType}, + * and {@link UdfTypes.StructType}. */ public interface UdfType { - enum TypeId { + enum TypeID { PRIMITIVE, LIST, MAP, STRUCT } - TypeId typeId(); + TypeID typeId(); - default boolean isPrimitive() { - return typeId() == TypeId.PRIMITIVE; + default boolean isPrimitiveType() { + return typeId() == TypeID.PRIMITIVE; } default boolean isListType() { - return typeId() == TypeId.LIST; + return typeId() == TypeID.LIST; } default boolean isMapType() { - return typeId() == TypeId.MAP; + return typeId() == TypeID.MAP; } default boolean isStructType() { - return typeId() == TypeId.STRUCT; + return typeId() == TypeID.STRUCT; } - default UdfPrimitiveType asPrimitive() { + default UdfTypes.PrimitiveType asPrimitiveType() { throw new IllegalArgumentException("Not a primitive type: " + this); } - default UdfListType asListType() { + default UdfTypes.ListType asListType() { throw new IllegalArgumentException("Not a list type: " + this); } - default UdfMapType asMapType() { + default UdfTypes.MapType asMapType() { throw new IllegalArgumentException("Not a map type: " + this); } - default UdfStructType asStructType() { + default UdfTypes.StructType asStructType() { throw new IllegalArgumentException("Not a struct type: " + this); } } diff --git a/api/src/main/java/org/apache/iceberg/udf/UdfTypes.java b/api/src/main/java/org/apache/iceberg/udf/UdfTypes.java new file mode 100644 index 000000000000..8a0dcf3a9d74 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/udf/UdfTypes.java @@ -0,0 +1,326 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.udf; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.types.Types; + +/** + * Concrete implementations of {@link UdfType}: {@link PrimitiveType} for primitive and + * semi-structured types and {@link ListType}, {@link MapType}, {@link StructType} for nested types. + * {@link NestedField} represents a named field inside a {@link StructType}. + */ +public class UdfTypes { + + private UdfTypes() {} + + /** + * A UDF primitive or semi-structured type, encoded as a type string (e.g., {@code int}, {@code + * string}, {@code decimal(9, 2)}, {@code variant}). + * + *

The type string must be a recognized Iceberg primitive or semi-structured type as understood + * by {@link Types#fromTypeName(String)}. The input is canonicalized to Iceberg's standard form + * (lowercase, normalized whitespace), so {@code PrimitiveType.of("INT")} and {@code + * PrimitiveType.of("Decimal( 9 , 2 )")} produce {@code int} and {@code decimal(9, 2)} + * respectively. + */ + public static final class PrimitiveType implements UdfType { + + private final String typeString; + + public static PrimitiveType of(String typeString) { + Preconditions.checkArgument(typeString != null, "Invalid primitive type: null"); + // Validate against Iceberg's primitive/semi-structured type vocabulary and use the parsed + // type's canonical toString() so callers don't have to worry about casing or whitespace. + String canonical = Types.fromTypeName(typeString).toString(); + return new PrimitiveType(canonical); + } + + private PrimitiveType(String typeString) { + this.typeString = typeString; + } + + @Override + public TypeID typeId() { + return TypeID.PRIMITIVE; + } + + @Override + public PrimitiveType asPrimitiveType() { + return this; + } + + /** The primitive or semi-structured type string. */ + public String typeString() { + return typeString; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof PrimitiveType)) { + return false; + } + + return Objects.equals(typeString, ((PrimitiveType) o).typeString); + } + + @Override + public int hashCode() { + return Objects.hash(PrimitiveType.class, typeString); + } + + @Override + public String toString() { + return typeString; + } + } + + /** A UDF list type with an element type. 
*/ + public static final class ListType implements UdfType { + + private final UdfType elementType; + + public static ListType of(UdfType elementType) { + Preconditions.checkArgument(elementType != null, "Invalid element type: null"); + return new ListType(elementType); + } + + private ListType(UdfType elementType) { + this.elementType = elementType; + } + + @Override + public TypeID typeId() { + return TypeID.LIST; + } + + @Override + public ListType asListType() { + return this; + } + + public UdfType elementType() { + return elementType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof ListType)) { + return false; + } + + return Objects.equals(elementType, ((ListType) o).elementType); + } + + @Override + public int hashCode() { + return Objects.hash(ListType.class, elementType); + } + + @Override + public String toString() { + return String.format("list<%s>", elementType); + } + } + + /** A UDF map type with key and value types. 
*/ + public static final class MapType implements UdfType { + + private final UdfType keyType; + private final UdfType valueType; + + public static MapType of(UdfType keyType, UdfType valueType) { + Preconditions.checkArgument(keyType != null, "Invalid key type: null"); + Preconditions.checkArgument(valueType != null, "Invalid value type: null"); + return new MapType(keyType, valueType); + } + + private MapType(UdfType keyType, UdfType valueType) { + this.keyType = keyType; + this.valueType = valueType; + } + + @Override + public TypeID typeId() { + return TypeID.MAP; + } + + @Override + public MapType asMapType() { + return this; + } + + public UdfType keyType() { + return keyType; + } + + public UdfType valueType() { + return valueType; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof MapType)) { + return false; + } + + MapType that = (MapType) o; + return Objects.equals(keyType, that.keyType) && Objects.equals(valueType, that.valueType); + } + + @Override + public int hashCode() { + return Objects.hash(MapType.class, keyType, valueType); + } + + @Override + public String toString() { + return String.format("map<%s,%s>", keyType, valueType); + } + } + + /** + * A UDF struct type with an ordered list of named fields. Based on Iceberg struct types but + * intentionally omits field IDs and element nullability. + */ + public static final class StructType implements UdfType { + + private final List fields; + + public static StructType of(NestedField... 
fields) { + Preconditions.checkArgument(fields != null, "Invalid fields: null"); + return of(Arrays.asList(fields)); + } + + public static StructType of(List fields) { + Preconditions.checkArgument(fields != null, "Invalid fields: null"); + return new StructType(ImmutableList.copyOf(fields)); + } + + private StructType(List fields) { + this.fields = fields; + } + + @Override + public TypeID typeId() { + return TypeID.STRUCT; + } + + @Override + public StructType asStructType() { + return this; + } + + public List fields() { + return fields; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof StructType)) { + return false; + } + + return Objects.equals(fields, ((StructType) o).fields); + } + + @Override + public int hashCode() { + return Objects.hash(StructType.class, fields); + } + + @Override + public String toString() { + return fields.stream() + .map(NestedField::toString) + .collect(Collectors.joining(",", "struct<", ">")); + } + } + + /** A field within a {@link StructType}, with a name and a type. 
*/ + public static final class NestedField { + + private final String name; + private final UdfType type; + + public static NestedField of(String name, UdfType type) { + Preconditions.checkArgument(name != null, "Invalid field name: null"); + Preconditions.checkArgument(type != null, "Invalid field type: null"); + return new NestedField(name, type); + } + + private NestedField(String name, UdfType type) { + this.name = name; + this.type = type; + } + + public String name() { + return name; + } + + public UdfType type() { + return type; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + + if (!(o instanceof NestedField)) { + return false; + } + + NestedField that = (NestedField) o; + return Objects.equals(name, that.name) && Objects.equals(type, that.type); + } + + @Override + public int hashCode() { + return Objects.hash(name, type); + } + + @Override + public String toString() { + return String.format("%s:%s", name, type); + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java index 9742cb38f014..1dedca1f8a6c 100644 --- a/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java +++ b/core/src/main/java/org/apache/iceberg/udf/UdfTypeUtil.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.IOException; import java.util.List; +import java.util.Locale; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.util.JsonUtil; @@ -49,12 +50,14 @@ static UdfType readType(JsonNode node) { Preconditions.checkArgument(node != null, "Cannot read type from null node"); if (node.isTextual()) { - return UdfPrimitiveType.of(node.asText()); + return UdfTypes.PrimitiveType.of(node.asText()); } else if (node.isObject()) { - String typeName = JsonUtil.getString(TYPE, node); + String typeName = 
JsonUtil.getString(TYPE, node).toLowerCase(Locale.ROOT); return switch (typeName) { - case LIST -> UdfListType.of(readType(node.get(ELEMENT))); - case MAP -> UdfMapType.of(readType(node.get(KEY)), readType(node.get(VALUE))); + case LIST -> UdfTypes.ListType.of(readType(JsonUtil.get(ELEMENT, node))); + case MAP -> + UdfTypes.MapType.of( + readType(JsonUtil.get(KEY, node)), readType(JsonUtil.get(VALUE, node))); case STRUCT -> readStruct(node); default -> throw new IllegalArgumentException( @@ -67,22 +70,21 @@ static UdfType readType(JsonNode node) { } } - private static UdfStructType readStruct(JsonNode node) { - JsonNode fieldsNode = node.get(FIELDS); + private static UdfTypes.StructType readStruct(JsonNode node) { + JsonNode fieldsNode = JsonUtil.get(FIELDS, node); Preconditions.checkArgument( - fieldsNode != null && fieldsNode.isArray(), - "Cannot parse struct type from non-array fields: %s", - fieldsNode); + fieldsNode.isArray(), "Cannot parse struct type from non-array fields: %s", fieldsNode); - ImmutableList.Builder fields = ImmutableList.builder(); + ImmutableList.Builder fields = ImmutableList.builder(); for (JsonNode fieldNode : fieldsNode) { Preconditions.checkArgument( fieldNode.isObject(), "Cannot parse struct field from non-object: %s", fieldNode); fields.add( - UdfFieldType.of(JsonUtil.getString(NAME, fieldNode), readType(fieldNode.get(TYPE)))); + UdfTypes.NestedField.of( + JsonUtil.getString(NAME, fieldNode), readType(JsonUtil.get(TYPE, fieldNode)))); } - return UdfStructType.of(fields.build()); + return UdfTypes.StructType.of(fields.build()); } /** Writes a UDF type to a JSON generator under the given field name. 
*/ @@ -95,7 +97,7 @@ static void writeType(String fieldName, UdfType type, JsonGenerator generator) private static void writeTypeValue(UdfType type, JsonGenerator generator) throws IOException { switch (type.typeId()) { - case PRIMITIVE -> generator.writeString(type.asPrimitive().typeString()); + case PRIMITIVE -> generator.writeString(type.asPrimitiveType().typeString()); case LIST -> { generator.writeStartObject(); generator.writeStringField(TYPE, LIST); @@ -103,7 +105,7 @@ private static void writeTypeValue(UdfType type, JsonGenerator generator) throws generator.writeEndObject(); } case MAP -> { - UdfMapType mapType = type.asMapType(); + UdfTypes.MapType mapType = type.asMapType(); generator.writeStartObject(); generator.writeStringField(TYPE, MAP); writeType(KEY, mapType.keyType(), generator); @@ -111,11 +113,11 @@ private static void writeTypeValue(UdfType type, JsonGenerator generator) throws generator.writeEndObject(); } case STRUCT -> { - List fields = type.asStructType().fields(); + List fields = type.asStructType().fields(); generator.writeStartObject(); generator.writeStringField(TYPE, STRUCT); generator.writeArrayFieldStart(FIELDS); - for (UdfFieldType field : fields) { + for (UdfTypes.NestedField field : fields) { generator.writeStartObject(); generator.writeStringField(NAME, field.name()); writeType(TYPE, field.type(), generator); diff --git a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java index bca7baae3264..518e67229520 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestSQLUdfRepresentationParser.java @@ -78,13 +78,7 @@ void roundTripWithTrinoDialect() { ImmutableSQLUdfRepresentation.builder().sql("x + 1.0").dialect("trino").build(); String serialized = UdfRepresentationParser.toJson(representation); - UdfRepresentation deserialized = 
UdfRepresentationParser.fromJson(serialized); - - assertThat(deserialized).isInstanceOf(SQLUdfRepresentation.class); - SQLUdfRepresentation sqlRepr = (SQLUdfRepresentation) deserialized; - assertThat(sqlRepr.sql()).isEqualTo("x + 1.0"); - assertThat(sqlRepr.dialect()).isEqualTo("trino"); - assertThat(sqlRepr.type()).isEqualTo("sql"); + assertThat(UdfRepresentationParser.fromJson(serialized)).isEqualTo(representation); } @Test diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java index 211710f17bbc..c86bc3eeb780 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfParameterParser.java @@ -32,7 +32,7 @@ void parsePrimitiveTypeParameter() { """ {"name":"x","type":"int"}"""; UdfParameter expected = - ImmutableUdfParameter.builder().name("x").type(UdfPrimitiveType.of("int")).build(); + ImmutableUdfParameter.builder().name("x").type(UdfTypes.PrimitiveType.of("int")).build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } @@ -45,38 +45,13 @@ void parseParameterWithDoc() { UdfParameter expected = ImmutableUdfParameter.builder() .name("x") - .type(UdfPrimitiveType.of("int")) + .type(UdfTypes.PrimitiveType.of("int")) .doc("Input integer") .build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); } - @Test - void parseDecimalTypeParameter() { - String json = - """ - {"name":"amount","type":"decimal(9,2)"}"""; - UdfParameter expected = - ImmutableUdfParameter.builder() - .name("amount") - .type(UdfPrimitiveType.of("decimal(9,2)")) - .build(); - - assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); - } - - @Test - void parseVariantTypeParameter() { - String json = - """ - {"name":"data","type":"variant"}"""; - UdfParameter expected = - ImmutableUdfParameter.builder().name("data").type(UdfPrimitiveType.of("variant")).build(); - - 
assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); - } - @Test void parseListTypeParameter() { String json = @@ -91,7 +66,7 @@ void parseListTypeParameter() { UdfParameter expected = ImmutableUdfParameter.builder() .name("items") - .type(UdfListType.of(UdfPrimitiveType.of("string"))) + .type(UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string"))) .build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); @@ -112,7 +87,9 @@ void parseMapTypeParameter() { UdfParameter expected = ImmutableUdfParameter.builder() .name("lookup") - .type(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))) + .type( + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int"))) .build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); @@ -136,9 +113,9 @@ void parseStructTypeParameter() { ImmutableUdfParameter.builder() .name("row") .type( - UdfStructType.of( - UdfFieldType.of("id", UdfPrimitiveType.of("int")), - UdfFieldType.of("label", UdfPrimitiveType.of("string")))) + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")), + UdfTypes.NestedField.of("label", UdfTypes.PrimitiveType.of("string")))) .build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); @@ -164,7 +141,9 @@ void parseNestedListOfStruct() { ImmutableUdfParameter.builder() .name("records") .type( - UdfListType.of(UdfStructType.of(UdfFieldType.of("id", UdfPrimitiveType.of("int"))))) + UdfTypes.ListType.of( + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int"))))) .build(); assertThat(UdfParameterParser.fromJson(json)).isEqualTo(expected); @@ -175,7 +154,7 @@ void roundTripPrimitiveType() { UdfParameter parameter = ImmutableUdfParameter.builder() .name("x") - .type(UdfPrimitiveType.of("int")) + .type(UdfTypes.PrimitiveType.of("int")) .doc("Input integer") .build(); @@ -188,7 +167,7 @@ void roundTripListType() { UdfParameter 
parameter = ImmutableUdfParameter.builder() .name("items") - .type(UdfListType.of(UdfPrimitiveType.of("string"))) + .type(UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string"))) .build(); String serialized = UdfParameterParser.toJson(parameter); @@ -200,7 +179,9 @@ void roundTripMapType() { UdfParameter parameter = ImmutableUdfParameter.builder() .name("lookup") - .type(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))) + .type( + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int"))) .build(); String serialized = UdfParameterParser.toJson(parameter); @@ -213,9 +194,9 @@ void roundTripStructType() { ImmutableUdfParameter.builder() .name("row") .type( - UdfStructType.of( - UdfFieldType.of("id", UdfPrimitiveType.of("int")), - UdfFieldType.of("label", UdfPrimitiveType.of("string")))) + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")), + UdfTypes.NestedField.of("label", UdfTypes.PrimitiveType.of("string")))) .build(); String serialized = UdfParameterParser.toJson(parameter); diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java index e233a024a27a..560f674f309a 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ -31,21 +31,21 @@ class TestUdfTypeUtil { void readPrimitiveType() { JsonNode node = JsonUtil.mapper().valueToTree("int"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo(UdfPrimitiveType.of("int")); + assertThat(type).isEqualTo(UdfTypes.PrimitiveType.of("int")); } @Test void readDecimalType() { JsonNode node = JsonUtil.mapper().valueToTree("decimal(9,2)"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo(UdfPrimitiveType.of("decimal(9,2)")); + assertThat(type).isEqualTo(UdfTypes.PrimitiveType.of("decimal(9,2)")); } @Test void 
readVariantType() { JsonNode node = JsonUtil.mapper().valueToTree("variant"); UdfType type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo(UdfPrimitiveType.of("variant")); + assertThat(type).isEqualTo(UdfTypes.PrimitiveType.of("variant")); } @Test @@ -55,7 +55,7 @@ void readListType() { {"type":"list","element":"string"}"""; JsonNode node = JsonUtil.parse(json, n -> n); UdfType type = UdfTypeUtil.readType(node); - assertThat(type).isEqualTo(UdfListType.of(UdfPrimitiveType.of("string"))); + assertThat(type).isEqualTo(UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string"))); } @Test @@ -66,7 +66,9 @@ void readMapType() { JsonNode node = JsonUtil.parse(json, n -> n); UdfType type = UdfTypeUtil.readType(node); assertThat(type) - .isEqualTo(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); + .isEqualTo( + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int"))); } @Test @@ -82,9 +84,9 @@ void readStructType() { }"""; JsonNode node = JsonUtil.parse(structJson, n -> n); UdfType expected = - UdfStructType.of( - UdfFieldType.of("id", UdfPrimitiveType.of("int")), - UdfFieldType.of("name", UdfPrimitiveType.of("string"))); + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")), + UdfTypes.NestedField.of("name", UdfTypes.PrimitiveType.of("string"))); assertThat(UdfTypeUtil.readType(node)).isEqualTo(expected); } @@ -103,11 +105,56 @@ void readNestedListOfMap() { }"""; JsonNode node = JsonUtil.parse(json, n -> n); UdfType expected = - UdfListType.of(UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int"))); + UdfTypes.ListType.of( + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int"))); assertThat(UdfTypeUtil.readType(node)).isEqualTo(expected); } + @Test + void primitiveTypeRejectsUnknownVocabulary() { + assertThatThrownBy(() -> UdfTypes.PrimitiveType.of("foo")) + .isInstanceOf(IllegalArgumentException.class) + 
.hasMessageContaining("Cannot parse type string to primitive: foo"); + + assertThatThrownBy(() -> UdfTypes.PrimitiveType.of("struct")) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void readPrimitiveTypeIsCaseInsensitive() { + JsonNode upper = JsonUtil.mapper().valueToTree("INT"); + JsonNode mixed = JsonUtil.mapper().valueToTree("Decimal(9,2)"); + assertThat(UdfTypeUtil.readType(upper)).isEqualTo(UdfTypes.PrimitiveType.of("int")); + assertThat(UdfTypeUtil.readType(mixed)).isEqualTo(UdfTypes.PrimitiveType.of("decimal(9,2)")); + } + + @Test + void readNestedTypeNameIsCaseInsensitive() { + String listJson = + """ + {"type":"LIST","element":"string"}"""; + assertThat(UdfTypeUtil.readType(JsonUtil.parse(listJson, n -> n))) + .isEqualTo(UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string"))); + + String mapJson = + """ + {"type":"Map","key":"string","value":"int"}"""; + assertThat(UdfTypeUtil.readType(JsonUtil.parse(mapJson, n -> n))) + .isEqualTo( + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int"))); + + String structJson = + """ + {"type":"STRUCT","fields":[{"name":"id","type":"int"}]}"""; + assertThat(UdfTypeUtil.readType(JsonUtil.parse(structJson, n -> n))) + .isEqualTo( + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")))); + } + @Test void readNullNode() { assertThatThrownBy(() -> UdfTypeUtil.readType(null)) @@ -134,13 +181,57 @@ void readUnknownNestedType() { .hasMessage("Cannot parse UDF type from object with unknown type set: {\"type\":\"set\"}"); } + @Test + void readListMissingElement() { + JsonNode node = JsonUtil.parse("{\"type\":\"list\"}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(node)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: element"); + } + + @Test + void readMapMissingKeyOrValue() { + JsonNode missingKey = JsonUtil.parse("{\"type\":\"map\",\"value\":\"int\"}", n -> n); + 
assertThatThrownBy(() -> UdfTypeUtil.readType(missingKey)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: key"); + + JsonNode missingValue = JsonUtil.parse("{\"type\":\"map\",\"key\":\"string\"}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(missingValue)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: value"); + } + + @Test + void readStructWithInvalidField() { + JsonNode missingName = + JsonUtil.parse("{\"type\":\"struct\",\"fields\":[{\"type\":\"int\"}]}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(missingName)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing string: name"); + + JsonNode missingType = + JsonUtil.parse("{\"type\":\"struct\",\"fields\":[{\"name\":\"id\"}]}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(missingType)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot parse missing field: type"); + } + + @Test + void readStructFieldNotObject() { + JsonNode node = JsonUtil.parse("{\"type\":\"struct\",\"fields\":[\"oops\"]}", n -> n); + assertThatThrownBy(() -> UdfTypeUtil.readType(node)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageStartingWith("Cannot parse struct field from non-object:"); + } + @Test void writePrimitiveType() { String json = JsonUtil.generate( gen -> { gen.writeStartObject(); - UdfTypeUtil.writeType("return-type", UdfPrimitiveType.of("int"), gen); + UdfTypeUtil.writeType("return-type", UdfTypes.PrimitiveType.of("int"), gen); gen.writeEndObject(); }, false); @@ -153,7 +244,7 @@ void writePrimitiveType() { @Test void writeListType() { - UdfType listType = UdfListType.of(UdfPrimitiveType.of("string")); + UdfType listType = UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string")); String json = JsonUtil.generate( gen -> { @@ -171,7 +262,8 @@ void writeListType() { @Test void writeMapType() { - UdfType mapType = 
UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int")); + UdfType mapType = + UdfTypes.MapType.of(UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int")); String json = JsonUtil.generate( gen -> { @@ -190,9 +282,9 @@ void writeMapType() { @Test void writeStructType() { UdfType structType = - UdfStructType.of( - UdfFieldType.of("id", UdfPrimitiveType.of("int")), - UdfFieldType.of("name", UdfPrimitiveType.of("string"))); + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")), + UdfTypes.NestedField.of("name", UdfTypes.PrimitiveType.of("string"))); String json = JsonUtil.generate( gen -> { @@ -228,11 +320,14 @@ void writeNullType() { @Test void roundTripStructWithListAndMap() { UdfType structType = - UdfStructType.of( - UdfFieldType.of("id", UdfPrimitiveType.of("int")), - UdfFieldType.of("tags", UdfListType.of(UdfPrimitiveType.of("string"))), - UdfFieldType.of( - "props", UdfMapType.of(UdfPrimitiveType.of("string"), UdfPrimitiveType.of("int")))); + UdfTypes.StructType.of( + UdfTypes.NestedField.of("id", UdfTypes.PrimitiveType.of("int")), + UdfTypes.NestedField.of( + "tags", UdfTypes.ListType.of(UdfTypes.PrimitiveType.of("string"))), + UdfTypes.NestedField.of( + "props", + UdfTypes.MapType.of( + UdfTypes.PrimitiveType.of("string"), UdfTypes.PrimitiveType.of("int")))); String json = JsonUtil.generate( From 690408d9ea53762bd9df89d3ae6bdc865916a92d Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Sun, 10 May 2026 20:23:51 -0700 Subject: [PATCH 6/6] fix checkstyle --- core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java index 560f674f309a..51072f406901 100644 --- a/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java +++ b/core/src/test/java/org/apache/iceberg/udf/TestUdfTypeUtil.java @@ 
-119,7 +119,8 @@ void primitiveTypeRejectsUnknownVocabulary() { .hasMessageContaining("Cannot parse type string to primitive: foo"); assertThatThrownBy(() -> UdfTypes.PrimitiveType.of("struct")) - .isInstanceOf(IllegalArgumentException.class); + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot parse type string to primitive: struct"); } @Test