From 1c51206cc2dc708de425943e73c0aa2dfe1b92f2 Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Thu, 11 Jun 2026 13:57:18 -0700 Subject: [PATCH 1/3] HIVE-29668: Add -rebuildIndexes utility to reconstruct backend Metastore indexes --- .../schematool/AbstractIndexRebuilder.java | 81 ++++ .../tools/schematool/HiveSchemaHelper.java | 4 +- .../metastore/tools/schematool/IndexInfo.java | 42 ++ .../tools/schematool/IndexRebuilder.java | 43 ++ .../schematool/IndexRebuilderFactory.java | 48 +++ .../tools/schematool/MSSQLIndexRebuilder.java | 120 ++++++ .../tools/schematool/MetastoreSchemaTool.java | 2 + .../tools/schematool/MySQLIndexRebuilder.java | 128 ++++++ .../schematool/OracleIndexRebuilder.java | 107 +++++ .../schematool/PostgresIndexRebuilder.java | 142 +++++++ .../schematool/SchemaToolCommandLine.java | 9 +- .../SchemaToolTaskRebuildIndexes.java | 99 +++++ .../schematool/TestMSSQLIndexRebuilder.java | 338 +++++++++++++++ .../schematool/TestMySQLIndexRebuilder.java | 342 +++++++++++++++ .../schematool/TestOracleIndexRebuilder.java | 397 ++++++++++++++++++ .../TestPostgresIndexRebuilder.java | 367 ++++++++++++++++ .../TestSchemaToolTaskRebuildIndexes.java | 93 ++++ 17 files changed, 2357 insertions(+), 5 deletions(-) create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/AbstractIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MSSQLIndexRebuilder.java create mode 100644 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/OracleIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestSchemaToolTaskRebuildIndexes.java diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/AbstractIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/AbstractIndexRebuilder.java new file mode 100644 index 000000000000..36c40648b110 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/AbstractIndexRebuilder.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Shared duplicate-check and DDL execution logic for {@link IndexRebuilder}. 
*/ +public abstract class AbstractIndexRebuilder implements IndexRebuilder { + + private static final Logger LOG = LoggerFactory.getLogger(AbstractIndexRebuilder.class); + + protected final Connection conn; + protected final boolean needsQuotedIdentifier; + protected final String quoteCharacter; + + protected AbstractIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, + String quoteCharacter) { + this.conn = conn; + this.needsQuotedIdentifier = needsQuotedIdentifier; + this.quoteCharacter = quoteCharacter; + } + + @Override + public long findDuplicates(IndexInfo index) throws HiveMetaException { + if (!index.unique()) { + return 0; + } + String quotedCols = index.columns().stream() + .map(c -> "" + c + "") + .collect(Collectors.joining(", ")); + String sql = MetastoreSchemaTool.quote( + "SELECT COUNT(*) FROM (SELECT " + quotedCols + + " FROM " + index.tableName() + "" + + " GROUP BY " + quotedCols + + " HAVING COUNT(*) > 1) duplicates", + needsQuotedIdentifier, quoteCharacter); + try (PreparedStatement ps = conn.prepareStatement(sql); + ResultSet rs = ps.executeQuery()) { + return rs.next() ? rs.getLong(1) : 0; + } catch (SQLException e) { + throw new HiveMetaException( + "Failed to check for duplicate rows for index \"" + index.indexName() + "\"", e); + } + } + + /** Executes one or more DDL statements, logging each before execution. */ + protected void executeRebuild(IndexInfo index, String... 
ddls) throws HiveMetaException { + try (Statement stmt = conn.createStatement()) { + for (String ddl : ddls) { + LOG.info("Executing: {}", ddl); + stmt.execute(ddl); + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to rebuild index \"" + index.indexName() + "\"", e); + } + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/HiveSchemaHelper.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/HiveSchemaHelper.java index 47000cc41b20..041ef39e924e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/HiveSchemaHelper.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/HiveSchemaHelper.java @@ -42,7 +42,7 @@ public class HiveSchemaHelper { public static final String DB_HIVE = "hive"; public static final String DB_MSSQL = "mssql"; public static final String DB_MYSQL = "mysql"; - public static final String DB_POSTGRACE = "postgres"; + public static final String DB_POSTGRES = "postgres"; public static final String DB_ORACLE = "oracle"; public static final String EMBEDDED_HS2_URL = "jdbc:hive2://?hive.conf.restricted.list=;hive.security.authorization.sqlstd.confwhitelist=.*;" @@ -581,7 +581,7 @@ public static NestedScriptParser getDbCommandParser(String dbName, return new MSSQLCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine); } else if (dbName.equalsIgnoreCase(DB_MYSQL)) { return new MySqlCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine); - } else if (dbName.equalsIgnoreCase(DB_POSTGRACE)) { + } else if (dbName.equalsIgnoreCase(DB_POSTGRES)) { return new PostgresCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine); } else if (dbName.equalsIgnoreCase(DB_ORACLE)) { return new OracleCommandParser(dbOpts, msUsername, msPassword, conf, usingSqlLine); diff --git 
/**
 * Immutable description of one database index, used by {@link IndexRebuilder}.
 *
 * @param indexName name of the index as reported by the database catalog
 * @param tableName name of the table the index belongs to
 * @param unique whether the index enforces uniqueness
 * @param constraintBacked whether the index is owned by a table constraint
 * @param columns key column names in index order; defensively copied, must not be {@code null}
 */
public record IndexInfo(
    String indexName,
    String tableName,
    boolean unique,
    boolean constraintBacked,
    List<String> columns) {

  /** Snapshots {@code columns} so later changes to the caller's list are not visible here. */
  public IndexInfo {
    columns = List.copyOf(columns);
  }

  /** Renders as {@code CONSTRAINT "name" ON "table"} or {@code INDEX "name" ON "table"}. */
  @Override
  public String toString() {
    String kind = constraintBacked ? "CONSTRAINT" : "INDEX";
    return kind + " \"" + indexName + "\" ON \"" + tableName + "\"";
  }
}
+ */ + long findDuplicates(IndexInfo index) throws HiveMetaException; + + void rebuildIndex(IndexInfo index) throws HiveMetaException; + + /** Returns a description of the DDL that would be executed to rebuild the index. */ + String describeRebuildDDL(IndexInfo index); +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java new file mode 100644 index 000000000000..1711206ebb00 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * Factory for creating {@link IndexRebuilder} instances for the given database type. + * Add support for a new backend by adding a {@code case} branch. 
+ */ +public final class IndexRebuilderFactory { + + private IndexRebuilderFactory() { + } + + public static IndexRebuilder create(String dbType, Connection conn, + MetastoreSchemaTool schemaTool) throws HiveMetaException { + return switch (dbType.toLowerCase()) { + case HiveSchemaHelper.DB_POSTGRES -> + new PostgresIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_MYSQL -> + new MySQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_ORACLE -> + new OracleIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_MSSQL -> + new MSSQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + default -> throw new HiveMetaException( + "-rebuildIndexes is not supported for -dbType " + dbType + "."); + }; + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MSSQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MSSQLIndexRebuilder.java new file mode 100644 index 000000000000..66c4cfb8eee7 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MSSQLIndexRebuilder.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * SQL Server implementation of {@link IndexRebuilder}. + * + *

Uses {@code sys.indexes} / {@code sys.index_columns} / {@code sys.columns} and rebuilds + * with {@code ALTER INDEX name ON table REBUILD}. + */ +class MSSQLIndexRebuilder extends AbstractIndexRebuilder { + + // i.type: 1 = clustered, 2 = nonclustered. + // i.name IS NOT NULL excludes heap pseudo-entries (type 0). + private static final String QUERY_INDEXES = """ + SELECT i.name AS index_name, + t.name AS table_name, + i.is_unique + FROM sys.indexes i + JOIN sys.tables t ON t.object_id = i.object_id + WHERE t.is_ms_shipped = 0 + AND i.type IN (1, 2) + AND i.name IS NOT NULL + ORDER BY t.name, i.name"""; + + // INCLUDE columns are not key columns; exclude them. + private static final String QUERY_INDEX_COLUMNS = """ + SELECT c.name AS column_name + FROM sys.indexes i + JOIN sys.tables t ON t.object_id = i.object_id + JOIN sys.index_columns ic ON ic.object_id = i.object_id + AND ic.index_id = i.index_id + JOIN sys.columns c ON c.object_id = ic.object_id + AND c.column_id = ic.column_id + WHERE t.name = ? AND i.name = ? 
+ AND ic.is_included_column = 0 + ORDER BY ic.key_ordinal"""; + + MSSQLIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, String quoteCharacter) { + super(conn, needsQuotedIdentifier, quoteCharacter); + } + + @Override + public List loadIndexes() throws HiveMetaException { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) { + List indexes = new ArrayList<>(); + while (rs.next()) { + String indexName = rs.getString("index_name"); + String tableName = rs.getString("table_name"); + boolean isUnique = rs.getBoolean("is_unique"); + List columns = loadIndexColumns(tableName, indexName); + // constraintBacked is irrelevant for MSSQL: ALTER INDEX REBUILD works identically + // for all index types + indexes.add(new IndexInfo(indexName, tableName, isUnique, false, columns)); + } + return indexes; + } catch (SQLException e) { + throw new HiveMetaException("Failed to load indexes from SQL Server catalog", e); + } + } + + @Override + public void rebuildIndex(IndexInfo index) throws HiveMetaException { + executeRebuild(index, buildRebuildDdl(index)); + } + + @Override + public String describeRebuildDDL(IndexInfo index) { + return buildRebuildDdl(index) + ";"; + } + + private String buildRebuildDdl(IndexInfo index) { + // MSSQL requires the table name because index names are unique per-table, not per-schema. 
+ return MetastoreSchemaTool.quote( + "ALTER INDEX " + index.indexName() + "" + + " ON " + index.tableName() + " REBUILD", + needsQuotedIdentifier, quoteCharacter); + } + + private List loadIndexColumns(String tableName, String indexName) throws SQLException { + List columns = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(QUERY_INDEX_COLUMNS)) { + ps.setString(1, tableName); + ps.setString(2, indexName); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + columns.add(rs.getString("column_name")); + } + } + } + return columns; + } +} + diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MetastoreSchemaTool.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MetastoreSchemaTool.java index 2dc07cb319bf..acaadac7915d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MetastoreSchemaTool.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MetastoreSchemaTool.java @@ -477,6 +477,8 @@ public int run(String metastoreHome, String[] args, OptionGroup additionalOption task = new SchemaToolTaskDrop(); } else if (cmdLine.hasOption("createLogsTable")) { task = new SchemaToolTaskCreateLogsTable(); + } else if (cmdLine.hasOption("rebuildIndexes")) { + task = new SchemaToolTaskRebuildIndexes(); } else { throw new HiveMetaException("No task defined!"); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java new file mode 100644 index 000000000000..9d90b8530a3c --- /dev/null +++ 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * MySQL/MariaDB implementation of {@link IndexRebuilder}. + * + *

Loads metadata from {@code INFORMATION_SCHEMA.STATISTICS} and rebuilds with one + * atomic {@code ALTER TABLE DROP ..., ADD ...} statement. + */ +class MySQLIndexRebuilder extends AbstractIndexRebuilder { + + // Keep key column order by SEQ_IN_INDEX. + // PRIMARY is the only special drop form (DROP PRIMARY KEY); other UNIQUE definitions are + // dropped as named indexes. + private static final String QUERY_INDEXES = """ + SELECT TABLE_NAME, + INDEX_NAME, + (NON_UNIQUE = 0) AS is_unique, + (INDEX_NAME = 'PRIMARY') AS constraint_backed, + COLUMN_NAME + FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_SCHEMA = DATABASE() + AND INDEX_TYPE = 'BTREE' + ORDER BY TABLE_NAME, INDEX_NAME, SEQ_IN_INDEX"""; + + MySQLIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, String quoteCharacter) { + super(conn, needsQuotedIdentifier, quoteCharacter); + } + + @Override + public void rebuildIndex(IndexInfo index) throws HiveMetaException { + executeRebuild(index, buildAtomicRebuildDdl(index)); + } + + @Override + public String describeRebuildDDL(IndexInfo index) { + return buildAtomicRebuildDdl(index) + ";"; + } + + private String buildAtomicRebuildDdl(IndexInfo index) { + String quotedCols = index.columns().stream() + .map(c -> "" + c + "") + .collect(Collectors.joining(", ")); + String template; + if (index.constraintBacked()) { + // DROP PRIMARY KEY takes no name. + template = "ALTER TABLE " + index.tableName() + "" + + " DROP PRIMARY KEY, ADD PRIMARY KEY (" + quotedCols + ")"; + } else { + template = "ALTER TABLE " + index.tableName() + "" + + " DROP INDEX " + index.indexName() + "," + + (index.unique() ? " ADD UNIQUE INDEX " : " ADD INDEX ") + + "" + index.indexName() + "" + + " (" + quotedCols + ") USING BTREE"; + } + return MetastoreSchemaTool.quote(template, needsQuotedIdentifier, quoteCharacter); + } + + @Override + public List loadIndexes() throws HiveMetaException { + // STATISTICS returns one row per index column; accumulate rows into one index object. 
+ LinkedHashMap byKey = new LinkedHashMap<>(); + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) { + while (rs.next()) { + String tableName = rs.getString("TABLE_NAME"); + String indexName = rs.getString("INDEX_NAME"); + boolean isUnique = rs.getBoolean("is_unique"); + boolean isConstraintBacked = rs.getBoolean("constraint_backed"); + String column = rs.getString("COLUMN_NAME"); + // Use a delimiter to avoid collisions when table/index names are concatenated. + byKey.computeIfAbsent(tableName + "\0" + indexName, + k -> new IndexAccumulator(indexName, tableName, isUnique, isConstraintBacked)) + .columns.add(column); + } + } catch (SQLException e) { + throw new HiveMetaException("Failed to load indexes from MySQL catalog", e); + } + List indexes = new ArrayList<>(byKey.size()); + for (IndexAccumulator acc : byKey.values()) { + indexes.add(new IndexInfo(acc.indexName, acc.tableName, acc.unique, acc.constraintBacked, + acc.columns)); + } + return indexes; + } + + private static final class IndexAccumulator { + final String indexName; + final String tableName; + final boolean unique; + final boolean constraintBacked; + final List columns = new ArrayList<>(); + + IndexAccumulator(String indexName, String tableName, boolean unique, boolean constraintBacked) { + this.indexName = indexName; + this.tableName = tableName; + this.unique = unique; + this.constraintBacked = constraintBacked; + } + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/OracleIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/OracleIndexRebuilder.java new file mode 100644 index 000000000000..10542f8e4593 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/OracleIndexRebuilder.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * Oracle implementation of {@link IndexRebuilder}. + * + *

Uses {@code USER_INDEXES} / {@code USER_IND_COLUMNS}, automatically scoped to the + * current user. Rebuilds via {@code ALTER INDEX name REBUILD} — the index is never dropped, + * so PK constraints and FK references remain intact throughout. + */ +class OracleIndexRebuilder extends AbstractIndexRebuilder { + + // CASE WHEN required: Oracle does not support boolean expressions in SELECT. + // INDEX_TYPE = 'NORMAL': limits to standard B-tree indexes. + private static final String QUERY_INDEXES = """ + SELECT INDEX_NAME, + TABLE_NAME, + CASE WHEN UNIQUENESS = 'UNIQUE' THEN 1 ELSE 0 END AS is_unique + FROM USER_INDEXES + WHERE INDEX_TYPE = 'NORMAL' + ORDER BY TABLE_NAME, INDEX_NAME"""; + + private static final String QUERY_INDEX_COLUMNS = """ + SELECT COLUMN_NAME + FROM USER_IND_COLUMNS + WHERE INDEX_NAME = ? + ORDER BY COLUMN_POSITION"""; + + OracleIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, String quoteCharacter) { + super(conn, needsQuotedIdentifier, quoteCharacter); + } + + @Override + public List loadIndexes() throws HiveMetaException { + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) { + List indexes = new ArrayList<>(); + while (rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + String tableName = rs.getString("TABLE_NAME"); + boolean isUnique = rs.getInt("is_unique") == 1; + List columns = loadIndexColumns(indexName); + // constraintBacked is not meaningful for Oracle as ALTER INDEX REBUILD works + // identically for all index types + indexes.add(new IndexInfo(indexName, tableName, isUnique, false, columns)); + } + return indexes; + } catch (SQLException e) { + throw new HiveMetaException("Failed to load indexes from Oracle catalog", e); + } + } + + @Override + public void rebuildIndex(IndexInfo index) throws HiveMetaException { + executeRebuild(index, buildRebuildDdl(index)); + } + + @Override + public String describeRebuildDDL(IndexInfo index) { + return buildRebuildDdl(index) + 
";"; + } + + private String buildRebuildDdl(IndexInfo index) { + return MetastoreSchemaTool.quote( + "ALTER INDEX " + index.indexName() + " REBUILD", + needsQuotedIdentifier, quoteCharacter); + } + + private List loadIndexColumns(String indexName) throws SQLException { + List columns = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(QUERY_INDEX_COLUMNS)) { + ps.setString(1, indexName); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + columns.add(rs.getString("COLUMN_NAME")); + } + } + } + return columns; + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java new file mode 100644 index 000000000000..62657dec5c78 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.metastore.HiveMetaException; + +/** + * Postgres implementation of {@link IndexRebuilder}. + * + *

Uses catalog-sourced DDL via {@code pg_get_indexdef} / {@code pg_get_constraintdef}. + * Constraint-backed indexes use DROP/ADD CONSTRAINT; standalone indexes use DROP/CREATE INDEX. + */ +class PostgresIndexRebuilder extends AbstractIndexRebuilder { + + // ic = index class, tc = table class. Restrict to btree and use pg_constraint to + // distinguish constraint-backed indexes from standalone indexes. + // relkind: 'i' = index, 'r' = ordinary table (excludes views, partitions, etc.). + private static final String QUERY_INDEXES = """ + SELECT ic.relname AS indexname, tc.relname AS tablename, ix.indisunique, + (con.conname IS NOT NULL) AS constraint_backed, -- true when index is owned by a table constraint + pg_get_indexdef(ic.oid) AS index_def, -- catalog-generated CREATE INDEX statement + con.conname AS constraint_name, + pg_get_constraintdef(con.oid) AS constraint_def -- catalog-generated constraint definition fragment + FROM pg_index ix + JOIN pg_class ic ON ic.oid = ix.indexrelid AND ic.relkind = 'i' + JOIN pg_class tc ON tc.oid = ix.indrelid AND tc.relkind = 'r' + JOIN pg_am am ON am.oid = ic.relam AND am.amname = 'btree' -- restrict to btree indexes only + LEFT JOIN pg_constraint con ON con.conindid = ic.oid -- links index to PK/UNIQUE constraint when present + WHERE ic.relnamespace = current_schema()::regnamespace -- only objects in the active schema + """; + +private static final String QUERY_INDEX_COLUMNS = """ + SELECT a.attname + FROM pg_index ix + JOIN pg_class ic ON ic.oid = ix.indexrelid AND ic.relkind = 'i' + JOIN pg_attribute a ON a.attrelid = ix.indrelid -- read columns from the base table of the index + AND a.attnum = ANY(ix.indkey) -- keep attrs whose attnum appears in index key vector + AND a.attnum > 0 -- system columns have negative attnum; exclude them + WHERE ic.relname = ? 
AND ic.relnamespace = current_schema()::regnamespace -- scope name lookup to active schema + ORDER BY array_position(ix.indkey, a.attnum) -- ix.indkey stores attr numbers in key order + """; + + private record PgDdl(String dropDdl, String createDdl) {} + + private final Map ddlMap = new LinkedHashMap<>(); + + PostgresIndexRebuilder(Connection conn, boolean needsQuotedIdentifier, String quoteCharacter) { + super(conn, needsQuotedIdentifier, quoteCharacter); + } + + @Override + public List loadIndexes() throws HiveMetaException { + ddlMap.clear(); + try (Statement stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) { + List indexes = new ArrayList<>(); + while (rs.next()) { + String indexName = rs.getString("indexname"); + String tableName = rs.getString("tablename"); + boolean isUnique = rs.getBoolean("indisunique"); + boolean isConstraintBacked = rs.getBoolean("constraint_backed"); + String indexDef = rs.getString("index_def"); + String constraintName = rs.getString("constraint_name"); + String constraintDef = rs.getString("constraint_def"); + List columns = loadIndexColumns(indexName); + + String dropDdl; + String createDdl; + if (isConstraintBacked) { + dropDdl = MetastoreSchemaTool.quote( + "ALTER TABLE ONLY " + tableName + "" + + " DROP CONSTRAINT " + constraintName + "", + needsQuotedIdentifier, quoteCharacter); + createDdl = MetastoreSchemaTool.quote( + "ALTER TABLE ONLY " + tableName + "" + + " ADD CONSTRAINT " + constraintName + " " + constraintDef, + needsQuotedIdentifier, quoteCharacter); + } else { + dropDdl = MetastoreSchemaTool.quote( + "DROP INDEX IF EXISTS " + indexName + "", + needsQuotedIdentifier, quoteCharacter); + createDdl = indexDef; + } + ddlMap.put(indexName, new PgDdl(dropDdl, createDdl)); + indexes.add(new IndexInfo(indexName, tableName, isUnique, isConstraintBacked, columns)); + } + return indexes; + } catch (SQLException e) { + throw new HiveMetaException("Failed to load indexes from Postgres catalog", e); + 
} + } + + @Override + public void rebuildIndex(IndexInfo index) throws HiveMetaException { + PgDdl ddl = ddlMap.get(index.indexName()); + executeRebuild(index, ddl.dropDdl(), ddl.createDdl()); + } + + @Override + public String describeRebuildDDL(IndexInfo index) { + PgDdl ddl = ddlMap.get(index.indexName()); + return ddl.dropDdl() + ";" + System.lineSeparator() + ddl.createDdl() + ";"; + } + + private List loadIndexColumns(String indexName) throws SQLException { + List columns = new ArrayList<>(); + try (PreparedStatement ps = conn.prepareStatement(QUERY_INDEX_COLUMNS)) { + ps.setString(1, indexName); + try (ResultSet rs = ps.executeQuery()) { + while (rs.next()) { + columns.add(rs.getString("attname")); + } + } + } + return columns; + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java index 72c1e1ac4884..237d28fcaf50 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java @@ -82,6 +82,8 @@ private Options createOptions(OptionGroup additionalOptions) { .hasArg() .withDescription("Create table for Hive warehouse/compute logs") .create("createLogsTable"); + Option rebuildIndexesOpt = new Option("rebuildIndexes", + "Detect and rebuild corrupt indexes in the metastore backend DB (Postgres only)."); OptionGroup optGroup = new OptionGroup(); optGroup @@ -100,7 +102,8 @@ private Options createOptions(OptionGroup additionalOptions) { .addOption(moveDatabase) .addOption(moveTable) .addOption(createUserOpt) - .addOption(createLogsTable); + .addOption(createLogsTable) + .addOption(rebuildIndexesOpt); optGroup.setRequired(true); Option 
userNameOpt = OptionBuilder.withArgName("user") @@ -240,10 +243,10 @@ private CommandLine getCommandLine(String[] args) throws ParseException { private static final Set VALID_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, HiveSchemaHelper.DB_HIVE, HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, - HiveSchemaHelper.DB_POSTGRACE, HiveSchemaHelper.DB_ORACLE); + HiveSchemaHelper.DB_POSTGRES, HiveSchemaHelper.DB_ORACLE); private static final Set VALID_META_DB_TYPES = ImmutableSet.of(HiveSchemaHelper.DB_DERBY, - HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, HiveSchemaHelper.DB_POSTGRACE, + HiveSchemaHelper.DB_MSSQL, HiveSchemaHelper.DB_MYSQL, HiveSchemaHelper.DB_POSTGRES, HiveSchemaHelper.DB_ORACLE); private void validate() throws ParseException { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java new file mode 100644 index 000000000000..7c4a47f888e1 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Rebuilds all B-tree indexes in the HMS backend database. + * + *

Aborts before any index is rebuilt if duplicate keys are found for any unique index, and reports + * the affected indexes. + * + *

With {@code --dryRun}, logs the rebuild DDL without executing it. + */ +class SchemaToolTaskRebuildIndexes extends SchemaToolTask { + + private static final Logger LOG = LoggerFactory.getLogger(SchemaToolTaskRebuildIndexes.class); + + @Override + void setCommandLineArguments(SchemaToolCommandLine cl) { + } + + @Override + void execute() throws HiveMetaException { + try (Connection conn = schemaTool.getConnectionToMetastore(false)) { + IndexRebuilder rebuilder = + IndexRebuilderFactory.create(schemaTool.getDbType(), conn, schemaTool); + executeWithRebuilder(rebuilder); + } catch (SQLException e) { + throw new HiveMetaException("Failed to close metastore connection", e); + } + } + + void executeWithRebuilder(IndexRebuilder rebuilder) throws HiveMetaException { + List indexes = rebuilder.loadIndexes(); + if (indexes.isEmpty()) { + LOG.info("No indexes found to rebuild."); + return; + } + + LOG.info("Found {} index(es) to rebuild.", indexes.size()); + + List blocked = new ArrayList<>(); + for (IndexInfo index : indexes) { + long dupes = rebuilder.findDuplicates(index); + if (dupes > 0) { + LOG.error("Cannot rebuild index \"{}\" on table \"{}\": {} duplicate row group(s) detected." + + " Clean up duplicates first.", index.indexName(), index.tableName(), dupes); + blocked.add(index); + } + } + + if (!blocked.isEmpty()) { + String detail = blocked.stream() + .map(i -> "\"" + i.indexName() + "\" on \"" + i.tableName() + "\"") + .collect(Collectors.joining(", ")); + throw new HiveMetaException("Index rebuild blocked by duplicate data in " + + blocked.size() + " index(es): " + detail + ". Remove duplicates and retry."); + } + + for (IndexInfo index : indexes) { + LOG.info("Rebuilding: {}", index); + LOG.info(rebuilder.describeRebuildDDL(index)); + if (!schemaTool.isDryRun()) { + rebuilder.rebuildIndex(index); + LOG.info("Done."); + } + } + + if (schemaTool.isDryRun()) { + LOG.info("Dry run complete. 
No changes were made."); + } else { + LOG.info("Index rebuild complete."); + } + } +} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java new file mode 100644 index 000000000000..95e88fd893a7 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java @@ -0,0 +1,338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; +import org.apache.hadoop.hive.metastore.dbinstall.rules.Mssql; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration tests for {@link MSSQLIndexRebuilder}. + * + *

Covers index discovery and rebuild behavior, including INCLUDE columns and FK-backed PKs. + * SQL Server DDL is committed in {@code @BeforeClass}; {@code @After} rollback affects only DML. + */ +@Category(MetastoreCheckinTest.class) +public class TestMSSQLIndexRebuilder { + + @ClassRule + public static final Mssql mssql = new Mssql(); + + private static final String TEST_DB = "idx_rebuild_test"; + + private static Connection conn; + + // Includes PK, UNIQUE, non-unique, multi-column, covering, and FK-backed PK cases. + private static final String[] DDL_CREATE_TABLES = { + "CREATE TABLE pk_table (id BIGINT NOT NULL, name NVARCHAR(256), " + + "CONSTRAINT pk_table_pk PRIMARY KEY (id))", + "CREATE TABLE unique_table (id BIGINT, name NVARCHAR(256))", + "CREATE UNIQUE INDEX idx_unique_name ON unique_table (name)", + "CREATE TABLE plain_table (id BIGINT, name NVARCHAR(256))", + "CREATE INDEX idx_plain_name ON plain_table (name)", + "CREATE TABLE multi_col_table (part_name NVARCHAR(256), tbl_id BIGINT, val NVARCHAR(256))", + "CREATE UNIQUE INDEX idx_multi_col ON multi_col_table (part_name, tbl_id)", + "CREATE TABLE covering_table (id BIGINT, key_col NVARCHAR(256), payload NVARCHAR(256))", + // INCLUDE column should not appear in idx.columns(). 
+ "CREATE INDEX idx_covering ON covering_table (key_col) INCLUDE (payload)", + "CREATE TABLE fk_parent_table (id BIGINT NOT NULL, val NVARCHAR(256), " + + "CONSTRAINT fk_parent_pk PRIMARY KEY (id))", + "CREATE TABLE fk_child_table (id BIGINT, parent_id BIGINT, " + + "CONSTRAINT fk_child_fk FOREIGN KEY (parent_id) REFERENCES fk_parent_table (id))" + }; + + private MSSQLIndexRebuilder rebuilder; + + @BeforeClass + public static void setUpClass() throws Exception { + Class.forName(mssql.getJdbcDriver()); + + try (Connection masterConn = DriverManager.getConnection( + mssql.getInitialJdbcUrl(), mssql.getDbRootUser(), mssql.getDbRootPassword()); + Statement stmt = masterConn.createStatement()) { + stmt.execute("IF DB_ID('" + TEST_DB + "') IS NOT NULL DROP DATABASE " + TEST_DB); + stmt.execute("CREATE DATABASE " + TEST_DB); + } + + // Reconnect to the test database. + String testDbUrl = mssql.getInitialJdbcUrl() + .replace("DatabaseName=master", "DatabaseName=" + TEST_DB); + conn = DriverManager.getConnection(testDbUrl, mssql.getDbRootUser(), mssql.getDbRootPassword()); + conn.setAutoCommit(false); + + for (String ddl : DDL_CREATE_TABLES) { + try (Statement stmt = conn.createStatement()) { + stmt.execute(ddl); + } + } + conn.commit(); + } + + @AfterClass + public static void tearDownClass() throws Exception { + if (conn != null) { + conn.close(); + } + try (Connection masterConn = DriverManager.getConnection( + mssql.getInitialJdbcUrl(), mssql.getDbRootUser(), mssql.getDbRootPassword()); + Statement stmt = masterConn.createStatement()) { + stmt.execute("IF DB_ID('" + TEST_DB + "') IS NOT NULL DROP DATABASE " + TEST_DB); + } + } + + @Before + public void setUp() { + rebuilder = new MSSQLIndexRebuilder(conn, false, "\""); + } + + @After + public void tearDown() throws Exception { + conn.rollback(); + } + + // ------------------------------------------------------------------------- + // Query correctness — loadIndexes + // 
------------------------------------------------------------------------- + + @Test + public void primaryKeyIndexIsUniqueAndNotConstraintBacked() throws Exception { + IndexInfo pk = findByTableAndIndex("pk_table", "pk_table_pk"); + assertNotNull("PK-backing index should be present", pk); + assertEquals("pk_table", pk.tableName()); + assertTrue("PK-backing index should be unique", pk.unique()); + assertFalse("MSSQL always returns constraintBacked=false", pk.constraintBacked()); + } + + @Test + public void uniqueIndexIsUniqueAndNotConstraintBacked() throws Exception { + IndexInfo idx = findByTableAndIndex("unique_table", "idx_unique_name"); + assertNotNull("UNIQUE index should be present", idx); + assertTrue(idx.unique()); + assertFalse(idx.constraintBacked()); + } + + @Test + public void nonUniqueIndexIsNotUnique() throws Exception { + IndexInfo idx = findByTableAndIndex("plain_table", "idx_plain_name"); + assertNotNull("Non-unique index should be present", idx); + assertFalse(idx.unique()); + assertFalse(idx.constraintBacked()); + } + + @Test + public void multiColumnIndexColumnsReturnedInDefinitionOrder() throws Exception { + IndexInfo idx = findByTableAndIndex("multi_col_table", "idx_multi_col"); + assertNotNull(idx); + assertEquals(List.of("part_name", "tbl_id"), idx.columns()); + } + + @Test + public void coveringIndexExcludesIncludeColumns() throws Exception { + // INCLUDE columns are not key columns. 
+ IndexInfo idx = findByTableAndIndex("covering_table", "idx_covering"); + assertNotNull("Covering index should be present", idx); + assertEquals("Only key column should appear; INCLUDE column must be excluded", + List.of("key_col"), idx.columns()); + } + + @Test + public void describeDdlUsesAlterIndexRebuildWithTableName() throws Exception { + IndexInfo idx = findByTableAndIndex("plain_table", "idx_plain_name"); + String ddl = rebuilder.describeRebuildDDL(idx).toUpperCase(); + assertTrue("DDL should use ALTER INDEX", ddl.contains("ALTER INDEX")); + assertTrue("DDL should use REBUILD", ddl.contains("REBUILD")); + // SQL Server requires table name in ALTER INDEX ... REBUILD. + assertTrue("DDL should include the table name", ddl.contains("PLAIN_TABLE")); + assertFalse("DDL should not use DROP INDEX", ddl.contains("DROP INDEX")); + assertFalse("DDL should not use CREATE INDEX", ddl.contains("CREATE INDEX")); + } + + // ------------------------------------------------------------------------- + // Rebuild correctness — each index type + // ------------------------------------------------------------------------- + + @Test + public void rebuildPrimaryKeyIndexExistsAfterRebuild() throws Exception { + IndexInfo pk = findByTableAndIndex("pk_table", "pk_table_pk"); + assertNotNull(pk); + rebuilder.rebuildIndex(pk); + assertTrue("PK-backing index should exist after rebuild", + indexExists("pk_table_pk", "pk_table")); + } + + @Test + public void rebuildUniqueIndexExistsAndEnforcesUniquenessAfterRebuild() throws Exception { + IndexInfo idx = findByTableAndIndex("unique_table", "idx_unique_name"); + rebuilder.rebuildIndex(idx); + assertTrue("UNIQUE index should exist after rebuild", + indexExists("idx_unique_name", "unique_table")); + assertTrue("Uniqueness should be enforced after rebuild", + uniquenessEnforced("unique_table", "name")); + } + + @Test + public void rebuildNonUniqueIndexExistsAfterRebuild() throws Exception { + IndexInfo idx = findByTableAndIndex("plain_table", 
"idx_plain_name"); + rebuilder.rebuildIndex(idx); + assertTrue("Non-unique index should exist after rebuild", + indexExists("idx_plain_name", "plain_table")); + } + + @Test + public void rebuildMultiColumnUniqueIndexEnforcesUniquenessAfterRebuild() throws Exception { + IndexInfo idx = findByTableAndIndex("multi_col_table", "idx_multi_col"); + assertNotNull(idx); + assertTrue(idx.unique()); + assertEquals(List.of("part_name", "tbl_id"), idx.columns()); + + rebuilder.rebuildIndex(idx); + assertTrue("Multi-column index should exist after rebuild", + indexExists("idx_multi_col", "multi_col_table")); + assertTrue("Uniqueness should be enforced on multi-column index after rebuild", + uniquenessEnforcedMultiCol("multi_col_table", "part_name", "tbl_id")); + } + + @Test + public void rebuildFkReferencedPrimaryKeySucceedsWithAlterIndexRebuild() throws Exception { + // ALTER INDEX REBUILD is in-place and keeps FK references intact. + IndexInfo pk = findByTableAndIndex("fk_parent_table", "fk_parent_pk"); + assertNotNull("FK-backed PK index should be present", pk); + + rebuilder.rebuildIndex(pk); + assertTrue("PK-backing index should exist after in-place rebuild", + indexExists("fk_parent_pk", "fk_parent_table")); + } + + // ------------------------------------------------------------------------- + // findDuplicates + // ------------------------------------------------------------------------- + + @Test + public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { + String testDbUrl = mssql.getInitialJdbcUrl() + .replace("DatabaseName=master", "DatabaseName=" + TEST_DB); + Connection closedConn = DriverManager.getConnection( + testDbUrl, mssql.getDbRootUser(), mssql.getDbRootPassword()); + closedConn.close(); + MSSQLIndexRebuilder localRebuilder = new MSSQLIndexRebuilder(closedConn, false, "\""); + IndexInfo nonUnique = new IndexInfo("idx_plain_name", "plain_table", false, false, + List.of("name")); + assertEquals(0, 
localRebuilder.findDuplicates(nonUnique)); + } + + @Test + public void findDuplicatesTableWithDuplicateValuesReturnsPositiveCount() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + stmt.execute("INSERT INTO plain_table VALUES (2, 'alice')"); + } + IndexInfo idx = new IndexInfo("fake_idx", "plain_table", true, false, List.of("name")); + assertTrue("Should detect duplicate name values", rebuilder.findDuplicates(idx) > 0); + } + + @Test + public void findDuplicatesTableWithNoDuplicatesReturnsZero() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + stmt.execute("INSERT INTO plain_table VALUES (2, 'bob')"); + } + IndexInfo idx = new IndexInfo("fake_idx", "plain_table", true, false, List.of("name")); + assertEquals(0, rebuilder.findDuplicates(idx)); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private Map loadAllByName() throws HiveMetaException { + // SQL Server index names are unique per-table, not globally. + return rebuilder.loadIndexes() + .stream() + .collect(Collectors.toMap( + i -> i.tableName() + "." + i.indexName(), + Function.identity())); + } + + private IndexInfo findByTableAndIndex(String tableName, String indexName) + throws HiveMetaException { + return loadAllByName().get(tableName + "." + indexName); + } + + /** Returns true if the named index exists on the given table. */ + private boolean indexExists(String indexName, String tableName) throws SQLException { + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT 1 FROM sys.indexes i + JOIN sys.tables t ON t.object_id = i.object_id + WHERE i.name = ? 
AND t.name = ?""")) { + ps.setString(1, indexName); + ps.setString(2, tableName); + try (ResultSet rs = ps.executeQuery()) { + return rs.next(); + } + } + } + + /** Returns true when duplicate inserts fail with SQLSTATE 23000. */ + private boolean uniquenessEnforced(String table, String column) throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + " (" + column + ") VALUES ('__dup_test__')"); + stmt.execute("INSERT INTO " + table + " (" + column + ") VALUES ('__dup_test__')"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23000".equals(e.getSQLState()); + } + } + + private boolean uniquenessEnforcedMultiCol(String table, String col1, String col2) + throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + " (" + col1 + ", " + col2 + ") VALUES ('p', 1)"); + stmt.execute("INSERT INTO " + table + " (" + col1 + ", " + col2 + ") VALUES ('p', 1)"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23000".equals(e.getSQLState()); + } + } +} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java new file mode 100644 index 000000000000..7e4cb0155920 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; +import org.apache.hadoop.hive.metastore.dbinstall.rules.Mysql; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration tests for {@link MySQLIndexRebuilder}. + * + *

Covers index discovery and rebuild behavior. + * MySQL DDL auto-commits, so {@code @After} rollback only affects DML. + */ +@Category(MetastoreCheckinTest.class) +public class TestMySQLIndexRebuilder { + + @ClassRule + public static final Mysql mysql = new Mysql(); + + private static final String TEST_DB = "test_idx_rebuild"; + + private static Connection conn; + + // plain_table is used for duplicate checks; fk_* tables verify FK-safe atomic PK rebuild. + private static final String DDL_CREATE_TABLES = """ + CREATE TABLE pk_table (id BIGINT, name VARCHAR(256), PRIMARY KEY (id)); + CREATE TABLE unique_table (id BIGINT, name VARCHAR(256)); + ALTER TABLE unique_table ADD UNIQUE KEY uq_unique_table (name); + CREATE TABLE plain_table (id BIGINT, name VARCHAR(256)); + CREATE INDEX idx_plain_btree ON plain_table (name); + CREATE TABLE multi_unique_table (part_name VARCHAR(256), tbl_id BIGINT, value VARCHAR(256)); + ALTER TABLE multi_unique_table ADD UNIQUE KEY uq_multi_unique (part_name, tbl_id); + CREATE TABLE create_unique_table (id BIGINT, name VARCHAR(256)); + CREATE UNIQUE INDEX idx_create_unique ON create_unique_table (name); + CREATE TABLE fk_parent_table (id BIGINT PRIMARY KEY, val VARCHAR(256)); + CREATE TABLE fk_child_table (id BIGINT, parent_id BIGINT, + FOREIGN KEY (parent_id) REFERENCES fk_parent_table(id))"""; + + private MySQLIndexRebuilder rebuilder; + + @BeforeClass + public static void setUpClass() throws Exception { + Class.forName(mysql.getJdbcDriver()); + conn = DriverManager.getConnection( + mysql.getInitialJdbcUrl(), mysql.getDbRootUser(), mysql.getDbRootPassword()); + try (Statement stmt = conn.createStatement()) { + stmt.execute("CREATE DATABASE IF NOT EXISTS " + TEST_DB); + stmt.execute("USE " + TEST_DB); + } + conn.setAutoCommit(false); + for (String ddl : DDL_CREATE_TABLES.split(";")) { + String sql = ddl.trim(); + if (!sql.isEmpty()) { + try (Statement stmt = conn.createStatement()) { + stmt.execute(sql); + } + } + } + } + + @AfterClass + 
public static void tearDownClass() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("DROP DATABASE IF EXISTS " + TEST_DB); + } + conn.close(); + } + + @Before + public void setUp() { + rebuilder = new MySQLIndexRebuilder(conn, true, "`"); + } + + @After + public void tearDown() throws Exception { + conn.rollback(); + } + + // ------------------------------------------------------------------------- + // Query correctness — loadIndexes + // ------------------------------------------------------------------------- + + @Test + public void primaryKeyIsConstraintBackedAndUnique() throws Exception { + IndexInfo pk = findByTableAndIndex("pk_table", "PRIMARY"); + assertNotNull("PK index should be present", pk); + assertEquals("pk_table", pk.tableName()); + assertTrue("PK should be constraint-backed", pk.constraintBacked()); + assertTrue("PK should be unique", pk.unique()); + } + + @Test + public void uniqueKeyIsNotConstraintBackedButIsUnique() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("unique_table.uq_unique_table"); + assertNotNull("UNIQUE KEY index should be present", idx); + assertFalse(idx.constraintBacked()); + assertTrue(idx.unique()); + } + + @Test + public void standaloneCreateUniqueIndexIsUniqueAndNotConstraintBacked() throws Exception { + // CREATE UNIQUE INDEX and ADD UNIQUE KEY should map the same way in the catalog. 
+ Map byName = loadAllByName(); + IndexInfo idx = byName.get("create_unique_table.idx_create_unique"); + assertNotNull("CREATE UNIQUE INDEX should be present", idx); + assertFalse(idx.constraintBacked()); + assertTrue(idx.unique()); + } + + @Test + public void nonUniqueIndexIsNotUniqueAndNotConstraintBacked() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("plain_table.idx_plain_btree"); + assertNotNull("Non-unique btree index should be present", idx); + assertFalse(idx.constraintBacked()); + assertFalse(idx.unique()); + } + + @Test + public void primaryKeyDescribeDdlUsesAtomicAlterTable() throws Exception { + IndexInfo pk = findByTableAndIndex("pk_table", "PRIMARY"); + String ddl = rebuilder.describeRebuildDDL(pk).toUpperCase(); + assertTrue("PK rebuild DDL should use ALTER TABLE", ddl.contains("ALTER TABLE")); + assertTrue("PK rebuild DDL should drop and add primary key in one statement", + ddl.contains("DROP PRIMARY KEY") && ddl.contains("ADD PRIMARY KEY")); + } + + @Test + public void uniqueKeyDescribeDdlUsesAtomicAlterTable() throws Exception { + IndexInfo idx = loadAllByName().get("unique_table.uq_unique_table"); + String ddl = rebuilder.describeRebuildDDL(idx).toUpperCase(); + assertTrue("UNIQUE KEY rebuild DDL should use ALTER TABLE", ddl.contains("ALTER TABLE")); + assertTrue("UNIQUE KEY rebuild DDL should drop and add index in one statement", + ddl.contains("DROP INDEX") && ddl.contains("ADD UNIQUE INDEX")); + } + + @Test + public void multiColumnIndexColumnsReturnedInDefinitionOrder() throws Exception { + IndexInfo idx = loadAllByName().get("multi_unique_table.uq_multi_unique"); + assertNotNull(idx); + assertEquals(List.of("part_name", "tbl_id"), idx.columns()); + } + + // ------------------------------------------------------------------------- + // Rebuild correctness — each index type + // ------------------------------------------------------------------------- + + @Test + public void 
rebuildUniqueKeyExistsAndEnforcesUniquenessAfterRebuild() throws Exception { + IndexInfo idx = loadAllByName().get("unique_table.uq_unique_table"); + rebuilder.rebuildIndex(idx); + assertTrue("Index should exist after rebuild", indexExists("uq_unique_table")); + assertTrue(uniquenessEnforced("unique_table", "name")); + } + + @Test + public void rebuildPrimaryKeyExistsAfterRebuild() throws Exception { + IndexInfo pk = findByTableAndIndex("pk_table", "PRIMARY"); + assertNotNull(pk); + rebuilder.rebuildIndex(pk); + assertTrue(indexExistsOnTable("PRIMARY", "pk_table")); + } + + @Test + public void rebuildNonUniqueIndexExistsAfterRebuild() throws Exception { + IndexInfo idx = loadAllByName().get("plain_table.idx_plain_btree"); + rebuilder.rebuildIndex(idx); + assertTrue(indexExists("idx_plain_btree")); + } + + @Test + public void rebuildMultiColumnUniqueKeyExistsAndEnforcesUniquenessAfterRebuild() + throws Exception { + IndexInfo idx = loadAllByName().get("multi_unique_table.uq_multi_unique"); + assertNotNull(idx); + assertTrue(idx.unique()); + assertEquals(List.of("part_name", "tbl_id"), idx.columns()); + + rebuilder.rebuildIndex(idx); + assertTrue("Index should exist after rebuild", indexExists("uq_multi_unique")); + assertTrue(uniquenessEnforcedMultiCol("multi_unique_table", "part_name", "tbl_id")); + } + + @Test + public void rebuildFkReferencedPrimaryKeySucceedsWithAtomicAlterTable() throws Exception { + // Atomic DROP PRIMARY KEY + ADD PRIMARY KEY avoids ER_DROP_INDEX_FK. 
+ IndexInfo pk = findByTableAndIndex("fk_parent_table", "PRIMARY"); + assertNotNull("FK-backed PK should be present", pk); + + rebuilder.rebuildIndex(pk); + assertTrue("PK should exist after atomic rebuild", + indexExistsOnTable("PRIMARY", "fk_parent_table")); + } + + // ------------------------------------------------------------------------- + // findDuplicates + // ------------------------------------------------------------------------- + + @Test + public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { + Connection closedConn = DriverManager.getConnection( + mysql.getInitialJdbcUrl(), mysql.getDbRootUser(), mysql.getDbRootPassword()); + closedConn.close(); + MySQLIndexRebuilder localRebuilder = new MySQLIndexRebuilder(closedConn, true, "`"); + IndexInfo nonUnique = new IndexInfo("idx", "plain_table", false, false, List.of("name")); + assertEquals(0, localRebuilder.findDuplicates(nonUnique)); + } + + @Test + public void findDuplicatesTableWithDuplicateValuesReturnsPositiveCount() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + stmt.execute("INSERT INTO plain_table VALUES (2, 'alice')"); + } + IndexInfo idx = new IndexInfo( + "fake_idx", "plain_table", true, false, List.of("name")); + assertTrue("Should detect duplicate name values", rebuilder.findDuplicates(idx) > 0); + } + + @Test + public void findDuplicatesTableWithNoDuplicatesReturnsZero() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + stmt.execute("INSERT INTO plain_table VALUES (2, 'bob')"); + } + IndexInfo idx = new IndexInfo( + "fake_idx", "plain_table", true, false, List.of("name")); + assertEquals(0, rebuilder.findDuplicates(idx)); + } + + // ------------------------------------------------------------------------- + // Helpers + // 
------------------------------------------------------------------------- + + private Map loadAllByName() throws HiveMetaException { + return rebuilder.loadIndexes() + .stream() + // PRIMARY is reused across tables, so include table name in the map key. + .collect(Collectors.toMap( + i -> i.tableName() + "." + i.indexName(), + Function.identity())); + } + + /** Looks up an index by its name on a specific table. */ + private IndexInfo findByTableAndIndex(String tableName, String indexName) + throws HiveMetaException { + return rebuilder.loadIndexes().stream() + .filter(i -> i.tableName().equals(tableName) && i.indexName().equals(indexName)) + .findFirst() + .orElse(null); + } + + + private boolean indexExists(String indexName) throws SQLException { + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT 1 FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_SCHEMA = DATABASE() AND INDEX_NAME = ? LIMIT 1""")) { + ps.setString(1, indexName); + try (ResultSet rs = ps.executeQuery()) { + return rs.next(); + } + } + } + + private boolean indexExistsOnTable(String indexName, String tableName) throws SQLException { + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT 1 FROM INFORMATION_SCHEMA.STATISTICS + WHERE TABLE_SCHEMA = DATABASE() AND INDEX_NAME = ? AND TABLE_NAME = ? LIMIT 1""")) { + ps.setString(1, indexName); + ps.setString(2, tableName); + try (ResultSet rs = ps.executeQuery()) { + return rs.next(); + } + } + } + + /** Returns true when duplicate inserts fail with SQLSTATE 23000. 
*/ + private boolean uniquenessEnforced(String table, String column) throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + "(" + column + ") VALUES ('__dup_test__')"); + stmt.execute("INSERT INTO " + table + "(" + column + ") VALUES ('__dup_test__')"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23000".equals(e.getSQLState()); + } + } + + private boolean uniquenessEnforcedMultiCol(String table, String col1, String col2) + throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + "(" + col1 + "," + col2 + ") VALUES ('p', 1)"); + stmt.execute("INSERT INTO " + table + "(" + col1 + "," + col2 + ") VALUES ('p', 1)"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23000".equals(e.getSQLState()); + } + } +} diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java new file mode 100644 index 000000000000..cc8d1a821f1e --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; +import org.apache.hadoop.hive.metastore.dbinstall.rules.Oracle; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration tests for {@link OracleIndexRebuilder}. + * + *

Covers index discovery and rebuild behavior, including FK-backed PKs.
 * Oracle DDL auto-commits; {@code @After} rollback affects only DML.
 * Unquoted identifiers are stored uppercase, so assertions use uppercase names.
 */
@Category(MetastoreCheckinTest.class)
public class TestOracleIndexRebuilder {

  @ClassRule
  public static final Oracle oracle = new Oracle();

  // Dedicated test user keeps USER_INDEXES scoped to test objects.
  private static final String TEST_USER = "IDX_TEST_USER";
  private static final String TEST_PASSWORD = "TestPass1";

  /** SQLSTATE the uniqueness helpers treat as "duplicate rejected". */
  private static final String INTEGRITY_VIOLATION_SQLSTATE = "23000";

  private static Connection conn;

  // Names are uppercase in the catalog for unquoted identifiers.
  private static final String[] DDL_CREATE_TABLES = {
      "CREATE TABLE PK_TABLE (" +
      " ID NUMBER NOT NULL," +
      " NAME VARCHAR2(256)," +
      " CONSTRAINT PK_TABLE_PK PRIMARY KEY (ID)" +
      ")",
      "CREATE TABLE UNIQUE_TABLE (" +
      " ID NUMBER," +
      " NAME VARCHAR2(256)" +
      ")",
      "CREATE UNIQUE INDEX IDX_UNIQUE_NAME ON UNIQUE_TABLE (NAME)",
      "CREATE TABLE PLAIN_TABLE (" +
      " ID NUMBER," +
      " NAME VARCHAR2(256)" +
      ")",
      "CREATE INDEX IDX_PLAIN_NAME ON PLAIN_TABLE (NAME)",
      "CREATE TABLE MULTI_COL_TABLE (" +
      " PART_NAME VARCHAR2(256)," +
      " TBL_ID NUMBER," +
      " VALUE VARCHAR2(256)" +
      ")",
      "CREATE UNIQUE INDEX IDX_MULTI_COL ON MULTI_COL_TABLE (PART_NAME, TBL_ID)",
      "CREATE TABLE FK_PARENT_TABLE (" +
      " ID NUMBER NOT NULL," +
      " VAL VARCHAR2(256)," +
      " CONSTRAINT FK_PARENT_PK PRIMARY KEY (ID)" +
      ")",
      "CREATE TABLE FK_CHILD_TABLE (" +
      " ID NUMBER," +
      " PARENT_ID NUMBER," +
      " CONSTRAINT FK_CHILD_FK FOREIGN KEY (PARENT_ID)" +
      " REFERENCES FK_PARENT_TABLE (ID)" +
      ")",
      // Inline PRIMARY KEY creates an auto-named index/constraint (SYS_Cxxx).
      "CREATE TABLE INLINE_PK_TABLE (" +
      " ID NUMBER PRIMARY KEY," +
      " NAME VARCHAR2(256)" +
      ")",
      // Inline UNIQUE also creates an auto-named index/constraint (SYS_Cxxx).
      "CREATE TABLE INLINE_UNIQUE_TABLE (" +
      " ID NUMBER," +
      " NAME VARCHAR2(256)," +
      " UNIQUE (NAME)" +
      ")"
  };

  private OracleIndexRebuilder rebuilder;

  /**
   * Recreates the dedicated test user through a root connection, then connects as that
   * user and executes every statement of {@code DDL_CREATE_TABLES}.
   */
  @BeforeClass
  public static void setUpClass() throws Exception {
    Class.forName(oracle.getJdbcDriver());

    try (Connection sysConn = DriverManager.getConnection(
            oracle.getInitialJdbcUrl(), oracle.getDbRootUser(), oracle.getDbRootPassword());
         Statement stmt = sysConn.createStatement()) {
      try {
        stmt.execute("DROP USER " + TEST_USER + " CASCADE");
      } catch (SQLException ignored) {
        // Ignore when user does not exist.
      }
      stmt.execute("CREATE USER " + TEST_USER + " IDENTIFIED BY " + TEST_PASSWORD);
      stmt.execute("GRANT CONNECT, RESOURCE TO " + TEST_USER);
      stmt.execute("GRANT UNLIMITED TABLESPACE TO " + TEST_USER);
    }

    // Reconnect as test user.
    String userUrl = oracle.getInitialJdbcUrl();
    conn = DriverManager.getConnection(userUrl, TEST_USER, TEST_PASSWORD);
    conn.setAutoCommit(false);

    for (String ddl : DDL_CREATE_TABLES) {
      try (Statement stmt = conn.createStatement()) {
        stmt.execute(ddl);
      }
    }
  }

  /** Closes the test-user connection (if opened) and drops the test user as root. */
  @AfterClass
  public static void tearDownClass() throws Exception {
    if (conn != null) {
      conn.close();
    }
    try (Connection sysConn = DriverManager.getConnection(
            oracle.getInitialJdbcUrl(), oracle.getDbRootUser(), oracle.getDbRootPassword());
         Statement stmt = sysConn.createStatement()) {
      stmt.execute("DROP USER " + TEST_USER + " CASCADE");
    }
  }

  @Before
  public void setUp() {
    rebuilder = new OracleIndexRebuilder(conn, true, "\"");
  }

  /** Discards any uncommitted DML a test left on the shared connection. */
  @After
  public void tearDown() throws Exception {
    conn.rollback();
  }

  // -------------------------------------------------------------------------
  // Query correctness — loadIndexes
  // -------------------------------------------------------------------------

  @Test
  public void primaryKeyIndexIsUniqueAndNotConstraintBacked() throws Exception {
    IndexInfo pk = findByName("PK_TABLE_PK");
    assertNotNull("PK-backing index should be present", pk);
    assertEquals("PK_TABLE", pk.tableName());
    assertTrue("PK-backing index should be unique", pk.unique());
    assertFalse("Oracle always returns constraintBacked=false", pk.constraintBacked());
  }

  @Test
  public void uniqueIndexIsUniqueAndNotConstraintBacked() throws Exception {
    IndexInfo idx = findByName("IDX_UNIQUE_NAME");
    assertNotNull("UNIQUE index should be present", idx);
    assertEquals("UNIQUE_TABLE", idx.tableName());
    assertTrue(idx.unique());
    assertFalse(idx.constraintBacked());
  }

  @Test
  public void nonUniqueIndexIsNotUnique() throws Exception {
    IndexInfo idx = findByName("IDX_PLAIN_NAME");
    assertNotNull("Non-unique index should be present", idx);
    assertEquals("PLAIN_TABLE", idx.tableName());
    assertFalse(idx.unique());
    assertFalse(idx.constraintBacked());
  }

  @Test
  public void multiColumnIndexColumnsReturnedInDefinitionOrder() throws Exception {
    IndexInfo idx = findByName("IDX_MULTI_COL");
    assertNotNull(idx);
    assertEquals(List.of("PART_NAME", "TBL_ID"), idx.columns());
  }

  @Test
  public void describeDdlUsesAlterIndexRebuild() throws Exception {
    IndexInfo idx = findByName("IDX_PLAIN_NAME");
    assertNotNull("Non-unique index should be present", idx);
    // Locale.ROOT keeps the upper-casing independent of the JVM default locale
    // (a Turkish default would turn 'i' into a dotted capital and break the matches).
    String ddl = rebuilder.describeRebuildDDL(idx).toUpperCase(java.util.Locale.ROOT);
    assertTrue("DDL should use ALTER INDEX", ddl.contains("ALTER INDEX"));
    assertTrue("DDL should use REBUILD", ddl.contains("REBUILD"));
    assertFalse("DDL should not use DROP INDEX", ddl.contains("DROP INDEX"));
    assertFalse("DDL should not use CREATE INDEX", ddl.contains("CREATE INDEX"));
  }

  // -------------------------------------------------------------------------
  // Rebuild correctness — each index type
  // -------------------------------------------------------------------------

  @Test
  public void rebuildPrimaryKeyIndexExistsAfterRebuild() throws Exception {
    IndexInfo pk = findByName("PK_TABLE_PK");
    assertNotNull(pk);
    rebuilder.rebuildIndex(pk);
    assertTrue("PK-backing index should exist after rebuild", indexExists("PK_TABLE_PK"));
  }

  @Test
  public void rebuildUniqueIndexExistsAndEnforcesUniquenessAfterRebuild() throws Exception {
    IndexInfo idx = findByName("IDX_UNIQUE_NAME");
    assertNotNull("UNIQUE index should be present", idx);
    rebuilder.rebuildIndex(idx);
    assertTrue("UNIQUE index should exist after rebuild", indexExists("IDX_UNIQUE_NAME"));
    assertTrue("Uniqueness should be enforced after rebuild",
        uniquenessEnforced("UNIQUE_TABLE", "NAME"));
  }

  @Test
  public void rebuildNonUniqueIndexExistsAfterRebuild() throws Exception {
    IndexInfo idx = findByName("IDX_PLAIN_NAME");
    assertNotNull("Non-unique index should be present", idx);
    rebuilder.rebuildIndex(idx);
    assertTrue("Non-unique index should exist after rebuild", indexExists("IDX_PLAIN_NAME"));
  }

  @Test
  public void rebuildMultiColumnUniqueIndexEnforcesUniquenessAfterRebuild() throws Exception {
    IndexInfo idx = findByName("IDX_MULTI_COL");
    assertNotNull(idx);
    assertTrue(idx.unique());
    assertEquals(List.of("PART_NAME", "TBL_ID"), idx.columns());

    rebuilder.rebuildIndex(idx);
    assertTrue("Multi-column index should exist after rebuild", indexExists("IDX_MULTI_COL"));
    assertTrue("Uniqueness should be enforced on multi-column index after rebuild",
        uniquenessEnforcedMultiCol("MULTI_COL_TABLE", "PART_NAME", "TBL_ID"));
  }

  @Test
  public void inlinePrimaryKeyCreatesSystemNamedIndexThatIsUnique() throws Exception {
    // Inline PRIMARY KEY should appear with an auto-generated index name.
    IndexInfo pk = findByTable("INLINE_PK_TABLE");
    assertNotNull("System-named PK index should be present in USER_INDEXES", pk);
    assertEquals("INLINE_PK_TABLE", pk.tableName());
    assertTrue("Auto-named PK backing index should be unique", pk.unique());
  }

  @Test
  public void inlineUniqueConstraintCreatesSystemNamedIndexThatIsUnique() throws Exception {
    // Inline UNIQUE should appear with an auto-generated index name.
    IndexInfo idx = findByTable("INLINE_UNIQUE_TABLE");
    assertNotNull("System-named UNIQUE constraint index should be present in USER_INDEXES", idx);
    assertEquals("INLINE_UNIQUE_TABLE", idx.tableName());
    assertTrue("Auto-named UNIQUE constraint backing index should be unique", idx.unique());
  }

  @Test
  public void rebuildSystemNamedPrimaryKeyIndexSucceeds() throws Exception {
    IndexInfo pk = findByTable("INLINE_PK_TABLE");
    assertNotNull(pk);
    rebuilder.rebuildIndex(pk);
    assertTrue("System-named PK index should exist after rebuild", indexExists(pk.indexName()));
  }

  @Test
  public void rebuildSystemNamedUniqueIndexSucceeds() throws Exception {
    IndexInfo idx = findByTable("INLINE_UNIQUE_TABLE");
    assertNotNull(idx);
    rebuilder.rebuildIndex(idx);
    assertTrue("System-named UNIQUE index should exist after rebuild",
        indexExists(idx.indexName()));
  }

  @Test
  public void rebuildFkReferencedPrimaryKeySucceedsWithAlterIndexRebuild() throws Exception {
    // ALTER INDEX REBUILD is in-place and keeps FK references intact.
    IndexInfo pk = findByName("FK_PARENT_PK");
    assertNotNull("FK-backed PK index should be present", pk);

    rebuilder.rebuildIndex(pk);
    assertTrue("PK-backing index should exist after in-place rebuild",
        indexExists("FK_PARENT_PK"));
  }

  // -------------------------------------------------------------------------
  // findDuplicates
  // -------------------------------------------------------------------------

  @Test
  public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception {
    // The rebuilder is handed an already-closed connection; presumably any query on it
    // would fail, so a zero result indicates the non-unique path never touched the DB.
    Connection closedConn = DriverManager.getConnection(
        oracle.getInitialJdbcUrl(), TEST_USER, TEST_PASSWORD);
    closedConn.close();
    OracleIndexRebuilder localRebuilder = new OracleIndexRebuilder(closedConn, true, "\"");
    IndexInfo nonUnique = new IndexInfo("IDX_PLAIN_NAME", "PLAIN_TABLE", false, false,
        List.of("NAME"));
    assertEquals(0, localRebuilder.findDuplicates(nonUnique));
  }

  @Test
  public void findDuplicatesTableWithDuplicateValuesReturnsPositiveCount() throws Exception {
    try (Statement stmt = conn.createStatement()) {
      stmt.execute("INSERT INTO PLAIN_TABLE VALUES (1, 'alice')");
      stmt.execute("INSERT INTO PLAIN_TABLE VALUES (2, 'alice')");
    }
    IndexInfo idx = new IndexInfo("FAKE_IDX", "PLAIN_TABLE", true, false, List.of("NAME"));
    assertTrue("Should detect duplicate NAME values", rebuilder.findDuplicates(idx) > 0);
  }

  @Test
  public void findDuplicatesTableWithNoDuplicatesReturnsZero() throws Exception {
    try (Statement stmt = conn.createStatement()) {
      stmt.execute("INSERT INTO PLAIN_TABLE VALUES (1, 'alice')");
      stmt.execute("INSERT INTO PLAIN_TABLE VALUES (2, 'bob')");
    }
    IndexInfo idx = new IndexInfo("FAKE_IDX", "PLAIN_TABLE", true, false, List.of("NAME"));
    assertEquals(0, rebuilder.findDuplicates(idx));
  }

  // -------------------------------------------------------------------------
  // Helpers
  // -------------------------------------------------------------------------

  /** Loads every index visible to the rebuilder, keyed by index name. */
  private Map<String, IndexInfo> loadAllByName() throws HiveMetaException {
    return rebuilder.loadIndexes()
        .stream()
        .collect(Collectors.toMap(IndexInfo::indexName, Function.identity()));
  }

  /** Returns the index with the given name, or null when it is not reported. */
  private IndexInfo findByName(String indexName) throws HiveMetaException {
    return loadAllByName().get(indexName);
  }

  /** Finds the first index on the given table (for system-named indexes). */
  private IndexInfo findByTable(String tableName) throws HiveMetaException {
    return rebuilder.loadIndexes().stream()
        .filter(i -> i.tableName().equals(tableName))
        .findFirst()
        .orElse(null);
  }

  /** Returns true if the named index exists in USER_INDEXES. */
  private boolean indexExists(String indexName) throws SQLException {
    try (PreparedStatement ps = conn.prepareStatement(
        "SELECT 1 FROM USER_INDEXES WHERE INDEX_NAME = ?")) {
      ps.setString(1, indexName);
      try (ResultSet rs = ps.executeQuery()) {
        return rs.next();
      }
    }
  }

  /** Returns true when duplicate inserts fail with SQLSTATE 23000. */
  private boolean uniquenessEnforced(String table, String column) throws SQLException {
    return duplicateInsertRejected(
        "INSERT INTO " + table + " (" + column + ") VALUES ('__dup_test__')");
  }

  /** Two-column variant of {@link #uniquenessEnforced(String, String)}. */
  private boolean uniquenessEnforcedMultiCol(String table, String col1, String col2)
      throws SQLException {
    return duplicateInsertRejected(
        "INSERT INTO " + table + " (" + col1 + ", " + col2 + ") VALUES ('p', 1)");
  }

  /**
   * Executes the same INSERT twice and always rolls the transaction back.
   *
   * @return true only if an {@link SQLException} carrying SQLSTATE 23000 was raised;
   *     false when both inserts succeeded or the failure had a different SQLSTATE
   */
  private boolean duplicateInsertRejected(String insertSql) throws SQLException {
    try (Statement stmt = conn.createStatement()) {
      stmt.execute(insertSql);
      stmt.execute(insertSql);
      return false;
    } catch (SQLException e) {
      return INTEGRITY_VIOLATION_SQLSTATE.equals(e.getSQLState());
    } finally {
      // Leave no probe rows behind regardless of the outcome.
      conn.rollback();
    }
  }
}

diff --git
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java new file mode 100644 index 000000000000..daca1962ed20 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; +import org.apache.hadoop.hive.metastore.dbinstall.rules.Postgres; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +/** + * Integration tests for {@link PostgresIndexRebuilder}. + * + *

Covers index discovery, generated rebuild DDL, and rebuild behavior. + */ +@Category(MetastoreCheckinTest.class) +public class TestPostgresIndexRebuilder { + + @ClassRule + public static final Postgres postgres = new Postgres(); + + private static Connection conn; + + // Created once for the class; test DML is rolled back after each test. + private static final String DDL_CREATE_TABLES = """ + CREATE TABLE pk_table (id BIGINT PRIMARY KEY, name VARCHAR(256)); + CREATE TABLE unique_table (id BIGINT, name VARCHAR(256)); + ALTER TABLE unique_table ADD CONSTRAINT uq_unique_table UNIQUE (name); + CREATE TABLE standalone_table (id BIGINT, name VARCHAR(256)); + CREATE UNIQUE INDEX uq_standalone_table ON standalone_table (name); + CREATE TABLE plain_table (id BIGINT, name VARCHAR(256)); + CREATE INDEX idx_plain_btree ON plain_table (name); + CREATE INDEX idx_plain_hash ON plain_table USING hash (name); + CREATE INDEX idx_plain_multicol ON plain_table (name, id); + CREATE TABLE multi_unique_table (part_name VARCHAR(256), tbl_id BIGINT, value VARCHAR(256)); + ALTER TABLE multi_unique_table ADD CONSTRAINT uq_multi_unique UNIQUE (part_name, tbl_id); + CREATE TABLE "MixedCaseTable" ("Id" BIGINT, "Name" VARCHAR(256)); + CREATE UNIQUE INDEX "uq_MixedCase" ON "MixedCaseTable" ("Name")"""; + + private PostgresIndexRebuilder rebuilder; + + @BeforeClass + public static void setUpClass() throws Exception { + Class.forName(postgres.getJdbcDriver()); + conn = DriverManager.getConnection( + postgres.getInitialJdbcUrl(), postgres.getDbRootUser(), postgres.getDbRootPassword()); + conn.setAutoCommit(false); + for (String ddl : DDL_CREATE_TABLES.split(";")) { + String sql = ddl.trim(); + if (!sql.isEmpty()) { + try (Statement stmt = conn.createStatement()) { + stmt.execute(sql); + } + } + } + conn.commit(); + } + + @AfterClass + public static void tearDownClass() throws Exception { + try (Statement stmt = conn.createStatement()) { + for (String tbl : new String[]{"pk_table", "unique_table", 
"standalone_table", + "plain_table", "multi_unique_table", "\"MixedCaseTable\""}) { + stmt.execute("DROP TABLE IF EXISTS " + tbl + " CASCADE"); + } + } + conn.commit(); + conn.close(); + } + + @Before + public void setUp() { + rebuilder = new PostgresIndexRebuilder(conn, true, "\""); + } + + @After + public void tearDown() throws Exception { + conn.rollback(); + } + + // ------------------------------------------------------------------------- + // Query correctness — loadIndexes + // ------------------------------------------------------------------------- + + @Test + public void primaryKeyIsConstraintBackedAndUnique() throws Exception { + Map byName = loadAllByName(); + IndexInfo pk = findByTable(byName, "pk_table"); + assertNotNull("PK index should be present", pk); + assertTrue("PK should be constraint-backed", pk.constraintBacked()); + assertTrue("PK should be unique", pk.unique()); + } + + @Test + public void uniqueConstraintIsConstraintBackedAndUnique() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("uq_unique_table"); + assertNotNull("UNIQUE constraint index should be present", idx); + assertTrue(idx.constraintBacked()); + assertTrue(idx.unique()); + } + + @Test + public void standaloneUniqueIndexIsNotConstraintBacked() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("uq_standalone_table"); + assertNotNull("Standalone UNIQUE index should be present", idx); + assertFalse(idx.constraintBacked()); + assertTrue(idx.unique()); + } + + @Test + public void nonUniqueIndexIsNotUniqueAndNotConstraintBacked() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("idx_plain_btree"); + assertNotNull("Non-unique btree index should be present", idx); + assertFalse(idx.constraintBacked()); + assertFalse(idx.unique()); + } + + @Test + public void hashIndexIsExcluded() throws Exception { + Map byName = loadAllByName(); + assertFalse("Hash index must not appear in results", 
byName.containsKey("idx_plain_hash")); + } + + @Test + public void multiColumnIndexColumnsReturnedInDefinitionOrder() throws Exception { + Map byName = loadAllByName(); + IndexInfo idx = byName.get("idx_plain_multicol"); + assertNotNull(idx); + assertEquals(List.of("name", "id"), idx.columns()); + } + + @Test + public void standaloneIndexRebuildDdlUsesDropIndex() throws Exception { + IndexInfo idx = loadAllByName().get("uq_standalone_table"); + assertTrue("Standalone index rebuild DDL should use DROP INDEX", + rebuilder.describeRebuildDDL(idx).toUpperCase().contains("DROP INDEX")); + } + + @Test + public void constraintIndexRebuildDdlUsesDropConstraint() throws Exception { + IndexInfo idx = loadAllByName().get("uq_unique_table"); + assertTrue("Constraint-backed index rebuild DDL should use DROP CONSTRAINT", + rebuilder.describeRebuildDDL(idx).toUpperCase().contains("DROP CONSTRAINT")); + } + + // ------------------------------------------------------------------------- + // Rebuild correctness — each index type + // ------------------------------------------------------------------------- + + @Test + public void rebuildUniqueConstraintIndexDropsAndRecreatesWithUniquenessEnforced() + throws Exception { + IndexInfo idx = loadAllByName().get("uq_unique_table"); + rebuilder.rebuildIndex(idx); + conn.commit(); + assertTrue("Index should exist after rebuild", indexExists("uq_unique_table")); + assertTrue(indexIsValid("uq_unique_table")); + assertTrue(uniquenessEnforced("unique_table", "name")); + } + + @Test + public void rebuildStandaloneUniqueIndexDropsAndRecreatesWithUniquenessEnforced() + throws Exception { + IndexInfo idx = loadAllByName().get("uq_standalone_table"); + rebuilder.rebuildIndex(idx); + conn.commit(); + assertTrue(indexExists("uq_standalone_table")); + assertTrue(indexIsValid("uq_standalone_table")); + assertTrue(uniquenessEnforced("standalone_table", "name")); + } + + @Test + public void rebuildPrimaryKeyDropsAndRecreates() throws Exception { + 
IndexInfo pk = findByTable(loadAllByName(), "pk_table"); + assertNotNull(pk); + rebuilder.rebuildIndex(pk); + conn.commit(); + assertTrue(indexExists(pk.indexName())); + assertTrue(indexIsValid(pk.indexName())); + } + + @Test + public void rebuildNonUniqueIndexDropsAndRecreates() throws Exception { + IndexInfo idx = loadAllByName().get("idx_plain_btree"); + rebuilder.rebuildIndex(idx); + conn.commit(); + assertTrue(indexExists("idx_plain_btree")); + assertTrue(indexIsValid("idx_plain_btree")); + } + + @Test + public void rebuildMultiColumnUniqueConstraintDropsAndRecreatesWithUniquenessEnforced() + throws Exception { + IndexInfo idx = loadAllByName().get("uq_multi_unique"); + assertNotNull(idx); + assertTrue(idx.constraintBacked()); + assertTrue(idx.unique()); + assertEquals(List.of("part_name", "tbl_id"), idx.columns()); + + rebuilder.rebuildIndex(idx); + conn.commit(); + assertTrue(indexExists("uq_multi_unique")); + assertTrue(indexIsValid("uq_multi_unique")); + assertTrue(uniquenessEnforcedMultiCol("multi_unique_table", "part_name", "tbl_id")); + } + + // ------------------------------------------------------------------------- + // findDuplicates + // ------------------------------------------------------------------------- + + @Test + public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { + Connection closedConn = DriverManager.getConnection( + postgres.getInitialJdbcUrl(), postgres.getDbRootUser(), postgres.getDbRootPassword()); + closedConn.close(); + PostgresIndexRebuilder localRebuilder = new PostgresIndexRebuilder(closedConn, true, "\""); + IndexInfo nonUnique = new IndexInfo("idx", "plain_table", false, false, List.of("name")); + assertEquals(0, localRebuilder.findDuplicates(nonUnique)); + } + + @Test + public void findDuplicatesTableWithDuplicateValuesReturnsPositiveCount() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + 
stmt.execute("INSERT INTO plain_table VALUES (2, 'alice')"); + } + IndexInfo idx = new IndexInfo("fake_idx", "plain_table", true, false, List.of("name")); + assertTrue("Should detect duplicate name values", rebuilder.findDuplicates(idx) > 0); + } + + @Test + public void findDuplicatesTableWithNoDuplicatesReturnsZero() throws Exception { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO plain_table VALUES (1, 'alice')"); + stmt.execute("INSERT INTO plain_table VALUES (2, 'bob')"); + } + IndexInfo idx = new IndexInfo("fake_idx", "plain_table", true, false, List.of("name")); + assertEquals(0, rebuilder.findDuplicates(idx)); + } + + @Test + public void rebuildMixedCaseIndexDropsAndRecreatesWithUniquenessEnforced() throws Exception { + IndexInfo idx = loadAllByName().get("uq_MixedCase"); + assertNotNull(idx); + assertFalse(idx.constraintBacked()); + assertTrue(idx.unique()); + + rebuilder.rebuildIndex(idx); + conn.commit(); + assertTrue(indexExists("uq_MixedCase")); + assertTrue(indexIsValid("uq_MixedCase")); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private Map loadAllByName() throws HiveMetaException { + return rebuilder.loadIndexes() + .stream() + .collect(Collectors.toMap(IndexInfo::indexName, Function.identity())); + } + + /** Finds a unique constraint-backed index for the given table. 
*/ + private static IndexInfo findByTable(Map byName, String tableName) { + return byName.values().stream() + .filter(i -> i.tableName().equals(tableName) && i.constraintBacked() && i.unique()) + .findFirst() + .orElse(null); + } + + private void execute(String sql) throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute(sql); + } + } + + private boolean indexExists(String indexName) throws SQLException { + try (PreparedStatement ps = conn.prepareStatement( + "SELECT 1 FROM pg_indexes WHERE indexname = ?")) { + ps.setString(1, indexName); + try (ResultSet rs = ps.executeQuery()) { + return rs.next(); + } + } + } + + private boolean indexIsValid(String indexName) throws SQLException { + try (PreparedStatement ps = conn.prepareStatement(""" + SELECT ix.indisvalid + FROM pg_index ix + JOIN pg_class ic ON ic.oid = ix.indexrelid + WHERE ic.relname = ?""")) { + ps.setString(1, indexName); + try (ResultSet rs = ps.executeQuery()) { + return rs.next() && rs.getBoolean(1); + } + } + } + + /** Returns true when duplicate inserts fail with unique_violation. */ + private boolean uniquenessEnforced(String table, String column) throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + "(" + column + ") VALUES ('__dup_test__')"); + stmt.execute("INSERT INTO " + table + "(" + column + ") VALUES ('__dup_test__')"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23505".equals(e.getSQLState()); + } + } + + /** Returns true when duplicate multi-column inserts fail with unique_violation. 
*/ + private boolean uniquenessEnforcedMultiCol(String table, String col1, String col2) + throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.execute("INSERT INTO " + table + "(" + col1 + "," + col2 + ") VALUES ('p', 1)"); + stmt.execute("INSERT INTO " + table + "(" + col1 + "," + col2 + ") VALUES ('p', 1)"); + conn.rollback(); + return false; + } catch (SQLException e) { + conn.rollback(); + return "23505".equals(e.getSQLState()); + } + } +} + diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestSchemaToolTaskRebuildIndexes.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestSchemaToolTaskRebuildIndexes.java new file mode 100644 index 000000000000..41395223ffb6 --- /dev/null +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestSchemaToolTaskRebuildIndexes.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.metastore.tools.schematool; + +import java.util.List; + +import org.apache.hadoop.hive.metastore.HiveMetaException; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link SchemaToolTaskRebuildIndexes} orchestration logic. + */ +@Category(MetastoreUnitTest.class) +public class TestSchemaToolTaskRebuildIndexes { + + private static final IndexInfo UNIQUE_INDEX = new IndexInfo( + "idx", "tbl", true, false, List.of("col")); + + private static final IndexInfo NON_UNIQUE_INDEX = new IndexInfo( + "idx2", "tbl", false, false, List.of("col")); + + private MetastoreSchemaTool schemaTool; + private SchemaToolTaskRebuildIndexes task; + + @Before + public void setUp() { + schemaTool = mock(MetastoreSchemaTool.class); + task = new SchemaToolTaskRebuildIndexes(); + task.schemaTool = schemaTool; + } + + @Test + public void duplicatesBlockRebuildAndNeverCallRebuildIndex() throws Exception { + IndexRebuilder rebuilder = mock(IndexRebuilder.class); + when(rebuilder.loadIndexes()).thenReturn(List.of(UNIQUE_INDEX)); + when(rebuilder.findDuplicates(UNIQUE_INDEX)).thenReturn(2L); + + assertThrows(HiveMetaException.class, () -> task.executeWithRebuilder(rebuilder)); + + verify(rebuilder, never()).rebuildIndex(UNIQUE_INDEX); + } + + @Test + public void dryRunCallsDescribeButNeverCallsRebuildIndex() throws Exception { + IndexRebuilder rebuilder = mock(IndexRebuilder.class); + when(rebuilder.loadIndexes()).thenReturn(List.of(NON_UNIQUE_INDEX)); + when(rebuilder.findDuplicates(NON_UNIQUE_INDEX)).thenReturn(0L); + when(rebuilder.describeRebuildDDL(NON_UNIQUE_INDEX)).thenReturn("DROP ...\nCREATE 
..."); + when(schemaTool.isDryRun()).thenReturn(true); + + task.executeWithRebuilder(rebuilder); + + verify(rebuilder).describeRebuildDDL(NON_UNIQUE_INDEX); + verify(rebuilder, never()).rebuildIndex(NON_UNIQUE_INDEX); + } + + @Test + public void normalRunCallsRebuildIndex() throws Exception { + IndexRebuilder rebuilder = mock(IndexRebuilder.class); + when(rebuilder.loadIndexes()).thenReturn(List.of(NON_UNIQUE_INDEX)); + when(rebuilder.findDuplicates(NON_UNIQUE_INDEX)).thenReturn(0L); + when(rebuilder.describeRebuildDDL(NON_UNIQUE_INDEX)).thenReturn("DROP ...\nCREATE ..."); + when(schemaTool.isDryRun()).thenReturn(false); + + task.executeWithRebuilder(rebuilder); + + verify(rebuilder).rebuildIndex(NON_UNIQUE_INDEX); + } +} From 65f3a2ec4861112d5b41c20c98d2fbabcbe83871 Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Wed, 17 Jun 2026 11:12:35 -0700 Subject: [PATCH 2/3] Address initial review comments --- .../metastore/tools/schematool/IndexInfo.java | 3 +-- .../schematool/PostgresIndexRebuilder.java | 25 ++++++++++++++++++- .../schematool/SchemaToolCommandLine.java | 2 +- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java index b5c2c66087a4..ab729ee18bd8 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexInfo.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.metastore.tools.schematool; import java.util.List; -import org.jetbrains.annotations.NotNull; /** * Record for a DB index, used by {@link IndexRebuilder}. 
@@ -35,7 +34,7 @@ public record IndexInfo( } @Override - public @NotNull String toString() { + public String toString() { return (constraintBacked ? "CONSTRAINT" : "INDEX") + " \"" + indexName + "\" ON \"" + tableName + "\""; } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java index 62657dec5c78..b8e2387e9e4d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java @@ -118,7 +118,30 @@ public List loadIndexes() throws HiveMetaException { @Override public void rebuildIndex(IndexInfo index) throws HiveMetaException { PgDdl ddl = ddlMap.get(index.indexName()); - executeRebuild(index, ddl.dropDdl(), ddl.createDdl()); + boolean prevAutoCommit; + try { + prevAutoCommit = conn.getAutoCommit(); + } catch (SQLException e) { + throw new HiveMetaException("Failed to get autocommit state", e); + } + boolean success = false; + try { + conn.setAutoCommit(false); + executeRebuild(index, ddl.dropDdl(), ddl.createDdl()); + conn.commit(); + success = true; + } catch (SQLException e) { + throw new HiveMetaException("Transaction error rebuilding index \"" + index.indexName() + "\"", e); + } finally { + if (!success) { + try { + conn.rollback(); + } catch (SQLException ignored) {} + } + try { + conn.setAutoCommit(prevAutoCommit); + } catch (SQLException ignored) {} + } } @Override diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java index 237d28fcaf50..6ea53d33867f 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolCommandLine.java @@ -83,7 +83,7 @@ private Options createOptions(OptionGroup additionalOptions) { .withDescription("Create table for Hive warehouse/compute logs") .create("createLogsTable"); Option rebuildIndexesOpt = new Option("rebuildIndexes", - "Detect and rebuild corrupt indexes in the metastore backend DB (Postgres only)."); + "Rebuild indexes in the metastore backend database."); OptionGroup optGroup = new OptionGroup(); optGroup From 1e1068d3b70dc4dd90d154259e0184807a5e735f Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Wed, 17 Jun 2026 13:45:33 -0700 Subject: [PATCH 3/3] Address some sonar issues --- .../schematool/IndexRebuilderFactory.java | 20 ++++++++--------- .../tools/schematool/MySQLIndexRebuilder.java | 3 ++- .../schematool/PostgresIndexRebuilder.java | 22 +++++++++---------- .../SchemaToolTaskRebuildIndexes.java | 7 ++++-- .../schematool/TestMSSQLIndexRebuilder.java | 16 +++++++------- .../schematool/TestMySQLIndexRebuilder.java | 8 +++---- .../schematool/TestOracleIndexRebuilder.java | 12 +++++----- .../TestPostgresIndexRebuilder.java | 13 ++++------- 8 files changed, 50 insertions(+), 51 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java index 1711206ebb00..b719efa99e4b 100644 --- 
a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/IndexRebuilderFactory.java @@ -33,16 +33,16 @@ private IndexRebuilderFactory() { public static IndexRebuilder create(String dbType, Connection conn, MetastoreSchemaTool schemaTool) throws HiveMetaException { return switch (dbType.toLowerCase()) { - case HiveSchemaHelper.DB_POSTGRES -> - new PostgresIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); - case HiveSchemaHelper.DB_MYSQL -> - new MySQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); - case HiveSchemaHelper.DB_ORACLE -> - new OracleIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); - case HiveSchemaHelper.DB_MSSQL -> - new MSSQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); - default -> throw new HiveMetaException( - "-rebuildIndexes is not supported for -dbType " + dbType + "."); + case HiveSchemaHelper.DB_POSTGRES -> + new PostgresIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_MYSQL -> + new MySQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_ORACLE -> + new OracleIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + case HiveSchemaHelper.DB_MSSQL -> + new MSSQLIndexRebuilder(conn, schemaTool.needsQuotedIdentifier, schemaTool.quoteCharacter); + default -> throw new HiveMetaException( + "-rebuildIndexes is not supported for -dbType " + dbType + "."); }; } } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java index 9d90b8530a3c..b5394bafee20 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/MySQLIndexRebuilder.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import org.apache.hadoop.hive.metastore.HiveMetaException; @@ -86,7 +87,7 @@ private String buildAtomicRebuildDdl(IndexInfo index) { @Override public List loadIndexes() throws HiveMetaException { // STATISTICS returns one row per index column; accumulate rows into one index object. - LinkedHashMap byKey = new LinkedHashMap<>(); + Map byKey = new LinkedHashMap<>(); try (Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(QUERY_INDEXES)) { while (rs.next()) { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java index b8e2387e9e4d..b0b925f8d76d 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/PostgresIndexRebuilder.java @@ -52,18 +52,18 @@ class PostgresIndexRebuilder extends AbstractIndexRebuilder { JOIN pg_am am ON am.oid = ic.relam AND am.amname = 'btree' -- restrict to btree indexes only LEFT JOIN pg_constraint con ON con.conindid = ic.oid -- links index to PK/UNIQUE constraint when present WHERE ic.relnamespace = 
current_schema()::regnamespace -- only objects in the active schema - """; + """; -private static final String QUERY_INDEX_COLUMNS = """ - SELECT a.attname - FROM pg_index ix - JOIN pg_class ic ON ic.oid = ix.indexrelid AND ic.relkind = 'i' - JOIN pg_attribute a ON a.attrelid = ix.indrelid -- read columns from the base table of the index - AND a.attnum = ANY(ix.indkey) -- keep attrs whose attnum appears in index key vector - AND a.attnum > 0 -- system columns have negative attnum; exclude them - WHERE ic.relname = ? AND ic.relnamespace = current_schema()::regnamespace -- scope name lookup to active schema - ORDER BY array_position(ix.indkey, a.attnum) -- ix.indkey stores attr numbers in key order - """; + private static final String QUERY_INDEX_COLUMNS = """ + SELECT a.attname + FROM pg_index ix + JOIN pg_class ic ON ic.oid = ix.indexrelid AND ic.relkind = 'i' + JOIN pg_attribute a ON a.attrelid = ix.indrelid -- read columns from the base table of the index + AND a.attnum = ANY(ix.indkey) -- keep attrs whose attnum appears in index key vector + AND a.attnum > 0 -- system columns have negative attnum; exclude them + WHERE ic.relname = ? 
AND ic.relnamespace = current_schema()::regnamespace -- scope name lookup to active schema + ORDER BY array_position(ix.indkey, a.attnum) -- ix.indkey stores attr numbers in key order + """; private record PgDdl(String dropDdl, String createDdl) {} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java index 7c4a47f888e1..b537a6cd5722 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/schematool/SchemaToolTaskRebuildIndexes.java @@ -41,6 +41,7 @@ class SchemaToolTaskRebuildIndexes extends SchemaToolTask { @Override void setCommandLineArguments(SchemaToolCommandLine cl) { + // No arguments needed. 
} @Override @@ -82,8 +83,10 @@ void executeWithRebuilder(IndexRebuilder rebuilder) throws HiveMetaException { } for (IndexInfo index : indexes) { - LOG.info("Rebuilding: {}", index); - LOG.info(rebuilder.describeRebuildDDL(index)); + if (LOG.isInfoEnabled()) { + LOG.info("Rebuilding: {}", index); + LOG.info(rebuilder.describeRebuildDDL(index)); + } if (!schemaTool.isDryRun()) { rebuilder.rebuildIndex(index); LOG.info("Done."); diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java index 95e88fd893a7..411dac471e4e 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMSSQLIndexRebuilder.java @@ -54,7 +54,7 @@ public class TestMSSQLIndexRebuilder { @ClassRule - public static final Mssql mssql = new Mssql(); + public static final Mssql MSSQL = new Mssql(); private static final String TEST_DB = "idx_rebuild_test"; @@ -83,19 +83,19 @@ public class TestMSSQLIndexRebuilder { @BeforeClass public static void setUpClass() throws Exception { - Class.forName(mssql.getJdbcDriver()); + Class.forName(MSSQL.getJdbcDriver()); try (Connection masterConn = DriverManager.getConnection( - mssql.getInitialJdbcUrl(), mssql.getDbRootUser(), mssql.getDbRootPassword()); + MSSQL.getInitialJdbcUrl(), MSSQL.getDbRootUser(), MSSQL.getDbRootPassword()); Statement stmt = masterConn.createStatement()) { stmt.execute("IF DB_ID('" + TEST_DB + "') IS NOT NULL DROP DATABASE " + TEST_DB); stmt.execute("CREATE DATABASE " + TEST_DB); } // Reconnect to the test database. 
- String testDbUrl = mssql.getInitialJdbcUrl() + String testDbUrl = MSSQL.getInitialJdbcUrl() .replace("DatabaseName=master", "DatabaseName=" + TEST_DB); - conn = DriverManager.getConnection(testDbUrl, mssql.getDbRootUser(), mssql.getDbRootPassword()); + conn = DriverManager.getConnection(testDbUrl, MSSQL.getDbRootUser(), MSSQL.getDbRootPassword()); conn.setAutoCommit(false); for (String ddl : DDL_CREATE_TABLES) { @@ -112,7 +112,7 @@ public static void tearDownClass() throws Exception { conn.close(); } try (Connection masterConn = DriverManager.getConnection( - mssql.getInitialJdbcUrl(), mssql.getDbRootUser(), mssql.getDbRootPassword()); + MSSQL.getInitialJdbcUrl(), MSSQL.getDbRootUser(), MSSQL.getDbRootPassword()); Statement stmt = masterConn.createStatement()) { stmt.execute("IF DB_ID('" + TEST_DB + "') IS NOT NULL DROP DATABASE " + TEST_DB); } @@ -247,10 +247,10 @@ public void rebuildFkReferencedPrimaryKeySucceedsWithAlterIndexRebuild() throws @Test public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { - String testDbUrl = mssql.getInitialJdbcUrl() + String testDbUrl = MSSQL.getInitialJdbcUrl() .replace("DatabaseName=master", "DatabaseName=" + TEST_DB); Connection closedConn = DriverManager.getConnection( - testDbUrl, mssql.getDbRootUser(), mssql.getDbRootPassword()); + testDbUrl, MSSQL.getDbRootUser(), MSSQL.getDbRootPassword()); closedConn.close(); MSSQLIndexRebuilder localRebuilder = new MSSQLIndexRebuilder(closedConn, false, "\""); IndexInfo nonUnique = new IndexInfo("idx_plain_name", "plain_table", false, false, diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java index 7e4cb0155920..518c58ce7ad8 100644 --- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestMySQLIndexRebuilder.java @@ -54,7 +54,7 @@ public class TestMySQLIndexRebuilder { @ClassRule - public static final Mysql mysql = new Mysql(); + public static final Mysql MYSQL = new Mysql(); private static final String TEST_DB = "test_idx_rebuild"; @@ -79,9 +79,9 @@ CREATE TABLE fk_child_table (id BIGINT, parent_id BIGINT, @BeforeClass public static void setUpClass() throws Exception { - Class.forName(mysql.getJdbcDriver()); + Class.forName(MYSQL.getJdbcDriver()); conn = DriverManager.getConnection( - mysql.getInitialJdbcUrl(), mysql.getDbRootUser(), mysql.getDbRootPassword()); + MYSQL.getInitialJdbcUrl(), MYSQL.getDbRootUser(), MYSQL.getDbRootPassword()); try (Statement stmt = conn.createStatement()) { stmt.execute("CREATE DATABASE IF NOT EXISTS " + TEST_DB); stmt.execute("USE " + TEST_DB); @@ -239,7 +239,7 @@ public void rebuildFkReferencedPrimaryKeySucceedsWithAtomicAlterTable() throws E @Test public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { Connection closedConn = DriverManager.getConnection( - mysql.getInitialJdbcUrl(), mysql.getDbRootUser(), mysql.getDbRootPassword()); + MYSQL.getInitialJdbcUrl(), MYSQL.getDbRootUser(), MYSQL.getDbRootPassword()); closedConn.close(); MySQLIndexRebuilder localRebuilder = new MySQLIndexRebuilder(closedConn, true, "`"); IndexInfo nonUnique = new IndexInfo("idx", "plain_table", false, false, List.of("name")); diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java index cc8d1a821f1e..0a31a8de3b53 100644 --- 
a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestOracleIndexRebuilder.java @@ -55,7 +55,7 @@ public class TestOracleIndexRebuilder { @ClassRule - public static final Oracle oracle = new Oracle(); + public static final Oracle ORACLE = new Oracle(); // Dedicated test user keeps USER_INDEXES scoped to test objects. private static final String TEST_USER = "IDX_TEST_USER"; @@ -114,10 +114,10 @@ public class TestOracleIndexRebuilder { @BeforeClass public static void setUpClass() throws Exception { - Class.forName(oracle.getJdbcDriver()); + Class.forName(ORACLE.getJdbcDriver()); try (Connection sysConn = DriverManager.getConnection( - oracle.getInitialJdbcUrl(), oracle.getDbRootUser(), oracle.getDbRootPassword()); + ORACLE.getInitialJdbcUrl(), ORACLE.getDbRootUser(), ORACLE.getDbRootPassword()); Statement stmt = sysConn.createStatement()) { try { stmt.execute("DROP USER " + TEST_USER + " CASCADE"); @@ -130,7 +130,7 @@ public static void setUpClass() throws Exception { } // Reconnect as test user. 
- String userUrl = oracle.getInitialJdbcUrl(); + String userUrl = ORACLE.getInitialJdbcUrl(); conn = DriverManager.getConnection(userUrl, TEST_USER, TEST_PASSWORD); conn.setAutoCommit(false); @@ -147,7 +147,7 @@ public static void tearDownClass() throws Exception { conn.close(); } try (Connection sysConn = DriverManager.getConnection( - oracle.getInitialJdbcUrl(), oracle.getDbRootUser(), oracle.getDbRootPassword()); + ORACLE.getInitialJdbcUrl(), ORACLE.getDbRootUser(), ORACLE.getDbRootPassword()); Statement stmt = sysConn.createStatement()) { stmt.execute("DROP USER " + TEST_USER + " CASCADE"); } @@ -305,7 +305,7 @@ public void rebuildFkReferencedPrimaryKeySucceedsWithAlterIndexRebuild() throws @Test public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { Connection closedConn = DriverManager.getConnection( - oracle.getInitialJdbcUrl(), TEST_USER, TEST_PASSWORD); + ORACLE.getInitialJdbcUrl(), TEST_USER, TEST_PASSWORD); closedConn.close(); OracleIndexRebuilder localRebuilder = new OracleIndexRebuilder(closedConn, true, "\""); IndexInfo nonUnique = new IndexInfo("IDX_PLAIN_NAME", "PLAIN_TABLE", false, false, diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java index daca1962ed20..009bb8a09668 100644 --- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java +++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/schematool/TestPostgresIndexRebuilder.java @@ -53,7 +53,7 @@ public class TestPostgresIndexRebuilder { @ClassRule - public static final Postgres postgres = new Postgres(); + public static final Postgres POSTGRES = new Postgres(); private static Connection conn; @@ -77,9 +77,9 @@ 
public class TestPostgresIndexRebuilder { @BeforeClass public static void setUpClass() throws Exception { - Class.forName(postgres.getJdbcDriver()); + Class.forName(POSTGRES.getJdbcDriver()); conn = DriverManager.getConnection( - postgres.getInitialJdbcUrl(), postgres.getDbRootUser(), postgres.getDbRootPassword()); + POSTGRES.getInitialJdbcUrl(), POSTGRES.getDbRootUser(), POSTGRES.getDbRootPassword()); conn.setAutoCommit(false); for (String ddl : DDL_CREATE_TABLES.split(";")) { String sql = ddl.trim(); @@ -250,7 +250,7 @@ public void rebuildMultiColumnUniqueConstraintDropsAndRecreatesWithUniquenessEnf @Test public void findDuplicatesNonUniqueIndexReturnsZeroWithoutQueryingDb() throws Exception { Connection closedConn = DriverManager.getConnection( - postgres.getInitialJdbcUrl(), postgres.getDbRootUser(), postgres.getDbRootPassword()); + POSTGRES.getInitialJdbcUrl(), POSTGRES.getDbRootUser(), POSTGRES.getDbRootPassword()); closedConn.close(); PostgresIndexRebuilder localRebuilder = new PostgresIndexRebuilder(closedConn, true, "\""); IndexInfo nonUnique = new IndexInfo("idx", "plain_table", false, false, List.of("name")); @@ -308,11 +308,6 @@ private static IndexInfo findByTable(Map byName, String table .orElse(null); } - private void execute(String sql) throws SQLException { - try (Statement stmt = conn.createStatement()) { - stmt.execute(sql); - } - } private boolean indexExists(String indexName) throws SQLException { try (PreparedStatement ps = conn.prepareStatement(