From affd3c027378f9b8cabc6fa3cb9aa472dcef15d3 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Tue, 10 Feb 2026 12:57:47 +1000 Subject: [PATCH 01/22] Exclude temp tables from shrink + test unlogged tables --- gpMgmt/bin/gprebalance_modules/shrink.py | 1 + .../mgmt_utils/ggrebalance_shrink.feature | 33 +++++++++++++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 26 +++++++++++++++ gpMgmt/test/behave_utils/utils.py | 11 ++++--- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 9c4616f87c3e..2b912d2208c8 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -644,6 +644,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: JOIN pg_namespace n ON c.relnamespace = n.oid JOIN gp_distribution_policy p ON c.oid = p.localoid WHERE c.relkind IN ('r', 'p') AND c.relispartition = FALSE AND + c.relpersistence != 't' AND p.numsegments {cmp} {self.shrink_plan.getTargetSegmentCount()} AND n.nspname NOT IN ('pg_catalog', 'information_schema', '{self.rebalance_schema.getSchemaName()}')''') for schema_name, rel_name in cursor: diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 5f8f705d7846..aea359c71faf 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -512,3 +512,36 @@ Feature: ggrebalance behave tests | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_DROP_SCHEMA_START_begin | + Scenario: test 4. 
shrink - check different table types + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a long-run session starts + And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session + And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_end" + When the user runs "ggrebalance -x 1 --parallel 1 --batch-size 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And a long-run session ends + When execute following sql in db "postgres" and store result in the context + """ + select count(1) as temp_tables_for_redistribute from ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; + """ + Then validate that following rows are in the stored rows + | temp_tables_for_redistribute | + | 0 | + When the user runs "ggrebalance --parallel 1 --batch-size 1" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment 
count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index d1dbbdd9cf79..8cfc6795d739 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -1490,6 +1490,27 @@ def stop_segments_immediate(context, where_clause): def impl(context): wait_for_unblocked_transactions(context, 600) +@given('a long-run session starts') +@when('a long-run session starts') +@then('a long-run session starts') +def impl(context): + dbname = 'gptest' + context.long_run_conn = dbconn.connect(dbconn.DbURL(dbname=dbname), unsetSearchPath=False) + +@given('a long-run session ends') +@when('a long-run session ends') +@then('a long-run session ends') +def impl(context): + if context.long_run_conn != None: + context.long_run_conn.close() + context.long_run_conn = None + +@given('sql "{sql}" is executed in a long-run session') +@when('sql "{sql}" is executed in a long-run session') +@then('sql "{sql}" is executed in a long-run session') +def impl(context, sql): + dbconn.execSQL(context.long_run_conn, sql) + @given('below sql is executed in "{dbname}" db') @when('below sql is executed in "{dbname}" db') def impl(context, dbname): @@ -2338,6 +2359,11 @@ def impl(context): def impl(context, tabletype, tablename, dbname, numrows): populate_regular_table_data(context, tabletype, tablename, dbname, compression_type=None, with_data=True, rowcount=int(numrows)) +@given('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +@then('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +@when('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +def impl(context, tabletype, tablename, 
dbname, numrows): + populate_regular_table_data(context, tabletype, tablename, dbname, compression_type=None, with_data=True, rowcount=int(numrows), unlogged=True) @given('there is a "{tabletype}" table "{tablename}" in "{dbname}" with data') @then('there is a "{tabletype}" table "{tablename}" in "{dbname}" with data') diff --git a/gpMgmt/test/behave_utils/utils.py b/gpMgmt/test/behave_utils/utils.py index 976d1a87be72..5fe43db2bd5e 100644 --- a/gpMgmt/test/behave_utils/utils.py +++ b/gpMgmt/test/behave_utils/utils.py @@ -430,11 +430,14 @@ def create_external_partition(context, tablename, dbname, port, filename): def create_partition(context, tablename, storage_type, dbname, compression_type=None, partition=True, rowcount=1094, - with_data=True, with_desc=False, host=None, port=0, user=None): + with_data=True, with_desc=False, host=None, port=0, user=None, unlogged=False): interval = '1 year' table_definition = 'Column1 int, Column2 varchar(20), Column3 date' - create_table_str = "Create table " + tablename + "(" + table_definition + ")" + create_table_str = "Create table " + if unlogged: + create_table_str = "Create unlogged table " + create_table_str = create_table_str + tablename + "(" + table_definition + ")" storage_type_dict = {'ao': 'row', 'co': 'column'} part_table = " Distributed Randomly Partition by list(Column2) \ @@ -732,11 +735,11 @@ def validate_local_path(path): def populate_regular_table_data(context, tabletype, table_name, dbname, compression_type=None, rowcount=1094, - with_data=False, with_desc=False, host=None, port=0, user=None): + with_data=False, with_desc=False, host=None, port=0, user=None, unlogged=False): create_database_if_not_exists(context, dbname, host=host, port=port, user=user) drop_table_if_exists(context, table_name=table_name, dbname=dbname, host=host, port=port, user=user) create_partition(context, table_name, tabletype, dbname, compression_type=compression_type, partition=False, - rowcount=rowcount, with_data=with_data, 
with_desc=with_desc, host=host, port=port, user=user) + rowcount=rowcount, with_data=with_data, with_desc=with_desc, host=host, port=port, user=user, unlogged=unlogged) def is_process_running(proc_name, host=None): From ae5133efe0ba2c494843f2b256b66d986e8b3269 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 11 Feb 2026 10:02:08 +1000 Subject: [PATCH 02/22] Support shrink of matviews --- .../bin/gprebalance_modules/rebalance_schema.py | 8 ++++---- gpMgmt/bin/gprebalance_modules/shrink.py | 15 ++++++++++----- .../behave/mgmt_utils/ggrebalance_shrink.feature | 2 ++ .../mgmt_utils/steps/analyzedb_mgmt_utils.py | 4 ++-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/rebalance_schema.py b/gpMgmt/bin/gprebalance_modules/rebalance_schema.py index a7bdbdf35814..9198613036dd 100644 --- a/gpMgmt/bin/gprebalance_modules/rebalance_schema.py +++ b/gpMgmt/bin/gprebalance_modules/rebalance_schema.py @@ -38,7 +38,7 @@ def createSchema(self, plan: Plan) -> None: DISTRIBUTED REPLICATED''') dbconn.execSQL(self.conn, f'''CREATE TABLE {self.schema_name}.{self.table_rebalance_status_detail} - (db_name TEXT, schema_name TEXT, rel_name TEXT, status TEXT, + (db_name TEXT, schema_name TEXT, rel_name TEXT, rel_kind CHAR, status TEXT, CONSTRAINT unique_fqn UNIQUE (db_name, schema_name, rel_name)) DISTRIBUTED REPLICATED''') dbconn.execSQL(self.conn, @@ -133,10 +133,10 @@ def clearTablesToRebalanceWithStatus(self, status: str) -> None: f'''DELETE FROM {self.schema_name}.{self.table_rebalance_status_detail} WHERE (status = '{status}')''') - def addTableToRebalance(self, db: str, schema_name: str, rel_name: str, status: str) -> None: + def addTableToRebalance(self, db: str, schema_name: str, rel_name: str, rel_kind: str, status: str) -> None: dbconn.execSQL(self.conn, f'''INSERT INTO {self.schema_name}.{self.table_rebalance_status_detail} - VALUES ('{db}', '{schema_name}', '{rel_name}', '{status}')''') + VALUES ('{db}', '{schema_name}', 
'{rel_name}', '{rel_kind}', '{status}')''') def setStatusForTableToRebalance(self, db: str, schema_name: str, rel_name: str, status: str) -> None: dbconn.execSQL(self.conn, @@ -144,7 +144,7 @@ def setStatusForTableToRebalance(self, db: str, schema_name: str, rel_name: str, WHERE db_name = '{db}' AND schema_name = '{schema_name}' AND rel_name = '{rel_name}';''') def getTablesToRebalanceWithStatus(self, status: str) -> cursor: - return dbconn.query(self.conn, f"""SELECT db_name, schema_name, rel_name FROM + return dbconn.query(self.conn, f"""SELECT db_name, schema_name, rel_name, rel_kind FROM {self.schema_name}.{self.table_rebalance_status_detail} WHERE status = '{status}'""") def saveExecutionSteps(self, steps: List[RebalanceStep]) -> None: diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 2b912d2208c8..14fbdec13ed0 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -587,12 +587,14 @@ def __init__(self, db_name: str, schema_name: str, rel_name: str, + rel_kind: str, target_segment_count: int, table_status_after_rebalance: str) -> None: self.shrink = shrink self.db_name = db_name self.schema_name = schema_name self.rel_name = rel_name + self.rel_kind = rel_kind self.target_segment_count = target_segment_count self.table_status_after_rebalance = table_status_after_rebalance SQLCommand.__init__(self, f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') @@ -613,6 +615,8 @@ def run(self) -> None: dbconn.execSQL(conn, f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" REBALANCE {self.target_segment_count}''') + if self.rel_kind == 'm': + dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) dbconn.execSQL(conn, 'COMMIT') self.shrink.logger.info(f'Complete table rebalance 
for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') @@ -639,16 +643,16 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: dburl = dbconn.DbURL(dbname=db, port=self.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: cursor = dbconn.query(conn, - f'''SELECT n.nspname, c.relname + f'''SELECT n.nspname, c.relname, c.relkind FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid JOIN gp_distribution_policy p ON c.oid = p.localoid - WHERE c.relkind IN ('r', 'p') AND c.relispartition = FALSE AND + WHERE c.relkind IN ('r', 'p', 'm') AND c.relispartition = FALSE AND c.relpersistence != 't' AND p.numsegments {cmp} {self.shrink_plan.getTargetSegmentCount()} AND n.nspname NOT IN ('pg_catalog', 'information_schema', '{self.rebalance_schema.getSchemaName()}')''') - for schema_name, rel_name in cursor: - self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, status) + for schema_name, rel_name, rel_kind in cursor: + self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, rel_kind, status) dbconn.execSQL(self.conn, 'COMMIT') @@ -660,11 +664,12 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm if cursor.rowcount > 0: self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) - for db_name, schema_name, rel_name in cursor: + for db_name, schema_name, rel_name, rel_kind in cursor: task = self.TableRebalanceTask(self, db_name, schema_name, rel_name, + rel_kind, target_segment_count, target_status) self.workers_for_tables_rebalance.addCommand(task) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index aea359c71faf..5e73eee656c4 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -523,6 +523,7 @@ Feature: ggrebalance behave tests And there is a "heap" 
table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" And a long-run session starts And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_end" @@ -545,3 +546,4 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 diff --git a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py index f7bf6f1bdfa7..43c2a172e982 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py @@ -94,13 +94,13 @@ def impl(context, view_name, table_name, schema_name): @given('a view "{view_name}" exists on table "{table_name}"') def impl(context, view_name, table_name): with closing(dbconn.connect(dbconn.DbURL(dbname=context.dbname))) as conn: - create_view_on_table(context.conn, view_name, table_name) + create_view_on_table(conn, view_name, table_name) @given('a materialized view "{view_name}" exists on table "{table_name}"') def impl(context, view_name, table_name): with 
closing(dbconn.connect(dbconn.DbURL(dbname=context.dbname))) as conn: - create_materialized_view_on_table_in_schema(context.conn, viewname=view_name, + create_materialized_view_on_table_in_schema(conn, viewname=view_name, tablename=table_name) From ff134f91b076ee28221b7a9e810dd8b97e2ab0ce Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 11 Feb 2026 11:39:43 +1000 Subject: [PATCH 03/22] Add check for shrink of partitioned tables --- gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature | 4 ++++ gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 5e73eee656c4..27408be4a992 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -522,6 +522,8 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" And a long-run session starts @@ -545,5 +547,7 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, 
row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 8cfc6795d739..6aaadad7181c 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -2384,6 +2384,12 @@ def impl(context, tabletype, tablename, dbname): def impl(context, tabletype, table_name, dbname): create_partition(context, tablename=table_name, storage_type=tabletype, dbname=dbname, with_data=True) +@given('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +@then('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +@when('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +def impl(context, tabletype, table_name, dbname, numrows): + create_partition(context, tablename=table_name, storage_type=tabletype, dbname=dbname, with_data=True, rowcount=int(numrows)) + @given('there is a view without columns in "{dbname}"') @then('there is a view without columns in "{dbname}"') @when('there is a view without columns in "{dbname}"') From afec7bae6f89ca0ee6faa0b711e06dcf2ede6688 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 11 Feb 2026 13:17:58 +1000 Subject: [PATCH 04/22] Support ext writable tables in shrink --- gpMgmt/bin/gprebalance_modules/shrink.py | 9 
++++++--- gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 14fbdec13ed0..3a75467cc49c 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -643,15 +643,18 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: dburl = dbconn.DbURL(dbname=db, port=self.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: cursor = dbconn.query(conn, - f'''SELECT n.nspname, c.relname, c.relkind + f'''SELECT n.nspname, c.relname, c.relkind, pe.writable is not null as external_writable FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid JOIN gp_distribution_policy p ON c.oid = p.localoid - WHERE c.relkind IN ('r', 'p', 'm') AND c.relispartition = FALSE AND + LEFT JOIN pg_exttable pe on (c.oid=pe.reloid and pe.writable) + WHERE c.relkind IN ('r', 'p', 'm', 'f') AND c.relispartition = FALSE AND c.relpersistence != 't' AND p.numsegments {cmp} {self.shrink_plan.getTargetSegmentCount()} AND n.nspname NOT IN ('pg_catalog', 'information_schema', '{self.rebalance_schema.getSchemaName()}')''') - for schema_name, rel_name, rel_kind in cursor: + for schema_name, rel_name, rel_kind, external_writable in cursor: + if rel_kind == 'f' and not external_writable: + continue self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, rel_kind, status) dbconn.execSQL(self.conn, 'COMMIT') diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 27408be4a992..6ea3fa0d32f4 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -528,6 +528,8 @@ Feature: ggrebalance behave tests And a materialized view "test_schema_1.mv_test_table_1" exists on table 
"test_schema_1.test_table_1" And a long-run session starts And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session + And database "gptest" exists + And the user create a writable external table with name "ext_test" And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_end" When the user runs "ggrebalance -x 1 --parallel 1 --batch-size 1 --skip-rebalance" Then ggrebalance should return a return code of 1 @@ -551,3 +553,4 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 From 11024d23870fed1c448413d78f1948a61db570f2 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 13 Feb 2026 15:27:22 +1000 Subject: [PATCH 05/22] Rework matviews handling, add more table types into tests, improve logging --- gpMgmt/bin/gprebalance_modules/shrink.py | 78 +++++++++-- .../mgmt_utils/ggrebalance_shrink.feature | 132 ++++++++++++++++++ 2 files changed, 200 insertions(+), 10 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 3a75467cc49c..9b1edd7069be 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -572,14 +572,18 @@ def func_with_faults(self): @wrap_segment_stop_with_faults def run(self) -> None: - self.shrink.logger.info(f'Stopping shrinked segment dbid {self.segment.getSegmentDbId()} @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()}') + self.shrink.logger.info(f'Stopping shrinked segment {str(self.segment)}') 
self.checkRunningSegment.run() if self.checkRunningSegment.is_shutdown(): - self.shrink.logger.info(f'Segment dbid {self.segment.getSegmentDbId()} is already down @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()} ') + self.shrink.logger.info(f'Segment {str(self.segment)} is already down') self.set_results(CommandResult(0, b'', b'', True, False)) else: - SegmentStop.run(self) - self.shrink.logger.info(f'Stopped shrinked segment dbid {self.segment.getSegmentDbId()} @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()}') + try: + SegmentStop.run(self, validateAfter = True) + except ExecutionError: + self.shrink.logger.info(f'Failed to stop shrinked segment {str(self.segment)}') + return + self.shrink.logger.info(f'Stopped shrinked segment {str(self.segment)}') class TableRebalanceTask(SQLCommand): def __init__(self, @@ -589,7 +593,8 @@ def __init__(self, rel_name: str, rel_kind: str, target_segment_count: int, - table_status_after_rebalance: str) -> None: + table_status_after_rebalance: str, + mat_view_refresh: bool) -> None: self.shrink = shrink self.db_name = db_name self.schema_name = schema_name @@ -597,6 +602,9 @@ def __init__(self, self.rel_kind = rel_kind self.target_segment_count = target_segment_count self.table_status_after_rebalance = table_status_after_rebalance + self.mat_view_refresh = mat_view_refresh + if self.mat_view_refresh: + assert rel_kind == 'm' SQLCommand.__init__(self, f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') # decorator to inject a fault before running TableRebalanceTask for a specific {db_name, schema_name, rel_name} @@ -606,8 +614,7 @@ def func_with_faults(self): fun(self) return func_with_faults - @wrap_table_rebalance_with_faults - def run(self) -> None: + def rebalance_table(self) -> None: self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') dburl = 
dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: @@ -616,12 +623,32 @@ def run(self) -> None: f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" REBALANCE {self.target_segment_count}''') if self.rel_kind == 'm': - dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, 'mv_refresh_required') + else: + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) dbconn.execSQL(conn, 'COMMIT') self.shrink.logger.info(f'Complete table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) + def refresh_mat_view(self) -> None: + self.shrink.logger.info(f'Start matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') + dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) + with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: + dbconn.execSQL(conn, 'BEGIN') + dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + dbconn.execSQL(conn, 'COMMIT') + self.shrink.logger.info(f'Complete matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') + self.set_results(CommandResult(0, b'', b'', True, False)) + + @wrap_table_rebalance_with_faults + def run(self) -> None: + if not self.mat_view_refresh: + self.rebalance_table() + else: + self.refresh_mat_view() + + 
def prepare_shrink_schema(self, is_rollback: bool) -> None: status = 'done' if is_rollback else 'none' cmp = '<=' if is_rollback else '>' @@ -631,6 +658,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: # cleanup list of tables that require rebalance # for the case we re-enter this state after we were interrupted right after it self.rebalance_schema.clearTablesToRebalanceWithStatus(status) + self.rebalance_schema.clearTablesToRebalanceWithStatus('mv_refresh_required') cursor = dbconn.query(self.conn, 'SELECT datname FROM pg_database') databases_to_process = [] @@ -674,7 +702,37 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm rel_name, rel_kind, target_segment_count, - target_status) + target_status, + False) + self.workers_for_tables_rebalance.addCommand(task) + + print_progress(self.workers_for_tables_rebalance, interval=1) + + self.workers_for_tables_rebalance.haltWork() + self.workers_for_tables_rebalance.joinWorkers() + + for task in self.workers_for_tables_rebalance.getCompletedItems(): + if not task.was_successful(): + raise Exception(f'Failed to do ALTER REBALANCE: {task.get_results().stderr}') + + self.workers_for_tables_rebalance = None + + cursor = self.rebalance_schema.getTablesToRebalanceWithStatus('mv_refresh_required') + + self.logger.info(f'Tables to process {cursor.rowcount}') + + if cursor.rowcount > 0: + self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) + + for db_name, schema_name, rel_name, rel_kind in cursor: + task = self.TableRebalanceTask(self, + db_name, + schema_name, + rel_name, + rel_kind, + target_segment_count, + target_status, + True) self.workers_for_tables_rebalance.addCommand(task) print_progress(self.workers_for_tables_rebalance, interval=1) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 6ea3fa0d32f4..9cd625cd261c 100755 --- 
a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -12,6 +12,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -29,6 +35,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table 
"test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -45,6 +56,14 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + #And a long-run session starts + #And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -53,6 +72,14 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 1 And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp And unset fault inject + #And a long-run session ends + When execute following sql in db "postgres" and store result in the context + """ + select count(1) as temp_tables_for_redistribute from 
ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; + """ + Then validate that following rows are in the stored rows + | temp_tables_for_redistribute | + | 0 | When the user runs "ggrebalance -x 1 --parallel 1 --batch-size 1 --skip-rebalance" Then ggrebalance should return a return code of 1 And ggrebalance should print "Can't start a new operation, because the previous one was interrupted" to logfile with latest timestamp @@ -65,6 +92,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -106,6 +138,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table 
"test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -123,6 +161,12 @@ Feature: ggrebalance behave tests And ggrebalance should print "Shrink is complete" to logfile with latest timestamp And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 And distribution information from table 
"test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 1, row count = 1094 And ggrebalance should return a return code of 0 @@ -142,6 +186,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -157,6 +207,11 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 0 And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table 
"test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -172,6 +227,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -185,6 +246,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with 
data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -221,6 +287,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table 
"test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -238,6 +310,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -261,6 +338,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table 
"test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -279,6 +362,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 @@ -314,6 +402,12 @@ Feature: ggrebalance behave tests And schema 
"test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -333,6 +427,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 And distribution information from table 
"test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -353,6 +452,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -371,6 +476,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, 
row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -414,6 +524,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -432,6 +548,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rebalance schema doesn't exists and no shrink plan is supplied. Please specify shrink plan." 
to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -456,6 +577,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table 
"test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -478,6 +605,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + Then the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 From 80148991055f522ad1cf81bdd518cbfc673adee1 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 13 Feb 2026 15:29:17 +1000 Subject: [PATCH 06/22] Add more interruption points into the test with cluster restart 
--- gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 9cd625cd261c..b93518d5ea89 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -157,7 +157,7 @@ Feature: ggrebalance behave tests Then gpstart should return a return code of 0 When there is a "heap" table "test_schema_2.test_table_3" in "test_db_2" with data And the user runs "ggrebalance" - Then ggrebalance should print "Cluster restarted after previous run, trying to repopulate the relation queue" to logfile + Then ggrebalance should return a return code of 0 And ggrebalance should print "Shrink is complete" to logfile with latest timestamp And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 @@ -169,12 +169,15 @@ Feature: ggrebalance behave tests Then the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 1, row count = 1094 - And ggrebalance should return a return code of 0 + Examples: | fault_name | | on_enter_STATE_BACKUP_CATALOG_AND_UPDATE_TARGET_SEGMENT_COUNT_DONE_begin | | on_enter_STATE_PREPARE_SHRINK_SCHEMA_DONE_begin | | on_enter_STATE_SHRINK_TABLES_DONE_begin | + | on_enter_STATE_SHRINK_TABLES_DONE_end | + | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | + | on_enter_STATE_SHRINK_CATALOG_STARTED_end | Scenario: test 2.1. 
shrink - check rollback after interrupted state, if interruption is done before the rebalance schema creation Given the database is not running From 1c3f8fe409b7b4978b68a3a49fe5188fefa0d385 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Mon, 16 Feb 2026 13:02:27 +1000 Subject: [PATCH 07/22] Check table and db existence --- gpMgmt/bin/gprebalance_modules/shrink.py | 65 +++++++++++----- .../mgmt_utils/ggrebalance_shrink.feature | 75 +++++++++++++++++++ 2 files changed, 121 insertions(+), 19 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 9b1edd7069be..559e53ffab92 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -224,7 +224,6 @@ def __init__(self, conn: dbconn.Connection, self.gparray_dump_file = gpArrayDumpFilename self.rebalance_schema = schema self.shrink_plan = None - self.needs_repopulate = False self.dumped_gparray = gparray.GpArray.initFromFile(self.gparray_dump_file) if os.path.exists(self.gparray_dump_file) else None self.machine = Machine(model = self, @@ -276,7 +275,6 @@ def get_state_after_interrupt(self, prev_state) -> str: # means that target rebalance numsegments is reset, and new tables are created at old segment count if bool(row[0]) is False: self.logger.info("Cluster restarted after previous run, trying to repopulate the relation queue") - self.needs_repopulate = True return 'STATE_BACKUP_CATALOG_AND_UPDATE_TARGET_SEGMENT_COUNT_STARTED' return self.states_main_shrink_flow[prev_idx + 1] @@ -614,30 +612,59 @@ def func_with_faults(self): fun(self) return func_with_faults + def table_exists(self, conn: dbconn.Connection, schema_name: str, rel_name: str) -> bool: + if dbconn.querySingleton(conn, f""" + SELECT count(1) + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE c.relname = '{rel_name}' AND n.nspname = '{schema_name}' AND c.relnamespace = n.oid + """) == 0: + return False + return True + + def 
db_exists(self, conn: dbconn.Connection, db_name: str) -> bool: + if dbconn.querySingleton(conn, f"""SELECT count(*) FROM pg_database WHERE datname = '{db_name}'""") == 0: + return False + return True + def rebalance_table(self) -> None: self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') - dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) - with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: - dbconn.execSQL(conn, 'BEGIN') - dbconn.execSQL(conn, - f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" - REBALANCE {self.target_segment_count}''') - if self.rel_kind == 'm': - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, 'mv_refresh_required') - else: - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) - dbconn.execSQL(conn, 'COMMIT') + if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): + dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) + with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: + dbconn.execSQL(conn, 'BEGIN') + + table_exists = self.table_exists(conn, self.schema_name, self.rel_name) + if table_exists: + dbconn.execSQL(conn, + f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" + REBALANCE {self.target_segment_count}''') + else: + self.shrink.logger.info(f'''Table "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual rebalance''') + + if self.rel_kind == 'm' and table_exists: + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, 'mv_refresh_required') + else: + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + 
dbconn.execSQL(conn, 'COMMIT') + else: + self.shrink.logger.info(f'''DB "{self.db_name}" doesn't exist, skipping actual rebalance for "{self.schema_name}"."{self.rel_name}"''') self.shrink.logger.info(f'Complete table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) def refresh_mat_view(self) -> None: self.shrink.logger.info(f'Start matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') - dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) - with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: - dbconn.execSQL(conn, 'BEGIN') - dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) - dbconn.execSQL(conn, 'COMMIT') + if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): + dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) + with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: + dbconn.execSQL(conn, 'BEGIN') + if self.table_exists(conn, self.schema_name, self.rel_name): + dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') + else: + self.shrink.logger.info(f'''Materialized view "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual REFRESH''') + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + dbconn.execSQL(conn, 'COMMIT') + else: + self.shrink.logger.info(f'''DB "{self.db_name}" doesn't exist, skipping actual REFRESH for "{self.schema_name}"."{self.rel_name}"''') self.shrink.logger.info(f'Complete matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') 
self.set_results(CommandResult(0, b'', b'', True, False)) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index b93518d5ea89..ce37a62d7aff 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -689,3 +689,78 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 Then the numsegments of table "ext_test" is 1 + + Scenario: test 5. test shrink continue after cluster restart, when a table planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in 
"test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_2.test_schema_2.test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And table "test_schema_2.test_table_1" is dropped in "test_db_2" + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + + Scenario: test 6. 
test shrink continue after cluster restart, when a db with the table planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_2.test_schema_2.test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And the database "test_db_2" does not exist + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment 
information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 From 08483215d54d5f9b8b28bcd361bf5c5e146391db Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Tue, 17 Feb 2026 08:30:02 +1000 Subject: [PATCH 08/22] Updates for mat views --- gpMgmt/bin/gprebalance_modules/shrink.py | 56 ++++++++++--------- .../mgmt_utils/ggrebalance_shrink.feature | 54 +++++++++++++++++- .../behave/mgmt_utils/steps/mgmt_utils.py | 5 ++ gpMgmt/test/behave_utils/utils.py | 4 ++ 4 files changed, 92 insertions(+), 27 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 559e53ffab92..7e483960f9ac 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -591,8 +591,7 @@ def __init__(self, rel_name: str, rel_kind: str, target_segment_count: int, - table_status_after_rebalance: str, - mat_view_refresh: bool) -> None: + table_status_after_rebalance: str) -> None: self.shrink = shrink self.db_name = db_name self.schema_name = schema_name @@ -600,10 +599,7 @@ def __init__(self, self.rel_kind = rel_kind self.target_segment_count = target_segment_count self.table_status_after_rebalance = table_status_after_rebalance - self.mat_view_refresh = mat_view_refresh - if 
self.mat_view_refresh: - assert rel_kind == 'm' - SQLCommand.__init__(self, f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') + super().__init__(f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') # decorator to inject a fault before running TableRebalanceTask for a specific {db_name, schema_name, rel_name} def wrap_table_rebalance_with_faults(fun): @@ -626,7 +622,8 @@ def db_exists(self, conn: dbconn.Connection, db_name: str) -> bool: return False return True - def rebalance_table(self) -> None: + @wrap_table_rebalance_with_faults + def run(self) -> None: self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) @@ -651,7 +648,25 @@ def rebalance_table(self) -> None: self.shrink.logger.info(f'Complete table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) - def refresh_mat_view(self) -> None: + class MatViewRefreshTask(TableRebalanceTask): + def __init__(self, + shrink: 'GGShrink', + db_name: str, + schema_name: str, + rel_name: str, + target_segment_count: int, + table_status_after_rebalance: str) -> None: + super().__init__(shrink, db_name, schema_name, rel_name, 'm', target_segment_count, table_status_after_rebalance) + + # decorator to inject a fault before running MatViewRefreshTask for a specific {db_name, schema_name, rel_name} + def wrap_refresh_matview_with_faults(fun): + def func_with_faults(self): + inject_fault(f'fault_refresh_matview_{self.db_name}.{self.schema_name}.{self.rel_name}') + fun(self) + return func_with_faults + + @wrap_refresh_matview_with_faults + def run(self) -> None: self.shrink.logger.info(f'Start matview refresh for 
"{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) @@ -668,13 +683,6 @@ def refresh_mat_view(self) -> None: self.shrink.logger.info(f'Complete matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) - @wrap_table_rebalance_with_faults - def run(self) -> None: - if not self.mat_view_refresh: - self.rebalance_table() - else: - self.refresh_mat_view() - def prepare_shrink_schema(self, is_rollback: bool) -> None: status = 'done' if is_rollback else 'none' @@ -685,7 +693,8 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: # cleanup list of tables that require rebalance # for the case we re-enter this state after we were interrupted right after it self.rebalance_schema.clearTablesToRebalanceWithStatus(status) - self.rebalance_schema.clearTablesToRebalanceWithStatus('mv_refresh_required') + if is_rollback: + self.rebalance_schema.clearTablesToRebalanceWithStatus('mv_refresh_required') cursor = dbconn.query(self.conn, 'SELECT datname FROM pg_database') databases_to_process = [] @@ -717,7 +726,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: def rebalance_tables(self, original_status: str, target_status: str, target_segment_count: int) -> None: cursor = self.rebalance_schema.getTablesToRebalanceWithStatus(original_status) - self.logger.info(f'Tables to process {cursor.rowcount}') + self.logger.info(f'Tables to rebalance: {cursor.rowcount}') if cursor.rowcount > 0: self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) @@ -729,8 +738,7 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm rel_name, rel_kind, target_segment_count, - target_status, - False) + target_status) 
self.workers_for_tables_rebalance.addCommand(task) print_progress(self.workers_for_tables_rebalance, interval=1) @@ -746,20 +754,18 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm cursor = self.rebalance_schema.getTablesToRebalanceWithStatus('mv_refresh_required') - self.logger.info(f'Tables to process {cursor.rowcount}') + self.logger.info(f'Materialized views to refresh: {cursor.rowcount}') if cursor.rowcount > 0: self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) for db_name, schema_name, rel_name, rel_kind in cursor: - task = self.TableRebalanceTask(self, + task = self.MatViewRefreshTask(self, db_name, schema_name, rel_name, - rel_kind, target_segment_count, - target_status, - True) + target_status) self.workers_for_tables_rebalance.addCommand(task) print_progress(self.workers_for_tables_rebalance, interval=1) @@ -769,7 +775,7 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm for task in self.workers_for_tables_rebalance.getCompletedItems(): if not task.was_successful(): - raise Exception(f'Failed to do ALTER REBALANCE: {task.get_results().stderr}') + raise Exception(f'Failed to do REFRESH for a materialized view: {task.get_results().stderr}') self.workers_for_tables_rebalance = None diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index ce37a62d7aff..450760cf192d 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -126,6 +126,7 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED_begin | | on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | + | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | | fault_segment_stop_dbid_3 | Scenario Outline: test 1.3. 
test shrink continue after cluster restart @@ -178,6 +179,7 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | on_enter_STATE_SHRINK_CATALOG_STARTED_end | + | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario: test 2.1. shrink - check rollback after interrupted state, if interruption is done before the rebalance schema creation Given the database is not running @@ -278,6 +280,7 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | + | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario Outline: test 2.3. shrink - check rollback after interrupted state (interruption is done after the point of no return). Rollback fails. So just continue shrink. Given the database is not running @@ -393,6 +396,7 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | + | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario Outline: test 3.1. shrink - check continue after interrupted rollback state. In this case we fail in rollback too early, and normal shrink will be complete. Given the database is not running @@ -646,6 +650,7 @@ Feature: ggrebalance behave tests | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_DROP_SCHEMA_START_begin | + | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario: test 4. 
shrink - check different table types Given the database is not running @@ -690,7 +695,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 Then the numsegments of table "ext_test" is 1 - Scenario: test 5. test shrink continue after cluster restart, when a table planned for rebalance was dropped + Scenario: test 5. test shrink continue, when a table planned for rebalance was dropped Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" @@ -728,7 +733,52 @@ Feature: ggrebalance behave tests Then the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 - Scenario: test 6. test shrink continue after cluster restart, when a db with the table planned for rebalance was dropped + Scenario: test 5.1. 
test shrink continue, when a mat view planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And a materialized view "test_schema_1.mv_test_table_2" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_1.test_schema_1.mv_test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And materialized view "test_schema_1.mv_test_table_1" is dropped in "test_db_1" + When set fault inject "fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_2" + And the user runs 
"ggrebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And materialized view "test_schema_1.mv_test_table_2" is dropped in "test_db_1" + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + Then the numsegments of table "ext_test" is 1 + And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + + Scenario: test 6. 
test shrink continue, when a db with the table planned for rebalance was dropped Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 6aaadad7181c..f79cd7730912 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -1582,6 +1582,11 @@ def get_opened_files(filename, pidfile): def impl(context, tablename, dbname): drop_table_if_exists(context, table_name=tablename, dbname=dbname) +@when('materialized view "{viewname}" is dropped in "{dbname}"') +@then('materialized view "{viewname}" is dropped in "{dbname}"') +@given('materialized view "{viewname}" is dropped in "{dbname}"') +def impl(context, viewname, dbname): + drop_materialized_view_if_exists(context, view_name=viewname, dbname=dbname) @given('all the segments are running') @when('all the segments are running') diff --git a/gpMgmt/test/behave_utils/utils.py b/gpMgmt/test/behave_utils/utils.py index 5fe43db2bd5e..53cac4169ffd 100644 --- a/gpMgmt/test/behave_utils/utils.py +++ b/gpMgmt/test/behave_utils/utils.py @@ -358,6 +358,10 @@ def drop_table(context, table_name, dbname, host=None, port=0, user=None): if check_table_exists(context, table_name=table_name, dbname=dbname, host=host, port=port, user=user): raise Exception('Unable to successfully drop the table %s' % table_name) +def drop_materialized_view_if_exists(context, view_name, dbname, host=None, port=0, user=None): + SQL = 'drop materialized view if exists %s' % view_name + with closing(dbconn.connect(dbconn.DbURL(hostname=host, port=port, username=user, dbname=dbname), unsetSearchPath=False)) as conn: + dbconn.execSQL(conn, SQL) def check_schema_exists(context, schema_name, dbname): schema_check_sql = "select * from pg_namespace where nspname='%s';" % schema_name 
From 7e1ecf2de54850b5a868cbdeaac565accb3f4c2f Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Tue, 17 Feb 2026 17:16:46 +1000 Subject: [PATCH 09/22] Update segments stop procedure --- gpMgmt/bin/gprebalance_modules/shrink.py | 32 ++++++++++--------- .../mgmt_utils/ggrebalance_shrink.feature | 6 ++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 7e483960f9ac..8915c2aaa5db 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -442,29 +442,27 @@ def on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED(self) -> None: gp_array = self.gparray segments_to_stop = gp_array.get_segment_count() - self.shrink_plan.getTargetSegmentCount() - segments_to_stop = segments_to_stop * 2 # consider mirrors self.workers_for_segment_stop = WorkerPool(numWorkers=min(segments_to_stop, self.options.batch_size)) - for seg_pair in gp_array.getSegmentList(): - primary_seg = seg_pair.primaryDB - mirror_seg = seg_pair.mirrorDB - if primary_seg.getSegmentContentId() >= self.shrink_plan.getTargetSegmentCount(): - if primary_seg.isSegmentUp(): - cmd = self.SegmentStopAfterShrink(self, primary_seg) + # Stop primaries first, and mirrors after primaries + seg_roles = [gparray.ROLE_PRIMARY, gparray.ROLE_MIRROR] + for seg_role in seg_roles: + self.logger.info(f"Prepare to stop segments with role '{seg_role}'") + for seg in gp_array.getSegDbList(): + if (seg.getSegmentContentId() >= self.shrink_plan.getTargetSegmentCount() and + seg.getSegmentRole() == seg_role and seg.isSegmentUp()): + cmd = self.SegmentStopAfterShrink(self, seg) self.workers_for_segment_stop.addCommand(cmd) - - if mirror_seg != None and mirror_seg.isSegmentUp(): - cmd = self.SegmentStopAfterShrink(self, mirror_seg) - self.workers_for_segment_stop.addCommand(cmd) - - print_progress(self.workers_for_segment_stop, interval=1) + if self.shutdown_requested: + break + 
print_progress(self.workers_for_segment_stop, interval=1) self.workers_for_segment_stop.haltWork() self.workers_for_segment_stop.joinWorkers() for task in self.workers_for_segment_stop.getCompletedItems(): if not task.was_successful(): - raise Exception('Failed to stop segments') + self.logger.warning('Failed to stop segments') self.workers_for_segment_stop = None @@ -564,7 +562,11 @@ def __init__(self, shrink: 'GGShrink', segment: Segment) -> None: # decorator to inject a fault before running SegmentStopAfterShrink for a specific dbid def wrap_segment_stop_with_faults(fun): def func_with_faults(self): - inject_fault(f'fault_segment_stop_dbid_{self.segment.getSegmentDbId()}') + try: + inject_fault(f'fault_segment_stop_dbid_{self.segment.getSegmentDbId()}') + except Exception as e: + os.kill(os.getpid(), signal.SIGINT) + return fun(self) return func_with_faults diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 450760cf192d..71a747542119 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -60,8 +60,8 @@ Feature: ggrebalance behave tests And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" - #And a long-run session starts - #And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session + And a long-run session starts + And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session And database "gptest" exists And the user create a writable external table with name "ext_test" And database "test_db_2" exists @@ -72,7 +72,7 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 1 And 
ggrebalance should print "ggrebalance failed" to logfile with latest timestamp And unset fault inject - #And a long-run session ends + And a long-run session ends When execute following sql in db "postgres" and store result in the context """ select count(1) as temp_tables_for_redistribute from ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; From 643f8381f7fede59098660b783aa184aaa01da4c Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 18 Feb 2026 13:01:06 +1000 Subject: [PATCH 10/22] Fix the case when table is dropped in a parallel transaction --- gpMgmt/bin/gppylib/fault_injection.py | 8 ++++ gpMgmt/bin/gprebalance_modules/shrink.py | 32 ++++++++++---- .../mgmt_utils/ggrebalance_shrink.feature | 43 +++++++++++++++++++ .../behave/mgmt_utils/steps/mgmt_utils.py | 33 ++++++++++++-- 4 files changed, 104 insertions(+), 12 deletions(-) diff --git a/gpMgmt/bin/gppylib/fault_injection.py b/gpMgmt/bin/gppylib/fault_injection.py index 142ab17dfcfd..388097634575 100755 --- a/gpMgmt/bin/gppylib/fault_injection.py +++ b/gpMgmt/bin/gppylib/fault_injection.py @@ -7,9 +7,17 @@ GPMGMT_FAULT_POINT = 'GPMGMT_FAULT_POINT' GPMGMT_FAULT_DELAY_MS = 'GPMGMT_FAULT_DELAY_MS' +GPMGMT_FAULT_TYPE = 'GPMGMT_FAULT_TYPE' +GPMGMT_FAULT_FILE_FLAG = 'GPMGMT_FAULT_FILE_FLAG' + +GPMGMT_FAULT_TYPE_SYSPEND = 'suspend' def inject_fault(fault_point): if GPMGMT_FAULT_POINT in os.environ and fault_point == os.environ[GPMGMT_FAULT_POINT]: + if GPMGMT_FAULT_TYPE in os.environ and os.environ[GPMGMT_FAULT_TYPE] == GPMGMT_FAULT_TYPE_SYSPEND: + while GPMGMT_FAULT_FILE_FLAG in os.environ and os.path.exists(os.environ[GPMGMT_FAULT_FILE_FLAG]): + time.sleep(0.1) + return if GPMGMT_FAULT_DELAY_MS in os.environ and int(os.environ[GPMGMT_FAULT_DELAY_MS]) > 0: delay_ms = int(os.environ[GPMGMT_FAULT_DELAY_MS]) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index 8915c2aaa5db..614c46a46061 100644 --- 
a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -605,9 +605,9 @@ def __init__(self, # decorator to inject a fault before running TableRebalanceTask for a specific {db_name, schema_name, rel_name} def wrap_table_rebalance_with_faults(fun): - def func_with_faults(self): + def func_with_faults(self, attempt: int): inject_fault(f'fault_rebalance_table_{self.db_name}.{self.schema_name}.{self.rel_name}') - fun(self) + fun(self, attempt) return func_with_faults def table_exists(self, conn: dbconn.Connection, schema_name: str, rel_name: str) -> bool: @@ -625,8 +625,8 @@ def db_exists(self, conn: dbconn.Connection, db_name: str) -> bool: return True @wrap_table_rebalance_with_faults - def run(self) -> None: - self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') + def process_table(self, attempt: int) -> None: + self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments (attempt {attempt})') if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: @@ -650,6 +650,22 @@ def run(self) -> None: self.shrink.logger.info(f'Complete table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) + def run(self) -> None: + # give 2 attempts to process a table. 
+ attempt_max_cnt = 2 + for i in range(attempt_max_cnt): + attempt = i + 1 + try: + self.process_table(attempt) + except Exception as e: + if attempt < attempt_max_cnt: + logger.warning(f"{str(e)}") + else: + logger.error(f"{str(e)}") + raise Exception(f'Failed to process the db object for {attempt_max_cnt} attempts') + continue + break + class MatViewRefreshTask(TableRebalanceTask): def __init__(self, shrink: 'GGShrink', @@ -662,14 +678,14 @@ def __init__(self, # decorator to inject a fault before running MatViewRefreshTask for a specific {db_name, schema_name, rel_name} def wrap_refresh_matview_with_faults(fun): - def func_with_faults(self): + def func_with_faults(self, attempt: int): inject_fault(f'fault_refresh_matview_{self.db_name}.{self.schema_name}.{self.rel_name}') - fun(self) + fun(self, attempt) return func_with_faults @wrap_refresh_matview_with_faults - def run(self) -> None: - self.shrink.logger.info(f'Start matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') + def process_table(self, attempt: int) -> None: + self.shrink.logger.info(f'Start matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments (attempt {attempt})') if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 71a747542119..d5502d204263 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -814,3 +814,46 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And 
distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 Then the numsegments of table "ext_test" is 1 + + Scenario: test 7. test shrink, when a table, planned for rebalance, is dropped in a parallel transaction, committed after the start of table redistribution + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And there is a "heap" table "test_table_1" in "gptest" with "100" rows + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_begin" + And set fault inject type to suspend + When the user asynchronously runs "ggrebalance -x 1 --skip-rebalance" and the process is saved + And the user waits till ggrebalance prints 
"Updated target segment count to 1" in the logs + And a long-run session starts + And sql "BEGIN; DROP TABLE test_table_1;" is executed in a long-run session + And unset fault inject + And the user waits till ggrebalance prints "Start table rebalance for \"gptest\".\"public\".\"test_table_1\" to 1 segments" in the logs + And waiting "5" seconds + And sql "COMMIT;" is executed in a long-run session + And a long-run session ends + Then the async process finished with a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index f79cd7730912..521e2bb56d94 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -39,6 +39,7 @@ from gppylib import pgconf from gppylib.commands.gp import get_coordinatordatadir from gppylib.parseutils import canonicalize_address +from gppylib 
import fault_injection coordinator_data_dir = gp.get_coordinatordatadir() if coordinator_data_dir is None: @@ -660,6 +661,16 @@ def impl(context, kill_process_name, log_msg, logfile_name): "fi; done" % (log_msg, logfile_name, kill_process_name) run_async_command(context, command) +@given('the user waits till {process_name} prints "{log_msg}" in the logs') +@when('the user waits till {process_name} prints "{log_msg}" in the logs') +@then('the user waits till {process_name} prints "{log_msg}" in the logs') +def impl(context, process_name, log_msg): + command = "while sleep 0.1; " \ + "do if grep -E --quiet %s ~/gpAdminLogs/%s*log ; " \ + "then break 2; " \ + "fi; done" % (log_msg, process_name) + run_cmd(command) + @given('the user asynchronously sets up to end {process_name} process with {signal_name}') @when('the user asynchronously sets up to end {process_name} process with {signal_name}') @then('the user asynchronously sets up to end {process_name} process with {signal_name}') @@ -4547,25 +4558,39 @@ def step_impl(context, address): @then('set fault inject "{fault}"') @when('set fault inject "{fault}"') def impl(context, fault): - os.environ['GPMGMT_FAULT_POINT'] = fault + os.environ[fault_injection.GPMGMT_FAULT_POINT] = fault @given('unset fault inject') @then('unset fault inject') @when('unset fault inject') def impl(context): - os.environ['GPMGMT_FAULT_POINT'] = "" + os.environ[fault_injection.GPMGMT_FAULT_POINT] = "" + os.environ[fault_injection.GPMGMT_FAULT_TYPE] = "" + os.environ[fault_injection.GPMGMT_FAULT_FILE_FLAG] = "" + if os.path.exists(context.fault_flag_filename): + os.remove(context.fault_flag_filename) @given('set fault inject delay {delay} ms') @then('set fault inject delay {delay} ms') @when('set fault inject delay {delay} ms') def impl(context, delay): - os.environ['GPMGMT_FAULT_DELAY_MS'] = delay + os.environ[fault_injection.GPMGMT_FAULT_DELAY_MS] = delay + +@given('set fault inject type to suspend') +@then('set fault inject type to suspend') 
+@when('set fault inject type to suspend') +def impl(context): + os.environ[fault_injection.GPMGMT_FAULT_TYPE] = fault_injection.GPMGMT_FAULT_TYPE_SYSPEND + context.fault_flag_filename = "/tmp/ggrebalance_fault_suspend_flag" + with open(context.fault_flag_filename, "w"): + pass + os.environ[fault_injection.GPMGMT_FAULT_FILE_FLAG] = context.fault_flag_filename @given('unset fault inject delay') @then('unset fault inject delay') @when('unset fault inject delay') def impl(context): - os.environ['GPMGMT_FAULT_DELAY_MS'] = "" + os.environ[fault_injection.GPMGMT_FAULT_DELAY_MS] = "" @given('stub') def impl(context): From ad6430e1c00acbf32abc6d058ca624ef3a64b972 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 18 Feb 2026 13:04:02 +1000 Subject: [PATCH 11/22] Cosmetic changes --- .../mgmt_utils/ggrebalance_shrink.feature | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index d5502d204263..e869f07864ef 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -39,7 +39,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" 
with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -96,7 +96,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -167,7 +167,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 1, row count = 1094 @@ 
-216,7 +216,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -255,7 +255,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -320,7 +320,7 @@ Feature: ggrebalance behave tests And distribution information 
from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -372,7 +372,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 @@ -438,7 +438,7 @@ Feature: ggrebalance behave tests And distribution information from table 
"test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -487,7 +487,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -559,7 +559,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment 
count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -616,7 +616,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 - Then the numsegments of table "ext_test" is 2 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 @@ -693,7 +693,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 
And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 Scenario: test 5. test shrink continue, when a table planned for rebalance was dropped Given the database is not running @@ -730,7 +730,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 Scenario: test 5.1. 
test shrink continue, when a mat view planned for rebalance was dropped @@ -774,7 +774,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 @@ -813,7 +813,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - Then the numsegments of table "ext_test" is 1 + And the numsegments of table "ext_test" is 1 Scenario: test 7. 
test shrink, when a table, planned for rebalance, is dropped in a parallel transaction, committed after the start of table redistribution Given the database is not running From 0c4487daeddcb2ab0c4d18155c245453483e91c5 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 18 Feb 2026 15:21:56 +1000 Subject: [PATCH 12/22] Fix tests --- gpMgmt/test/behave/mgmt_utils/environment.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gpMgmt/test/behave/mgmt_utils/environment.py b/gpMgmt/test/behave/mgmt_utils/environment.py index 90e1f06cd948..7722901d3988 100644 --- a/gpMgmt/test/behave/mgmt_utils/environment.py +++ b/gpMgmt/test/behave/mgmt_utils/environment.py @@ -123,6 +123,8 @@ def before_scenario(context, scenario): if 'gpssh-exkeys' in context.feature.tags: context.gpssh_exkeys_context = GpsshExkeysMgmtContext(context) + context.fault_flag_filename = "" + tags_to_skip = ['gpexpand', 'gpaddmirrors', 'gpstate', 'gpmovemirrors', 'gpconfig', 'gpssh-exkeys', 'gpstop', 'gpinitsystem', 'cross_subnet', 'gplogfilter', 'ggrebalance_basics', 'ggrebalance_shrink', 'ggrebalance_rebalance', 'ggrebalance_misc_options'] From 6fec958ccf17e92563f792dff4a79be5f9f0e72a Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 18 Feb 2026 15:24:50 +1000 Subject: [PATCH 13/22] Remove redundant test --- .../mgmt_utils/ggrebalance_shrink.feature | 51 ++----------------- 1 file changed, 4 insertions(+), 47 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index e869f07864ef..83d122914152 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -652,50 +652,7 @@ Feature: ggrebalance behave tests | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_DROP_SCHEMA_START_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | 
fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | - Scenario: test 4. shrink - check different table types - Given the database is not running - And a working directory of the test as '/data/gpdata/ggrebalance' - And a cluster is created with mirrors on "cdw" and "sdw1" - And segment information for content 1 is saved in context - And all files in gpAdminLogs directory are deleted - And database "test_db_1" exists - And schema "test_schema_1" exists in "test_db_1" - And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows - And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows - And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows - And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows - And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows - And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" - And a long-run session starts - And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session - And database "gptest" exists - And the user create a writable external table with name "ext_test" - And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_end" - When the user runs "ggrebalance -x 1 --parallel 1 --batch-size 1 --skip-rebalance" - Then ggrebalance should return a return code of 1 - And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp - And unset fault inject - And a long-run session ends - When execute following sql in db "postgres" and store result in the context - """ - select count(1) as temp_tables_for_redistribute from ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; - """ - Then validate that following rows are in the stored rows - | temp_tables_for_redistribute | - | 0 | - When the user runs 
"ggrebalance --parallel 1 --batch-size 1" - Then ggrebalance should return a return code of 0 - And ggrebalance should print "Shrink is complete" to logfile with latest timestamp - And verify no segment running for saved segment information - And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 - And the numsegments of table "ext_test" is 1 - - Scenario: test 5. test shrink continue, when a table planned for rebalance was dropped + Scenario: test 4. test shrink continue, when a table planned for rebalance was dropped Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" @@ -733,7 +690,7 @@ Feature: ggrebalance behave tests And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 - Scenario: test 5.1. test shrink continue, when a mat view planned for rebalance was dropped + Scenario: test 4.1. 
test shrink continue, when a mat view planned for rebalance was dropped Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" @@ -778,7 +735,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 - Scenario: test 6. test shrink continue, when a db with the table planned for rebalance was dropped + Scenario: test 5. test shrink continue, when a db with the table planned for rebalance was dropped Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" @@ -815,7 +772,7 @@ Feature: ggrebalance behave tests And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And the numsegments of table "ext_test" is 1 - Scenario: test 7. test shrink, when a table, planned for rebalance, is dropped in a parallel transaction, committed after the start of table redistribution + Scenario: test 6. 
test shrink, when a table, planned for rebalance, is dropped in a parallel transaction, committed after the start of table redistribution Given the database is not running And a working directory of the test as '/data/gpdata/ggrebalance' And a cluster is created with mirrors on "cdw" and "sdw1" From 35bab85d8bb1fa46ae2f2afb81096828c398f4ca Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 18 Feb 2026 15:37:17 +1000 Subject: [PATCH 14/22] Cosmetic changes --- gpMgmt/bin/gprebalance_modules/shrink.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index ee51bf458bd0..b0db407cf928 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -444,7 +444,8 @@ def on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED(self) -> None: segments_to_stop = gp_array.get_segment_count() - self.shrink_plan.getTargetSegmentCount() self.workers_for_segment_stop = WorkerPool(numWorkers=min(segments_to_stop, self.options.batch_size)) - # Stop primaries first, and mirrors after primaries + # Stop primaries first, and mirrors after primaries, + # to avoid hanging replication processes seg_roles = [gparray.ROLE_PRIMARY, gparray.ROLE_MIRROR] for seg_role in seg_roles: self.logger.info(f"Prepare to stop segments with role '{seg_role}'") @@ -586,6 +587,8 @@ def run(self) -> None: self.shrink.logger.info(f'Stopped shrinked segment {str(self.segment)}') class TableRebalanceTask(SQLCommand): + STATUS_MAT_VIEW_REFRESH_REQUIRED = 'mv_refresh_required' + def __init__(self, shrink: 'GGShrink', db_name: str, @@ -644,7 +647,7 @@ def process_table(self, attempt: int) -> None: self.shrink.logger.info(f'''Table "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual rebalance''') if self.rel_kind == 'm' and table_exists: - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, 
self.rel_name, 'mv_refresh_required') + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.STATUS_MAT_VIEW_REFRESH_REQUIRED) else: self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) dbconn.execSQL(conn, 'COMMIT') @@ -654,7 +657,10 @@ def process_table(self, attempt: int) -> None: self.set_results(CommandResult(0, b'', b'', True, False)) def run(self) -> None: - # give 2 attempts to process a table. + # Give 2 attempts to process a table. It is needed, when, for example, + # other session opens a transaction after we have created the rebalance table + # list, drops the table before we started to rebalance it, and commits the + # transaction when we've started to rebalance the table. attempt_max_cnt = 2 for i in range(attempt_max_cnt): attempt = i + 1 @@ -718,7 +724,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: # for the case we re-enter this state after we were interrupted right after it self.rebalance_schema.clearTablesToRebalanceWithStatus(status) if is_rollback: - self.rebalance_schema.clearTablesToRebalanceWithStatus('mv_refresh_required') + self.rebalance_schema.clearTablesToRebalanceWithStatus(self.TableRebalanceTask.STATUS_MAT_VIEW_REFRESH_REQUIRED) cursor = dbconn.query(self.conn, 'SELECT datname FROM pg_database') databases_to_process = [] @@ -776,7 +782,9 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm self.workers_for_tables_rebalance = None - cursor = self.rebalance_schema.getTablesToRebalanceWithStatus('mv_refresh_required') + # Process refresh of mat views separately from table rebalancing, + # as doing it in parallel may provide not full data refresh. 
+ cursor = self.rebalance_schema.getTablesToRebalanceWithStatus(self.TableRebalanceTask.STATUS_MAT_VIEW_REFRESH_REQUIRED) self.logger.info(f'Materialized views to refresh: {cursor.rowcount}') From af75169f55e993fb6ae8027d745474cef695e84a Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 09:23:28 +1000 Subject: [PATCH 15/22] Use CTAS approach for rebalancing the materialized view Problem description: Before this patch, in order to rebalance a materialized view, 2 steps were required: the actual rebalance where distribution policy was updated, and the refresh step to update the data in the materialized view. This approach had 2 problems with respect to usage in 'ggrebalance' tool for cluster shrink: 1. It could change the actual data in the materialized view before the cluster shrink, and after the shrink, if the view was not up-to-date. We intend to keep the logical data in the cluster unaltered. 2. If a materialized view depends on another materialized view, there could be a race condition when doing the refresh, when we try to refresh based on the yet-not-refreshed one. Fix: Use the CTAS approach from the EXPAND TABLE specifically when we are rebalancing a materialized view. It creates a temp table with a correct distribution policy, where all data from the materialized view is copied, and then the relfilenode of the materialized view is swapped with the temp table. It keeps the data as it was before the rebalance, even if it was not up-to-date (therefore we will not surprise the user with unexpected view content), and it eliminates dependencies on other objects besides the materialized view itself. 
(cherry picked from commit 37dc7e74afa8d73e3164a7d8d85f7f06fed3a09a) --- src/backend/commands/tablecmds.c | 29 +++++++++++++------ src/test/regress/expected/alter_rebalance.out | 4 --- src/test/regress/sql/alter_rebalance.sql | 2 -- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 224b1c235c22..e4d7707b496f 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -513,7 +513,9 @@ static void ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd static void ATRepackTable(Relation origTable, AlteredTableInfo *tab); static void ATExecExpandPartitionTablePrepare(Relation rel); -static void ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd); +static void ATExecRebalanceTableCTAS(AlterTableCmd *rootCmd, + Relation rel, AlterTableCmd *cmd, + int targetNumSegments); static void ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd); @@ -17513,7 +17515,7 @@ ATExecExpandTable(List **wqueue, Relation rel, AlterTableCmd *cmd) } else { - ATExecExpandTableCTAS(rootCmd, rel, cmd); + ATExecRebalanceTableCTAS(rootCmd, rel, cmd, 0); } /* Update numsegments to cluster size */ @@ -17651,11 +17653,6 @@ ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd) * child partitions. */ } - else if (rel->rd_rel->relkind == RELKIND_MATVIEW) - { - ereport((Gp_role == GP_ROLE_EXECUTE) ? DEBUG1 : NOTICE, - (errmsg("Materialized view requires REFRESH after rebalance"))); - } else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { if (rel_is_external_table(relid)) @@ -17677,6 +17674,18 @@ ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd) return; } } + else if (rel->rd_rel->relkind == RELKIND_MATVIEW) + { + /* + * We can't insert data directly to an existing matview, + * therefore the approach from ATExecShrinkTable() is not suitable, + * and we use CTAS method for matviews. 
+ */ + AlteredTableInfo *tab = linitial(*wqueue); + AlterTableCmd *rootCmd = + (AlterTableCmd *)linitial(tab->subcmds[AT_PASS_MISC]); + ATExecRebalanceTableCTAS(rootCmd, rel, cmd, targetNumSegments); + } else { ATExecShrinkTable(rel, newPolicy); @@ -17778,7 +17787,8 @@ ATExecExpandPartitionTablePrepare(Relation rel) } static void -ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd) +ATExecRebalanceTableCTAS(AlterTableCmd *rootCmd, Relation rel, + AlterTableCmd *cmd, int targetNumSegments) { RangeVar *tmprv; Oid tmprelid; @@ -17817,7 +17827,8 @@ ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd) /* Step (b) - build CTAS */ distby = make_distributedby_for_rel(rel); - distby->numsegments = getgpsegmentCount(); + distby->numsegments = + (targetNumSegments > 0) ? targetNumSegments : getgpsegmentCount(); queryDesc = build_ctas_with_dist(rel, distby, untransformRelOptions(get_rel_opts(rel)), diff --git a/src/test/regress/expected/alter_rebalance.out b/src/test/regress/expected/alter_rebalance.out index 0f1e88d1d6ee..24a6a1c892a2 100644 --- a/src/test/regress/expected/alter_rebalance.out +++ b/src/test/regress/expected/alter_rebalance.out @@ -3561,8 +3561,6 @@ insert into test_table select generate_series(1, 10); create materialized view mv_test_table as select a from test_table distributed by (a); alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; -NOTICE: Materialized view requires REFRESH after rebalance -refresh materialized view mv_test_table; select count(1), gp_segment_id from test_table group by gp_segment_id; count | gp_segment_id -------+--------------- @@ -3600,8 +3598,6 @@ select count(1), gp_segment_id from mv_test_table group by gp_segment_id order b begin; alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; -NOTICE: Materialized view requires REFRESH after rebalance -refresh materialized view mv_test_table; rollback; select 
count(1), gp_segment_id from test_table group by gp_segment_id order by gp_segment_id; count | gp_segment_id diff --git a/src/test/regress/sql/alter_rebalance.sql b/src/test/regress/sql/alter_rebalance.sql index f11c3b3ddb19..09c9555f5b06 100644 --- a/src/test/regress/sql/alter_rebalance.sql +++ b/src/test/regress/sql/alter_rebalance.sql @@ -463,7 +463,6 @@ create materialized view mv_test_table as select a from test_table distributed b alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; -refresh materialized view mv_test_table; select count(1), gp_segment_id from test_table group by gp_segment_id; select count(1), gp_segment_id from mv_test_table group by gp_segment_id; @@ -483,7 +482,6 @@ select count(1), gp_segment_id from mv_test_table group by gp_segment_id order b begin; alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; -refresh materialized view mv_test_table; rollback; select count(1), gp_segment_id from test_table group by gp_segment_id order by gp_segment_id; From cabac887c910a8e97ed0a7022b3d96bf09009cbd Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 10:23:32 +1000 Subject: [PATCH 16/22] Remove refresh step for MV handling --- .../gprebalance_modules/rebalance_schema.py | 8 +- gpMgmt/bin/gprebalance_modules/shrink.py | 86 +------------------ .../mgmt_utils/ggrebalance_shrink.feature | 12 --- 3 files changed, 8 insertions(+), 98 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/rebalance_schema.py b/gpMgmt/bin/gprebalance_modules/rebalance_schema.py index 9198613036dd..a7bdbdf35814 100644 --- a/gpMgmt/bin/gprebalance_modules/rebalance_schema.py +++ b/gpMgmt/bin/gprebalance_modules/rebalance_schema.py @@ -38,7 +38,7 @@ def createSchema(self, plan: Plan) -> None: DISTRIBUTED REPLICATED''') dbconn.execSQL(self.conn, f'''CREATE TABLE {self.schema_name}.{self.table_rebalance_status_detail} - (db_name TEXT, schema_name TEXT, rel_name TEXT, rel_kind CHAR, status 
TEXT, + (db_name TEXT, schema_name TEXT, rel_name TEXT, status TEXT, CONSTRAINT unique_fqn UNIQUE (db_name, schema_name, rel_name)) DISTRIBUTED REPLICATED''') dbconn.execSQL(self.conn, @@ -133,10 +133,10 @@ def clearTablesToRebalanceWithStatus(self, status: str) -> None: f'''DELETE FROM {self.schema_name}.{self.table_rebalance_status_detail} WHERE (status = '{status}')''') - def addTableToRebalance(self, db: str, schema_name: str, rel_name: str, rel_kind: str, status: str) -> None: + def addTableToRebalance(self, db: str, schema_name: str, rel_name: str, status: str) -> None: dbconn.execSQL(self.conn, f'''INSERT INTO {self.schema_name}.{self.table_rebalance_status_detail} - VALUES ('{db}', '{schema_name}', '{rel_name}', '{rel_kind}', '{status}')''') + VALUES ('{db}', '{schema_name}', '{rel_name}', '{status}')''') def setStatusForTableToRebalance(self, db: str, schema_name: str, rel_name: str, status: str) -> None: dbconn.execSQL(self.conn, @@ -144,7 +144,7 @@ def setStatusForTableToRebalance(self, db: str, schema_name: str, rel_name: str, WHERE db_name = '{db}' AND schema_name = '{schema_name}' AND rel_name = '{rel_name}';''') def getTablesToRebalanceWithStatus(self, status: str) -> cursor: - return dbconn.query(self.conn, f"""SELECT db_name, schema_name, rel_name, rel_kind FROM + return dbconn.query(self.conn, f"""SELECT db_name, schema_name, rel_name FROM {self.schema_name}.{self.table_rebalance_status_detail} WHERE status = '{status}'""") def saveExecutionSteps(self, steps: List[RebalanceStep]) -> None: diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index b0db407cf928..a385ebc7c777 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -587,21 +587,17 @@ def run(self) -> None: self.shrink.logger.info(f'Stopped shrinked segment {str(self.segment)}') class TableRebalanceTask(SQLCommand): - STATUS_MAT_VIEW_REFRESH_REQUIRED = 'mv_refresh_required' - def __init__(self, 
shrink: 'GGShrink', db_name: str, schema_name: str, rel_name: str, - rel_kind: str, target_segment_count: int, table_status_after_rebalance: str) -> None: self.shrink = shrink self.db_name = db_name self.schema_name = schema_name self.rel_name = rel_name - self.rel_kind = rel_kind self.target_segment_count = target_segment_count self.table_status_after_rebalance = table_status_after_rebalance super().__init__(f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') @@ -640,16 +636,13 @@ def process_table(self, attempt: int) -> None: dbconn.execSQL(conn, f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" REBALANCE {self.target_segment_count}''') - if self.rel_kind != 'm' and self.shrink.options.analyze: + if self.shrink.options.analyze: dbconn.execSQL(conn, f'''ANALYZE "{self.schema_name}"."{self.rel_name}"''') else: self.shrink.logger.info(f'''Table "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual rebalance''') - if self.rel_kind == 'm' and table_exists: - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.STATUS_MAT_VIEW_REFRESH_REQUIRED) - else: - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) dbconn.execSQL(conn, 'COMMIT') else: self.shrink.logger.info(f'''DB "{self.db_name}" doesn't exist, skipping actual rebalance for "{self.schema_name}"."{self.rel_name}"''') @@ -675,45 +668,6 @@ def run(self) -> None: continue break - class MatViewRefreshTask(TableRebalanceTask): - def __init__(self, - shrink: 'GGShrink', - db_name: str, - schema_name: str, - rel_name: str, - target_segment_count: int, - table_status_after_rebalance: str) -> None: - super().__init__(shrink, db_name, schema_name, rel_name, 'm', 
target_segment_count, table_status_after_rebalance) - - # decorator to inject a fault before running MatViewRefreshTask for a specific {db_name, schema_name, rel_name} - def wrap_refresh_matview_with_faults(fun): - def func_with_faults(self, attempt: int): - inject_fault(f'fault_refresh_matview_{self.db_name}.{self.schema_name}.{self.rel_name}') - fun(self, attempt) - return func_with_faults - - @wrap_refresh_matview_with_faults - def process_table(self, attempt: int) -> None: - self.shrink.logger.info(f'Start matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments (attempt {attempt})') - if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): - dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) - with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: - dbconn.execSQL(conn, 'BEGIN') - if self.table_exists(conn, self.schema_name, self.rel_name): - dbconn.execSQL(conn, f'REFRESH MATERIALIZED VIEW "{self.schema_name}"."{self.rel_name}"') - if self.shrink.options.analyze: - dbconn.execSQL(conn, - f'''ANALYZE "{self.schema_name}"."{self.rel_name}"''') - else: - self.shrink.logger.info(f'''Materialized view "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual REFRESH''') - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) - dbconn.execSQL(conn, 'COMMIT') - else: - self.shrink.logger.info(f'''DB "{self.db_name}" doesn't exist, skipping actual REFRESH for "{self.schema_name}"."{self.rel_name}"''') - self.shrink.logger.info(f'Complete matview refresh for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') - self.set_results(CommandResult(0, b'', b'', True, False)) - - def prepare_shrink_schema(self, is_rollback: bool) -> None: status = 'done' if is_rollback else 'none' cmp = '<=' if is_rollback else '>' @@ -723,8 +677,6 @@ def 
prepare_shrink_schema(self, is_rollback: bool) -> None: # cleanup list of tables that require rebalance # for the case we re-enter this state after we were interrupted right after it self.rebalance_schema.clearTablesToRebalanceWithStatus(status) - if is_rollback: - self.rebalance_schema.clearTablesToRebalanceWithStatus(self.TableRebalanceTask.STATUS_MAT_VIEW_REFRESH_REQUIRED) cursor = dbconn.query(self.conn, 'SELECT datname FROM pg_database') databases_to_process = [] @@ -749,7 +701,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: for schema_name, rel_name, rel_kind, external_writable in cursor: if rel_kind == 'f' and not external_writable: continue - self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, rel_kind, status) + self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, status) dbconn.execSQL(self.conn, 'COMMIT') @@ -761,12 +713,11 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm if cursor.rowcount > 0: self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) - for db_name, schema_name, rel_name, rel_kind in cursor: + for db_name, schema_name, rel_name in cursor: task = self.TableRebalanceTask(self, db_name, schema_name, rel_name, - rel_kind, target_segment_count, target_status) self.workers_for_tables_rebalance.addCommand(task) @@ -782,35 +733,6 @@ def rebalance_tables(self, original_status: str, target_status: str, target_segm self.workers_for_tables_rebalance = None - # Process refresh of mat views separately from table rebalancing, - # as doing it in parallel may provide not full data refresh. 
- cursor = self.rebalance_schema.getTablesToRebalanceWithStatus(self.TableRebalanceTask.STATUS_MAT_VIEW_REFRESH_REQUIRED) - - self.logger.info(f'Materialized views to refresh: {cursor.rowcount}') - - if cursor.rowcount > 0: - self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) - - for db_name, schema_name, rel_name, rel_kind in cursor: - task = self.MatViewRefreshTask(self, - db_name, - schema_name, - rel_name, - target_segment_count, - target_status) - self.workers_for_tables_rebalance.addCommand(task) - - print_progress(self.workers_for_tables_rebalance, interval=1) - - self.workers_for_tables_rebalance.haltWork() - self.workers_for_tables_rebalance.joinWorkers() - - for task in self.workers_for_tables_rebalance.getCompletedItems(): - if not task.was_successful(): - raise Exception(f'Failed to do REFRESH for a materialized view: {task.get_results().stderr}') - - self.workers_for_tables_rebalance = None - def state_can_rollback(self, state: str) -> bool: if (state in self.states_main_shrink_flow): if self.states_main_shrink_flow.index(state) <= self.states_main_shrink_flow.index('STATE_SHRINK_TABLES_DONE'): diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 83d122914152..0376d31bf64e 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -126,7 +126,6 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED_begin | | on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | - | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | | fault_segment_stop_dbid_3 | Scenario Outline: test 1.3. 
test shrink continue after cluster restart @@ -179,7 +178,6 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | on_enter_STATE_SHRINK_CATALOG_STARTED_end | - | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario: test 2.1. shrink - check rollback after interrupted state, if interruption is done before the rebalance schema creation Given the database is not running @@ -280,7 +278,6 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | - | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario Outline: test 2.3. shrink - check rollback after interrupted state (interruption is done after the point of no return). Rollback fails. So just continue shrink. Given the database is not running @@ -396,7 +393,6 @@ Feature: ggrebalance behave tests | on_enter_STATE_SHRINK_TABLES_DONE_end | | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | - | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario Outline: test 3.1. shrink - check continue after interrupted rollback state. In this case we fail in rollback too early, and normal shrink will be complete. Given the database is not running @@ -650,7 +646,6 @@ Feature: ggrebalance behave tests | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_begin | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_DROP_SCHEMA_START_begin | - | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_1 | Scenario: test 4. 
test shrink continue, when a table planned for rebalance was dropped Given the database is not running @@ -704,7 +699,6 @@ Feature: ggrebalance behave tests And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" - And a materialized view "test_schema_1.mv_test_table_2" exists on table "test_schema_1.test_table_1" And database "gptest" exists And the user create a writable external table with name "ext_test" And database "test_db_2" exists @@ -717,12 +711,6 @@ Feature: ggrebalance behave tests And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp And unset fault inject And materialized view "test_schema_1.mv_test_table_1" is dropped in "test_db_1" - When set fault inject "fault_refresh_matview_test_db_1.test_schema_1.mv_test_table_2" - And the user runs "ggrebalance" - Then ggrebalance should return a return code of 1 - And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp - And unset fault inject - And materialized view "test_schema_1.mv_test_table_2" is dropped in "test_db_1" When the user runs "ggrebalance" Then ggrebalance should return a return code of 0 And ggrebalance should print "Shrink is complete" to logfile with latest timestamp From aa00ef247b76e2542e906495e72ec444ff5ba1c5 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 10:29:47 +1000 Subject: [PATCH 17/22] Reduce delta --- gpMgmt/bin/gprebalance_modules/shrink.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index a385ebc7c777..48798267ed6c 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -600,7 +600,7 @@ def 
__init__(self, self.rel_name = rel_name self.target_segment_count = target_segment_count self.table_status_after_rebalance = table_status_after_rebalance - super().__init__(f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') + SQLCommand.__init__(self, f'task rebalance for {self.db_name}.{self.schema_name}.{self.rel_name}') # decorator to inject a fault before running TableRebalanceTask for a specific {db_name, schema_name, rel_name} def wrap_table_rebalance_with_faults(fun): @@ -708,7 +708,7 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: def rebalance_tables(self, original_status: str, target_status: str, target_segment_count: int) -> None: cursor = self.rebalance_schema.getTablesToRebalanceWithStatus(original_status) - self.logger.info(f'Tables to rebalance: {cursor.rowcount}') + self.logger.info(f'Tables to process {cursor.rowcount}') if cursor.rowcount > 0: self.workers_for_tables_rebalance = WorkerPool(numWorkers=min(cursor.rowcount, self.options.parallel)) From 793a96375505918a56598d8ac87d080a16438be9 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 11:59:57 +1000 Subject: [PATCH 18/22] Use existing test steps instead of new ones --- .../mgmt_utils/ggrebalance_shrink.feature | 14 +++++----- .../behave/mgmt_utils/steps/mgmt_utils.py | 26 ++++--------------- 2 files changed, 12 insertions(+), 28 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 0376d31bf64e..f5735e03657d 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -60,8 +60,8 @@ Feature: ggrebalance behave tests And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows And a materialized view "test_schema_1.mv_test_table_1" 
exists on table "test_schema_1.test_table_1" - And a long-run session starts - And sql "CREATE TEMP TABLE temp_table(a int)" is executed in a long-run session + And the user connects to "gptest" with named connection "test_connection" + And the user executes "CREATE TEMP TABLE temp_table(a int);" with named connection "test_connection" And database "gptest" exists And the user create a writable external table with name "ext_test" And database "test_db_2" exists @@ -72,7 +72,7 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 1 And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp And unset fault inject - And a long-run session ends + And the user drops the named connection "test_connection" When execute following sql in db "postgres" and store result in the context """ select count(1) as temp_tables_for_redistribute from ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; @@ -785,13 +785,13 @@ Feature: ggrebalance behave tests And set fault inject type to suspend When the user asynchronously runs "ggrebalance -x 1 --skip-rebalance" and the process is saved And the user waits till ggrebalance prints "Updated target segment count to 1" in the logs - And a long-run session starts - And sql "BEGIN; DROP TABLE test_table_1;" is executed in a long-run session + And the user connects to "gptest" with named connection "test_connection" + And the user executes "BEGIN; DROP TABLE test_table_1;" with named connection "test_connection" And unset fault inject And the user waits till ggrebalance prints "Start table rebalance for \"gptest\".\"public\".\"test_table_1\" to 1 segments" in the logs And waiting "5" seconds - And sql "COMMIT;" is executed in a long-run session - And a long-run session ends + And the user executes "COMMIT;" with named connection "test_connection" + And the user drops the named connection "test_connection" Then the async process finished with a return code of 0 
And ggrebalance should print "Shrink is complete" to logfile with latest timestamp And verify no segment running for saved segment information diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 5939e4663efb..c6bd07eb2c01 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -157,6 +157,8 @@ def impl(context, query, db, contentids): @given('the user connects to "{dbname}" with named connection "{cname}"') +@when('the user connects to "{dbname}" with named connection "{cname}"') +@then('the user connects to "{dbname}" with named connection "{cname}"') def impl(context, dbname, cname): if not hasattr(context, 'named_conns'): context.named_conns = {} @@ -198,11 +200,14 @@ def impl(conetxt, tabname): @given('the user executes "{sql}" with named connection "{cname}"') +@when('the user executes "{sql}" with named connection "{cname}"') +@then('the user executes "{sql}" with named connection "{cname}"') def impl(context, cname, sql): conn = context.named_conns[cname] dbconn.execSQL(conn, sql) +@when('the user drops the named connection "{cname}"') @then('the user drops the named connection "{cname}"') def impl(context, cname): if cname in context.named_conns: @@ -1501,27 +1506,6 @@ def stop_segments_immediate(context, where_clause): def impl(context): wait_for_unblocked_transactions(context, 600) -@given('a long-run session starts') -@when('a long-run session starts') -@then('a long-run session starts') -def impl(context): - dbname = 'gptest' - context.long_run_conn = dbconn.connect(dbconn.DbURL(dbname=dbname), unsetSearchPath=False) - -@given('a long-run session ends') -@when('a long-run session ends') -@then('a long-run session ends') -def impl(context): - if context.long_run_conn != None: - context.long_run_conn.close() - context.long_run_conn = None - -@given('sql "{sql}" is executed in a long-run session') -@when('sql "{sql}" is 
executed in a long-run session') -@then('sql "{sql}" is executed in a long-run session') -def impl(context, sql): - dbconn.execSQL(context.long_run_conn, sql) - @given('below sql is executed in "{dbname}" db') @when('below sql is executed in "{dbname}" db') def impl(context, dbname): From 57e45a9a17948b97b9bc9893019198f91c7b6984 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 14:39:13 +1000 Subject: [PATCH 19/22] Add timeout into wait for logs step --- .../mgmt_utils/ggrebalance_shrink.feature | 4 +-- .../behave/mgmt_utils/steps/mgmt_utils.py | 33 ++++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index f5735e03657d..48241783d44f 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -784,11 +784,11 @@ Feature: ggrebalance behave tests And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_begin" And set fault inject type to suspend When the user asynchronously runs "ggrebalance -x 1 --skip-rebalance" and the process is saved - And the user waits till ggrebalance prints "Updated target segment count to 1" in the logs + And the user waits till ggrebalance prints "Updated target segment count to 1" in the logs (with timeout of "60" sec) And the user connects to "gptest" with named connection "test_connection" And the user executes "BEGIN; DROP TABLE test_table_1;" with named connection "test_connection" And unset fault inject - And the user waits till ggrebalance prints "Start table rebalance for \"gptest\".\"public\".\"test_table_1\" to 1 segments" in the logs + And the user waits till ggrebalance prints "Start table rebalance for \"gptest\".\"public\".\"test_table_1\" to 1 segments" in the logs (with timeout of "60" sec) And waiting "5" seconds And the user executes "COMMIT;" with named connection 
"test_connection" And the user drops the named connection "test_connection" diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index c6bd07eb2c01..c317c636ffa3 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -666,15 +666,30 @@ def impl(context, kill_process_name, log_msg, logfile_name): "fi; done" % (log_msg, logfile_name, kill_process_name) run_async_command(context, command) -@given('the user waits till {process_name} prints "{log_msg}" in the logs') -@when('the user waits till {process_name} prints "{log_msg}" in the logs') -@then('the user waits till {process_name} prints "{log_msg}" in the logs') -def impl(context, process_name, log_msg): - command = "while sleep 0.1; " \ - "do if grep -E --quiet %s ~/gpAdminLogs/%s*log ; " \ - "then break 2; " \ - "fi; done" % (log_msg, process_name) - run_cmd(command) +@given('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +@when('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +@then('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +def impl(context, process_name, log_msg, timeout): + poll_period = 0.1 + max_iteration_cnt = int(int(timeout) / poll_period) + command = f""" + ITERATION=0 + MAX_ITERATION_CNT={max_iteration_cnt} + while sleep {poll_period}; do + if grep -E --quiet '{log_msg}' ~/gpAdminLogs/{process_name}*log ; + then break 2; + fi; + + ITERATION=$((ITERATION + 1)) + if [ $ITERATION -ge $MAX_ITERATION_CNT ]; then + echo "Timeout after {timeout} seconds waiting for '{log_msg}' in {process_name} logs" + exit 1 + fi + done + """ + rc, _, error = run_cmd(command) + if rc: + raise Exception(error) @given('the user asynchronously sets up to end {process_name} process with {signal_name}') @when('the user asynchronously sets 
up to end {process_name} process with {signal_name}') From cd8394d254009b1df719eb125fd4fe34a1e4679d Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Fri, 20 Feb 2026 14:52:26 +1000 Subject: [PATCH 20/22] Reduce delta across files --- gpMgmt/test/behave/mgmt_utils/environment.py | 2 -- gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/gpMgmt/test/behave/mgmt_utils/environment.py b/gpMgmt/test/behave/mgmt_utils/environment.py index 7722901d3988..90e1f06cd948 100644 --- a/gpMgmt/test/behave/mgmt_utils/environment.py +++ b/gpMgmt/test/behave/mgmt_utils/environment.py @@ -123,8 +123,6 @@ def before_scenario(context, scenario): if 'gpssh-exkeys' in context.feature.tags: context.gpssh_exkeys_context = GpsshExkeysMgmtContext(context) - context.fault_flag_filename = "" - tags_to_skip = ['gpexpand', 'gpaddmirrors', 'gpstate', 'gpmovemirrors', 'gpconfig', 'gpssh-exkeys', 'gpstop', 'gpinitsystem', 'cross_subnet', 'gplogfilter', 'ggrebalance_basics', 'ggrebalance_shrink', 'ggrebalance_rebalance', 'ggrebalance_misc_options'] diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index c317c636ffa3..7373c2c3ef5e 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -4582,7 +4582,7 @@ def impl(context): os.environ[fault_injection.GPMGMT_FAULT_POINT] = "" os.environ[fault_injection.GPMGMT_FAULT_TYPE] = "" os.environ[fault_injection.GPMGMT_FAULT_FILE_FLAG] = "" - if os.path.exists(context.fault_flag_filename): + if hasattr(context, 'fault_flag_filename') and os.path.exists(context.fault_flag_filename): os.remove(context.fault_flag_filename) @given('set fault inject delay {delay} ms') From 08481d6b20918a7521b93898c3a9edd5ec2369e2 Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Tue, 24 Feb 2026 21:50:43 +1000 Subject: [PATCH 21/22] Revert "Use CTAS approach for rebalancing the materialized
view" This reverts commit af75169f55e993fb6ae8027d745474cef695e84a. --- src/backend/commands/tablecmds.c | 29 ++++++------------- src/test/regress/expected/alter_rebalance.out | 4 +++ src/test/regress/sql/alter_rebalance.sql | 2 ++ 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index e4d7707b496f..224b1c235c22 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -513,9 +513,7 @@ static void ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd static void ATRepackTable(Relation origTable, AlteredTableInfo *tab); static void ATExecExpandPartitionTablePrepare(Relation rel); -static void ATExecRebalanceTableCTAS(AlterTableCmd *rootCmd, - Relation rel, AlterTableCmd *cmd, - int targetNumSegments); +static void ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd); static void ATExecSetDistributedBy(Relation rel, Node *node, AlterTableCmd *cmd); @@ -17515,7 +17513,7 @@ ATExecExpandTable(List **wqueue, Relation rel, AlterTableCmd *cmd) } else { - ATExecRebalanceTableCTAS(rootCmd, rel, cmd, 0); + ATExecExpandTableCTAS(rootCmd, rel, cmd); } /* Update numsegments to cluster size */ @@ -17653,6 +17651,11 @@ ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd) * child partitions. */ } + else if (rel->rd_rel->relkind == RELKIND_MATVIEW) + { + ereport((Gp_role == GP_ROLE_EXECUTE) ? DEBUG1 : NOTICE, + (errmsg("Materialized view requires REFRESH after rebalance"))); + } else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { if (rel_is_external_table(relid)) @@ -17674,18 +17677,6 @@ ATExecRebalanceTable(List **wqueue, Relation rel, AlterTableCmd *cmd) return; } } - else if (rel->rd_rel->relkind == RELKIND_MATVIEW) - { - /* - * We can't insert data directly to an existing matview, - * therefore the approach from ATExecShrinkTable() is not suitable, - * and we use CTAS method for matviews. 
- */ - AlteredTableInfo *tab = linitial(*wqueue); - AlterTableCmd *rootCmd = - (AlterTableCmd *)linitial(tab->subcmds[AT_PASS_MISC]); - ATExecRebalanceTableCTAS(rootCmd, rel, cmd, targetNumSegments); - } else { ATExecShrinkTable(rel, newPolicy); @@ -17787,8 +17778,7 @@ ATExecExpandPartitionTablePrepare(Relation rel) } static void -ATExecRebalanceTableCTAS(AlterTableCmd *rootCmd, Relation rel, - AlterTableCmd *cmd, int targetNumSegments) +ATExecExpandTableCTAS(AlterTableCmd *rootCmd, Relation rel, AlterTableCmd *cmd) { RangeVar *tmprv; Oid tmprelid; @@ -17827,8 +17817,7 @@ ATExecRebalanceTableCTAS(AlterTableCmd *rootCmd, Relation rel, /* Step (b) - build CTAS */ distby = make_distributedby_for_rel(rel); - distby->numsegments = - (targetNumSegments > 0) ? targetNumSegments : getgpsegmentCount(); + distby->numsegments = getgpsegmentCount(); queryDesc = build_ctas_with_dist(rel, distby, untransformRelOptions(get_rel_opts(rel)), diff --git a/src/test/regress/expected/alter_rebalance.out b/src/test/regress/expected/alter_rebalance.out index 24a6a1c892a2..0f1e88d1d6ee 100644 --- a/src/test/regress/expected/alter_rebalance.out +++ b/src/test/regress/expected/alter_rebalance.out @@ -3561,6 +3561,8 @@ insert into test_table select generate_series(1, 10); create materialized view mv_test_table as select a from test_table distributed by (a); alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; +NOTICE: Materialized view requires REFRESH after rebalance +refresh materialized view mv_test_table; select count(1), gp_segment_id from test_table group by gp_segment_id; count | gp_segment_id -------+--------------- @@ -3598,6 +3600,8 @@ select count(1), gp_segment_id from mv_test_table group by gp_segment_id order b begin; alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; +NOTICE: Materialized view requires REFRESH after rebalance +refresh materialized view mv_test_table; rollback; select count(1), gp_segment_id 
from test_table group by gp_segment_id order by gp_segment_id; count | gp_segment_id diff --git a/src/test/regress/sql/alter_rebalance.sql b/src/test/regress/sql/alter_rebalance.sql index 09c9555f5b06..f11c3b3ddb19 100644 --- a/src/test/regress/sql/alter_rebalance.sql +++ b/src/test/regress/sql/alter_rebalance.sql @@ -463,6 +463,7 @@ create materialized view mv_test_table as select a from test_table distributed b alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; +refresh materialized view mv_test_table; select count(1), gp_segment_id from test_table group by gp_segment_id; select count(1), gp_segment_id from mv_test_table group by gp_segment_id; @@ -482,6 +483,7 @@ select count(1), gp_segment_id from mv_test_table group by gp_segment_id order b begin; alter table test_table rebalance 1; alter materialized view mv_test_table rebalance 1; +refresh materialized view mv_test_table; rollback; select count(1), gp_segment_id from test_table group by gp_segment_id order by gp_segment_id; From f090cb9ab15e738c9e4c0b9f1a421b93ef17146f Mon Sep 17 00:00:00 2001 From: Roman Eskin Date: Wed, 25 Feb 2026 09:18:22 +1000 Subject: [PATCH 22/22] Use stderr --- gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 7373c2c3ef5e..3d6f3936aa3f 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -682,7 +682,7 @@ def impl(context, process_name, log_msg, timeout): ITERATION=$((ITERATION + 1)) if [ $ITERATION -ge $MAX_ITERATION_CNT ]; then - echo "Timeout after {timeout} seconds waiting for '{log_msg}' in {process_name} logs" + echo "Timeout after {timeout} seconds waiting for '{log_msg}' in {process_name} logs" >&2 exit 1 fi done