diff --git a/gpMgmt/bin/gppylib/fault_injection.py b/gpMgmt/bin/gppylib/fault_injection.py index 142ab17dfcfd..388097634575 100755 --- a/gpMgmt/bin/gppylib/fault_injection.py +++ b/gpMgmt/bin/gppylib/fault_injection.py @@ -7,9 +7,17 @@ GPMGMT_FAULT_POINT = 'GPMGMT_FAULT_POINT' GPMGMT_FAULT_DELAY_MS = 'GPMGMT_FAULT_DELAY_MS' +GPMGMT_FAULT_TYPE = 'GPMGMT_FAULT_TYPE' +GPMGMT_FAULT_FILE_FLAG = 'GPMGMT_FAULT_FILE_FLAG' + +GPMGMT_FAULT_TYPE_SYSPEND = 'suspend' def inject_fault(fault_point): if GPMGMT_FAULT_POINT in os.environ and fault_point == os.environ[GPMGMT_FAULT_POINT]: + if GPMGMT_FAULT_TYPE in os.environ and os.environ[GPMGMT_FAULT_TYPE] == GPMGMT_FAULT_TYPE_SYSPEND: + while GPMGMT_FAULT_FILE_FLAG in os.environ and os.path.exists(os.environ[GPMGMT_FAULT_FILE_FLAG]): + time.sleep(0.1) + return if GPMGMT_FAULT_DELAY_MS in os.environ and int(os.environ[GPMGMT_FAULT_DELAY_MS]) > 0: delay_ms = int(os.environ[GPMGMT_FAULT_DELAY_MS]) diff --git a/gpMgmt/bin/gprebalance_modules/shrink.py b/gpMgmt/bin/gprebalance_modules/shrink.py index a93c5d6e091b..48798267ed6c 100644 --- a/gpMgmt/bin/gprebalance_modules/shrink.py +++ b/gpMgmt/bin/gprebalance_modules/shrink.py @@ -224,7 +224,6 @@ def __init__(self, conn: dbconn.Connection, self.gparray_dump_file = gpArrayDumpFilename self.rebalance_schema = schema self.shrink_plan = None - self.needs_repopulate = False self.dumped_gparray = gparray.GpArray.initFromFile(self.gparray_dump_file) if os.path.exists(self.gparray_dump_file) else None self.machine = Machine(model = self, @@ -276,7 +275,6 @@ def get_state_after_interrupt(self, prev_state) -> str: # means that target rebalance numsegments is reset, and new tables are created at old segment count if bool(row[0]) is False: self.logger.info("Cluster restarted after previous run, trying to repopulate the relation queue") - self.needs_repopulate = True return 'STATE_BACKUP_CATALOG_AND_UPDATE_TARGET_SEGMENT_COUNT_STARTED' return self.states_main_shrink_flow[prev_idx + 1] @@ -444,29 +442,28 @@ def on_enter_STATE_SHRINK_SEGMENTS_STOP_STARTED(self) -> None: gp_array = self.gparray segments_to_stop = gp_array.get_segment_count() - self.shrink_plan.getTargetSegmentCount() - segments_to_stop = segments_to_stop * 2 # consider mirrors self.workers_for_segment_stop = WorkerPool(numWorkers=min(segments_to_stop, self.options.batch_size)) - for seg_pair in gp_array.getSegmentList(): - primary_seg = seg_pair.primaryDB - mirror_seg = seg_pair.mirrorDB - if primary_seg.getSegmentContentId() >= self.shrink_plan.getTargetSegmentCount(): - if primary_seg.isSegmentUp(): - cmd = self.SegmentStopAfterShrink(self, primary_seg) + # Stop primaries first, and mirrors after primaries, + # to avoid hanging replication processes + seg_roles = [gparray.ROLE_PRIMARY, gparray.ROLE_MIRROR] + for seg_role in seg_roles: + self.logger.info(f"Prepare to stop segments with role '{seg_role}'") + for seg in gp_array.getSegDbList(): + if (seg.getSegmentContentId() >= self.shrink_plan.getTargetSegmentCount() and + seg.getSegmentRole() == seg_role and seg.isSegmentUp()): + cmd = self.SegmentStopAfterShrink(self, seg) self.workers_for_segment_stop.addCommand(cmd) - - if mirror_seg != None and mirror_seg.isSegmentUp(): - cmd = self.SegmentStopAfterShrink(self, mirror_seg) - self.workers_for_segment_stop.addCommand(cmd) - - print_progress(self.workers_for_segment_stop, interval=1) + if self.shutdown_requested: + break + print_progress(self.workers_for_segment_stop, interval=1) self.workers_for_segment_stop.haltWork() self.workers_for_segment_stop.joinWorkers() for task in self.workers_for_segment_stop.getCompletedItems(): if not task.was_successful(): - raise Exception('Failed to stop segments') + self.logger.warning('Failed to stop segments') self.workers_for_segment_stop = None @@ -566,20 +563,28 @@ def __init__(self, shrink: 'GGShrink', segment: Segment) -> None: # decorator to inject a fault before running SegmentStopAfterShrink for a specific dbid def wrap_segment_stop_with_faults(fun): def func_with_faults(self): - inject_fault(f'fault_segment_stop_dbid_{self.segment.getSegmentDbId()}') + try: + inject_fault(f'fault_segment_stop_dbid_{self.segment.getSegmentDbId()}') + except Exception as e: + os.kill(os.getpid(), signal.SIGINT) + return fun(self) return func_with_faults @wrap_segment_stop_with_faults def run(self) -> None: - self.shrink.logger.info(f'Stopping shrinked segment dbid {self.segment.getSegmentDbId()} @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()}') + self.shrink.logger.info(f'Stopping shrinked segment {str(self.segment)}') self.checkRunningSegment.run() if self.checkRunningSegment.is_shutdown(): - self.shrink.logger.info(f'Segment dbid {self.segment.getSegmentDbId()} is already down @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()} ') + self.shrink.logger.info(f'Segment {str(self.segment)} is already down') self.set_results(CommandResult(0, b'', b'', True, False)) else: - SegmentStop.run(self) - self.shrink.logger.info(f'Stopped shrinked segment dbid {self.segment.getSegmentDbId()} @ host={self.remoteHost}, datadir={self.segment.getSegmentDataDirectory()}') + try: + SegmentStop.run(self, validateAfter = True) + except ExecutionError: + self.shrink.logger.info(f'Failed to stop shrinked segment {str(self.segment)}') + return + self.shrink.logger.info(f'Stopped shrinked segment {str(self.segment)}') class TableRebalanceTask(SQLCommand): def __init__(self, @@ -599,28 +604,70 @@ def __init__(self, # decorator to inject a fault before running TableRebalanceTask for a specific {db_name, schema_name, rel_name} def wrap_table_rebalance_with_faults(fun): - def func_with_faults(self): + def func_with_faults(self, attempt: int): inject_fault(f'fault_rebalance_table_{self.db_name}.{self.schema_name}.{self.rel_name}') - fun(self) + fun(self, attempt) return func_with_faults + def table_exists(self, conn: dbconn.Connection, schema_name: str, rel_name: str) -> bool: + if dbconn.querySingleton(conn, f""" + SELECT count(1) + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE c.relname = '{rel_name}' AND n.nspname = '{schema_name}' AND c.relnamespace = n.oid + """) == 0: + return False + return True + + def db_exists(self, conn: dbconn.Connection, db_name: str) -> bool: + if dbconn.querySingleton(conn, f"""SELECT count(*) FROM pg_database WHERE datname = '{db_name}'""") == 0: + return False + return True + @wrap_table_rebalance_with_faults - def run(self) -> None: - self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments') - dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) - with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: - dbconn.execSQL(conn, 'BEGIN') - dbconn.execSQL(conn, - f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" - REBALANCE {self.target_segment_count}''') - self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) - if self.shrink.options.analyze: - dbconn.execSQL(conn, - f'''ANALYZE "{self.schema_name}"."{self.rel_name}"''') - dbconn.execSQL(conn, 'COMMIT') + def process_table(self, attempt: int) -> None: + self.shrink.logger.info(f'Start table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}" to {self.target_segment_count} segments (attempt {attempt})') + if self.db_exists(self.shrink.rebalance_schema.conn, self.db_name): + dburl = dbconn.DbURL(dbname=self.db_name, port=self.shrink.gpEnv.getCoordinatorPort()) + with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: + dbconn.execSQL(conn, 'BEGIN') + + table_exists = self.table_exists(conn, self.schema_name, self.rel_name) + if table_exists: + dbconn.execSQL(conn, + f'''ALTER TABLE "{self.schema_name}"."{self.rel_name}" + REBALANCE {self.target_segment_count}''') + if self.shrink.options.analyze: + dbconn.execSQL(conn, + f'''ANALYZE "{self.schema_name}"."{self.rel_name}"''') + else: + self.shrink.logger.info(f'''Table "{self.db_name}"."{self.schema_name}"."{self.rel_name}" doesn't exist, skipping actual rebalance''') + + self.shrink.rebalance_schema.setStatusForTableToRebalance(self.db_name, self.schema_name, self.rel_name, self.table_status_after_rebalance) + dbconn.execSQL(conn, 'COMMIT') + else: + self.shrink.logger.info(f'''DB "{self.db_name}" doesn't exist, skipping actual rebalance for "{self.schema_name}"."{self.rel_name}"''') self.shrink.logger.info(f'Complete table rebalance for "{self.db_name}"."{self.schema_name}"."{self.rel_name}"') self.set_results(CommandResult(0, b'', b'', True, False)) + def run(self) -> None: + # Give 2 attempts to process a table. It is needed, when, for example, + # other session opens a transaction after we have created the rebalance table + # list, drops the table before we started to rebalance it, and commits the + # transaction when we've started to rebalance the table. + attempt_max_cnt = 2 + for i in range(attempt_max_cnt): + attempt = i + 1 + try: + self.process_table(attempt) + except Exception as e: + if attempt < attempt_max_cnt: + logger.warning(f"{str(e)}") + else: + logger.error(f"{str(e)}") + raise Exception(f'Failed to process the db object for {attempt_max_cnt} attempts') + continue + break + def prepare_shrink_schema(self, is_rollback: bool) -> None: status = 'done' if is_rollback else 'none' cmp = '<=' if is_rollback else '>' @@ -642,14 +689,18 @@ def prepare_shrink_schema(self, is_rollback: bool) -> None: dburl = dbconn.DbURL(dbname=db, port=self.gpEnv.getCoordinatorPort()) with closing(dbconn.connect(dburl, encoding='UTF8')) as conn: cursor = dbconn.query(conn, - f'''SELECT n.nspname, c.relname + f'''SELECT n.nspname, c.relname, c.relkind, pe.writable is not null as external_writable FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid JOIN gp_distribution_policy p ON c.oid = p.localoid - WHERE c.relkind IN ('r', 'p') AND c.relispartition = FALSE AND + LEFT JOIN pg_exttable pe on (c.oid=pe.reloid and pe.writable) + WHERE c.relkind IN ('r', 'p', 'm', 'f') AND c.relispartition = FALSE AND + c.relpersistence != 't' AND p.numsegments {cmp} {self.shrink_plan.getTargetSegmentCount()} AND n.nspname NOT IN ('pg_catalog', 'information_schema', '{self.rebalance_schema.getSchemaName()}')''') - for schema_name, rel_name in cursor: + for schema_name, rel_name, rel_kind, external_writable in cursor: + if rel_kind == 'f' and not external_writable: + continue self.rebalance_schema.addTableToRebalance(db, schema_name, rel_name, status) dbconn.execSQL(self.conn, 'COMMIT') diff --git a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature index 5f8f705d7846..48241783d44f 100755 --- a/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature +++ b/gpMgmt/test/behave/mgmt_utils/ggrebalance_shrink.feature @@ -12,6 +12,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -29,6 +35,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -45,6 +56,14 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And the user connects to "gptest" with named connection "test_connection" + And the user executes "CREATE TEMP TABLE temp_table(a int);" with named connection "test_connection" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -53,6 +72,14 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 1 And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp And unset fault inject + And the user drops the named connection "test_connection" + When execute following sql in db "postgres" and store result in the context + """ + select count(1) as temp_tables_for_redistribute from ggrebalance.table_rebalance_status_detail where schema_name LIKE 'pg\_temp\_%'; + """ + Then validate that following rows are in the stored rows + | temp_tables_for_redistribute | + | 0 | When the user runs "ggrebalance -x 1 --parallel 1 --batch-size 1 --skip-rebalance" Then ggrebalance should return a return code of 1 And ggrebalance should print "Can't start a new operation, because the previous one was interrupted" to logfile with latest timestamp @@ -65,6 +92,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -106,6 +138,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -119,18 +157,27 @@ Feature: ggrebalance behave tests Then gpstart should return a return code of 0 When there is a "heap" table "test_schema_2.test_table_3" in "test_db_2" with data And the user runs "ggrebalance" - Then ggrebalance should print "Cluster restarted after previous run, trying to repopulate the relation queue" to logfile + Then ggrebalance should return a return code of 0 And ggrebalance should print "Shrink is complete" to logfile with latest timestamp And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 1, row count = 1094 - And ggrebalance should return a return code of 0 + Examples: | fault_name | | on_enter_STATE_BACKUP_CATALOG_AND_UPDATE_TARGET_SEGMENT_COUNT_DONE_begin | | on_enter_STATE_PREPARE_SHRINK_SCHEMA_DONE_begin | | on_enter_STATE_SHRINK_TABLES_DONE_begin | + | on_enter_STATE_SHRINK_TABLES_DONE_end | + | on_enter_STATE_SHRINK_CATALOG_STARTED_begin | + | on_enter_STATE_SHRINK_CATALOG_STARTED_end | Scenario: test 2.1. shrink - check rollback after interrupted state, if interruption is done before the rebalance schema creation Given the database is not running @@ -142,6 +189,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -157,6 +210,11 @@ Feature: ggrebalance behave tests Then ggrebalance should return a return code of 0 And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -172,6 +230,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -185,6 +249,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -221,6 +290,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -238,6 +313,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -261,6 +341,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -279,6 +365,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 @@ -314,6 +405,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -333,6 +430,11 @@ Feature: ggrebalance behave tests And verify no segment running for saved segment information And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -353,6 +455,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -371,6 +479,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -414,6 +527,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -432,6 +551,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rebalance schema doesn't exists and no shrink plan is supplied. Please specify shrink plan." to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 When there is a "heap" table "test_schema_1.test_table_3" in "test_db_1" with "100" rows @@ -456,6 +580,12 @@ Feature: ggrebalance behave tests And schema "test_schema_1" exists in "test_db_1" And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" And database "test_db_2" exists And schema "test_schema_2" exists in "test_db_2" And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows @@ -478,6 +608,11 @@ Feature: ggrebalance behave tests And ggrebalance should print "Rollback is complete" to logfile with latest timestamp And distribution information from table "test_schema_1.test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 2, row count = 100 + And the numsegments of table "ext_test" is 2 And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 2, row count = 100 And distribution information from table "test_schema_2.test_table_3" with data in "test_db_2" is equal to segment count = 2, row count = 200 @@ -512,3 +647,158 @@ Feature: ggrebalance behave tests | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_SHRINKED_TABLES_DONE_end | | fault_rebalance_table_test_db_2.test_schema_2.test_table_1 | on_enter_STATE_SHRINK_ROLLBACK_DROP_SCHEMA_START_begin | + Scenario: test 4. test shrink continue, when a table planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_2.test_schema_2.test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And table "test_schema_2.test_table_1" is dropped in "test_db_2" + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + + Scenario: test 4.1. test shrink continue, when a mat view planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_1.test_schema_1.mv_test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And materialized view "test_schema_1.mv_test_table_1" is dropped in "test_db_1" + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 + And distribution information from table "test_schema_2.test_table_1" with data in "test_db_2" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + + Scenario: test 5. test shrink continue, when a db with the table planned for rebalance was dropped + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + When set fault inject "fault_rebalance_table_test_db_2.test_schema_2.test_table_1" + And the user runs "ggrebalance -x 1 --skip-rebalance" + Then ggrebalance should return a return code of 1 + And ggrebalance should print "ggrebalance failed" to logfile with latest timestamp + And unset fault inject + And the database "test_db_2" does not exist + When the user runs "ggrebalance" + Then ggrebalance should return a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 + + Scenario: test 6. test shrink, when a table, planned for rebalance, is dropped in a parallel transaction, committed after the start of table redistribution + Given the database is not running + And a working directory of the test as '/data/gpdata/ggrebalance' + And a cluster is created with mirrors on "cdw" and "sdw1" + And segment information for content 1 is saved in context + And all files in gpAdminLogs directory are deleted + And database "test_db_1" exists + And schema "test_schema_1" exists in "test_db_1" + And there is a "heap" table "test_schema_1.test_table_1" in "test_db_1" with "100" rows + And there is a "ao" table "test_schema_1.test_table_2" in "test_db_1" with "100" rows + And there is a "heap" partition table "test_schema_1.part_test_table_1" in "test_db_1" with "100" rows + And there is a "ao" partition table "test_schema_1.part_test_table_2" in "test_db_1" with "100" rows + And there is an unlogged "heap" table "test_schema_1.unlogged_test_table_1" in "test_db_1" with "100" rows + And a materialized view "test_schema_1.mv_test_table_1" exists on table "test_schema_1.test_table_1" + And database "gptest" exists + And there is a "heap" table "test_table_1" in "gptest" with "100" rows + And the user create a writable external table with name "ext_test" + And database "test_db_2" exists + And schema "test_schema_2" exists in "test_db_2" + And there is a "heap" table "test_schema_2.test_table_1" in "test_db_2" with "100" rows + And there is a "ao" table "test_schema_2.test_table_2" in "test_db_2" with "100" rows + And set fault inject "on_enter_STATE_PREPARE_SHRINK_SCHEMA_STARTED_begin" + And set fault inject type to suspend + When the user asynchronously runs "ggrebalance -x 1 --skip-rebalance" and the process is saved + And the user waits till ggrebalance prints "Updated target segment count to 1" in the logs (with timeout of "60" sec) + And the user connects to "gptest" with named connection "test_connection" + And the user executes "BEGIN; DROP TABLE test_table_1;" with named connection "test_connection" + And unset fault inject + And the user waits till ggrebalance prints "Start table rebalance for \"gptest\".\"public\".\"test_table_1\" to 1 segments" in the logs (with timeout of "60" sec) + And waiting "5" seconds + And the user executes "COMMIT;" with named connection "test_connection" + And the user drops the named connection "test_connection" + Then the async process finished with a return code of 0 + And ggrebalance should print "Shrink is complete" to logfile with latest timestamp + And verify no segment running for saved segment information + And distribution information from table "test_schema_1.test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.part_test_table_2" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.unlogged_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_1.mv_test_table_1" with data in "test_db_1" is equal to segment count = 1, row count = 100 + And distribution information from table "test_schema_2.test_table_2" with data in "test_db_2" is equal to segment count = 1, row count = 100 + And the numsegments of table "ext_test" is 1 diff --git a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py index f7bf6f1bdfa7..43c2a172e982 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/analyzedb_mgmt_utils.py @@ -94,13 +94,13 @@ def impl(context, view_name, table_name, schema_name): @given('a view "{view_name}" exists on table "{table_name}"') def impl(context, view_name, table_name): with closing(dbconn.connect(dbconn.DbURL(dbname=context.dbname))) as conn: - create_view_on_table(context.conn, view_name, table_name) + create_view_on_table(conn, view_name, table_name) @given('a materialized view "{view_name}" exists on table "{table_name}"') def impl(context, view_name, table_name): with closing(dbconn.connect(dbconn.DbURL(dbname=context.dbname))) as conn: - create_materialized_view_on_table_in_schema(context.conn, viewname=view_name, + create_materialized_view_on_table_in_schema(conn, viewname=view_name, tablename=table_name) diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index 059f6e27c8f5..3d6f3936aa3f 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -39,6 +39,7 @@ from gppylib import pgconf from gppylib.commands.gp import get_coordinatordatadir from gppylib.parseutils import canonicalize_address +from gppylib import fault_injection coordinator_data_dir = gp.get_coordinatordatadir() if coordinator_data_dir is None: @@ -156,6 +157,8 @@ def impl(context, query, db, contentids): @given('the user connects to "{dbname}" with named connection "{cname}"') +@when('the user connects to "{dbname}" with named connection "{cname}"') +@then('the user connects to "{dbname}" with named connection "{cname}"') def impl(context, dbname, cname): if not hasattr(context, 'named_conns'): context.named_conns = {} @@ -197,11 +200,14 @@ def impl(conetxt, tabname): @given('the user executes "{sql}" with named connection "{cname}"') +@when('the user executes "{sql}" with named connection "{cname}"') +@then('the user executes "{sql}" with named connection "{cname}"') def impl(context, cname, sql): conn = context.named_conns[cname] dbconn.execSQL(conn, sql) +@when('the user drops the named connection "{cname}"') @then('the user drops the named connection "{cname}"') def impl(context, cname): if cname in context.named_conns: @@ -660,6 +666,31 @@ def impl(context, kill_process_name, log_msg, logfile_name): "fi; done" % (log_msg, logfile_name, kill_process_name) run_async_command(context, command) +@given('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +@when('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +@then('the user waits till {process_name} prints "{log_msg}" in the logs (with timeout of "{timeout}" sec)') +def impl(context, process_name, log_msg, timeout): + poll_period = 0.1 + max_iteration_cnt = int(int(timeout) / poll_period) + command = f""" + ITERATION=0 + MAX_ITERATION_CNT={max_iteration_cnt} + while sleep {poll_period}; do + if grep -E --quiet '{log_msg}' ~/gpAdminLogs/{process_name}*log ; + then break 2; + fi; + + ITERATION=$((ITERATION + 1)) + if [ $ITERATION -ge $MAX_ITERATION_CNT ]; then + echo "Timeout after {timeout} seconds waiting for '{log_msg}' in {process_name} logs" >&2 + exit 1 + fi + done + """ + rc, _, error = run_cmd(command) + if rc: + raise Exception(error) + @given('the user asynchronously sets up to end {process_name} process with {signal_name}') @when('the user asynchronously sets up to end {process_name} process with {signal_name}') @then('the user asynchronously sets up to end {process_name} process with {signal_name}') @@ -1561,6 +1592,11 @@ def get_opened_files(filename, pidfile): def impl(context, tablename, dbname): drop_table_if_exists(context, table_name=tablename, dbname=dbname) +@when('materialized view "{viewname}" is dropped in "{dbname}"') +@then('materialized view "{viewname}" is dropped in "{dbname}"') +@given('materialized view "{viewname}" is dropped in "{dbname}"') +def impl(context, viewname, dbname): + drop_materialized_view_if_exists(context, view_name=viewname, dbname=dbname) @given('all the segments are running') @when('all the segments are running') @@ -2338,6 +2374,11 @@ def impl(context): def impl(context, tabletype, tablename, dbname, numrows): populate_regular_table_data(context, tabletype, tablename, dbname, compression_type=None, with_data=True, rowcount=int(numrows)) +@given('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +@then('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +@when('there is an unlogged "{tabletype}" table "{tablename}" in "{dbname}" with "{numrows}" rows') +def impl(context, tabletype, tablename, dbname, numrows): + populate_regular_table_data(context, tabletype, tablename, dbname, compression_type=None, with_data=True, rowcount=int(numrows), unlogged=True) @given('there is a "{tabletype}" table "{tablename}" in "{dbname}" with data') @then('there is a "{tabletype}" table "{tablename}" in "{dbname}" with data') @@ -2358,6 +2399,12 @@ def impl(context, tabletype, tablename, dbname): def impl(context, tabletype, table_name, dbname): create_partition(context, tablename=table_name, storage_type=tabletype, dbname=dbname, with_data=True) +@given('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +@then('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +@when('there is a "{tabletype}" partition table "{table_name}" in "{dbname}" with "{numrows}" rows') +def impl(context, tabletype, table_name, dbname, numrows): + create_partition(context, tablename=table_name, storage_type=tabletype, dbname=dbname, with_data=True, rowcount=int(numrows)) + @given('there is a view without columns in "{dbname}"') @then('there is a view without columns in "{dbname}"') @when('there is a view without columns in "{dbname}"') @@ -4526,25 +4573,39 @@ def step_impl(context, address): @then('set fault inject "{fault}"') @when('set fault inject "{fault}"') def impl(context, fault): - os.environ['GPMGMT_FAULT_POINT'] = fault + os.environ[fault_injection.GPMGMT_FAULT_POINT] = fault @given('unset fault inject') @then('unset fault inject') @when('unset fault inject') def impl(context): - os.environ['GPMGMT_FAULT_POINT'] = "" + os.environ[fault_injection.GPMGMT_FAULT_POINT] = "" + os.environ[fault_injection.GPMGMT_FAULT_TYPE] = "" + os.environ[fault_injection.GPMGMT_FAULT_FILE_FLAG] = "" + if hasattr(context, 'fault_flag_filename') and os.path.exists(context.fault_flag_filename): + os.remove(context.fault_flag_filename) @given('set fault inject delay {delay} ms') @then('set fault inject delay {delay} ms') @when('set fault inject delay {delay} ms') def impl(context, delay): - os.environ['GPMGMT_FAULT_DELAY_MS'] = delay + os.environ[fault_injection.GPMGMT_FAULT_DELAY_MS] = delay + +@given('set fault inject type to suspend') +@then('set fault inject type to suspend') +@when('set fault inject type to suspend') +def impl(context): + os.environ[fault_injection.GPMGMT_FAULT_TYPE] = fault_injection.GPMGMT_FAULT_TYPE_SYSPEND + context.fault_flag_filename = "/tmp/ggrebalance_fault_suspend_flag" + with open(context.fault_flag_filename, "w"): + pass + os.environ[fault_injection.GPMGMT_FAULT_FILE_FLAG] = context.fault_flag_filename @given('unset fault inject delay') @then('unset fault inject delay') @when('unset fault inject delay') def impl(context): - os.environ['GPMGMT_FAULT_DELAY_MS'] = "" + os.environ[fault_injection.GPMGMT_FAULT_DELAY_MS] = "" @given('stub') def impl(context): diff --git a/gpMgmt/test/behave_utils/utils.py b/gpMgmt/test/behave_utils/utils.py index 976d1a87be72..53cac4169ffd 100644 --- a/gpMgmt/test/behave_utils/utils.py +++ b/gpMgmt/test/behave_utils/utils.py @@ -358,6 +358,10 @@ def drop_table(context, table_name, dbname, host=None, port=0, user=None): if check_table_exists(context, table_name=table_name, dbname=dbname, host=host, port=port, user=user): raise Exception('Unable to successfully drop the table %s' % table_name) +def drop_materialized_view_if_exists(context, view_name, dbname, host=None, port=0, user=None): + SQL = 'drop materialized view if exists %s' % view_name + with closing(dbconn.connect(dbconn.DbURL(hostname=host, port=port, username=user, dbname=dbname), unsetSearchPath=False)) as conn: + dbconn.execSQL(conn, SQL) def check_schema_exists(context, schema_name, dbname): schema_check_sql = "select * from pg_namespace where nspname='%s';" % schema_name @@ -430,11 +434,14 @@ def create_external_partition(context, tablename, dbname, port, filename): def create_partition(context, tablename, storage_type, dbname, compression_type=None, partition=True, rowcount=1094, - with_data=True, with_desc=False, host=None, port=0, user=None): + with_data=True, with_desc=False, host=None, port=0, user=None, unlogged=False): interval = '1 year' table_definition = 'Column1 int, Column2 varchar(20), Column3 date' - create_table_str = "Create table " + tablename + "(" + table_definition + ")" + create_table_str = "Create table " + if unlogged: + create_table_str = "Create unlogged table " + create_table_str = create_table_str + tablename + "(" + table_definition + ")" storage_type_dict = {'ao': 'row', 'co': 'column'} part_table = " Distributed Randomly Partition by list(Column2) \ @@ -732,11 +739,11 @@ def validate_local_path(path): def populate_regular_table_data(context, tabletype, table_name, dbname, compression_type=None, rowcount=1094, - with_data=False, with_desc=False, host=None, port=0, user=None): + with_data=False, with_desc=False, host=None, port=0, user=None, unlogged=False): create_database_if_not_exists(context, dbname, host=host, port=port, user=user) drop_table_if_exists(context, table_name=table_name, dbname=dbname, host=host, port=port, user=user) create_partition(context, table_name, tabletype, dbname, compression_type=compression_type, partition=False, - rowcount=rowcount, with_data=with_data, with_desc=with_desc, host=host, port=port, user=user) + rowcount=rowcount, with_data=with_data, with_desc=with_desc, host=host, port=port, user=user, unlogged=unlogged) def is_process_running(proc_name, host=None):