Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/backend/distributed/utils/resource_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,17 @@ LockShardListMetadataOnWorkers(LOCKMODE lockmode, List *shardIntervalList)

appendStringInfo(lockCommand, "])");

SendCommandToWorkersWithMetadata(lockCommand->data);
/*
* Disable idle_in_transaction_session_timeout on metadata workers before
* acquiring locks. In block_writes mode, these connections stay open for
* the entire shard copy which can take hours for large shards. Without
* this, the timeout would kill the connection and fail the move.
* SET LOCAL scopes the change to this transaction only.
*/
List *commandList = list_make2(
"SET LOCAL idle_in_transaction_session_timeout = 0",
lockCommand->data);
SendCommandListToWorkersWithMetadata(commandList);
}


Expand Down
84 changes: 84 additions & 0 deletions src/test/regress/expected/shard_move_constraints_blocking.out
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,87 @@ drop cascades to table "blocking shard Move Fkeys Indexes".reference_table
drop cascades to table "blocking shard Move Fkeys Indexes".reference_table_8970028
drop cascades to table "blocking shard Move Fkeys Indexes".index_backed_rep_identity
DROP ROLE mx_rebalancer_blocking_role_ent;
-- Test: block_writes shard move succeeds even when workers have a low
-- idle_in_transaction_session_timeout. LockShardListMetadataOnWorkers opens
-- coordinated transactions on ALL metadata workers before the data copy.
-- Workers not involved in the copy sit idle-in-transaction for the entire
-- duration. Without the SET LOCAL override, the timeout would kill those
-- connections and fail the move.
SET citus.next_shard_id TO 8980000;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
CREATE SCHEMA blocking_move_idle_timeout;
SET search_path TO blocking_move_idle_timeout;
-- set a very low idle_in_transaction_session_timeout on all nodes
SELECT 1 FROM run_command_on_all_nodes(
'ALTER SYSTEM SET idle_in_transaction_session_timeout = ''1s''');
?column?
---------------------------------------------------------------------
1
1
1
(3 rows)

SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
?column?
---------------------------------------------------------------------
1
1
1
(3 rows)

-- allow the reload to take effect
SELECT pg_sleep(0.5);
pg_sleep
---------------------------------------------------------------------

(1 row)

CREATE TABLE test_move(id int PRIMARY KEY, val text);
SELECT create_distributed_table('test_move', 'id');
create_distributed_table
---------------------------------------------------------------------

(1 row)

INSERT INTO test_move SELECT i, 'val_' || i FROM generate_series(1, 100) i;
-- move a shard using block_writes; should succeed despite the 1s timeout
SELECT citus_move_shard_placement(8980000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes');
citus_move_shard_placement
---------------------------------------------------------------------

(1 row)

SELECT public.wait_for_resource_cleanup();
wait_for_resource_cleanup
---------------------------------------------------------------------

(1 row)

-- verify data integrity after move
SELECT count(*) FROM test_move;
count
---------------------------------------------------------------------
100
(1 row)

-- cleanup: restore idle_in_transaction_session_timeout
SELECT 1 FROM run_command_on_all_nodes(
'ALTER SYSTEM RESET idle_in_transaction_session_timeout');
?column?
---------------------------------------------------------------------
1
1
1
(3 rows)

SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
?column?
---------------------------------------------------------------------
1
1
1
(3 rows)

DROP SCHEMA blocking_move_idle_timeout CASCADE;
NOTICE: drop cascades to table test_move
38 changes: 38 additions & 0 deletions src/test/regress/sql/shard_move_constraints_blocking.sql
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,41 @@ ALTER TABLE sensors_2020_01_01 DROP CONSTRAINT fkey_from_child_to_child;
\c - postgres - :master_port
DROP SCHEMA "blocking shard Move Fkeys Indexes" CASCADE;
DROP ROLE mx_rebalancer_blocking_role_ent;

-- Test: block_writes shard move succeeds even when workers have a low
-- idle_in_transaction_session_timeout. LockShardListMetadataOnWorkers opens
-- coordinated transactions on ALL metadata workers before the data copy.
-- Workers not involved in the copy sit idle-in-transaction for the entire
-- duration. Without the SET LOCAL override, the timeout would kill those
-- connections and fail the move.
SET citus.next_shard_id TO 8980000;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;

CREATE SCHEMA blocking_move_idle_timeout;
SET search_path TO blocking_move_idle_timeout;

-- set a very low idle_in_transaction_session_timeout on all nodes
SELECT 1 FROM run_command_on_all_nodes(
'ALTER SYSTEM SET idle_in_transaction_session_timeout = ''1s''');
SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');
-- allow the reload to take effect
SELECT pg_sleep(0.5);

CREATE TABLE test_move(id int PRIMARY KEY, val text);
SELECT create_distributed_table('test_move', 'id');
INSERT INTO test_move SELECT i, 'val_' || i FROM generate_series(1, 100) i;

-- move a shard using block_writes; should succeed despite the 1s timeout
SELECT citus_move_shard_placement(8980000, 'localhost', :worker_1_port, 'localhost', :worker_2_port, shard_transfer_mode:='block_writes');
SELECT public.wait_for_resource_cleanup();
Comment on lines +246 to +252
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test sets idle_in_transaction_session_timeout to 1s, but the shard being moved is tiny (100 rows spread across 4 shards), so the block_writes move will likely finish well under 1s even without the backend fix. That makes this regression test non-deterministic and ineffective at catching the original failure mode. Consider making the data-copy phase reliably exceed the timeout (e.g., add a small per-row delay on the target during the copy, or otherwise slow the copy in a deterministic way) so the test would fail on older code paths.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback


-- verify data integrity after move
SELECT count(*) FROM test_move;

-- cleanup: restore idle_in_transaction_session_timeout
SELECT 1 FROM run_command_on_all_nodes(
'ALTER SYSTEM RESET idle_in_transaction_session_timeout');
SELECT 1 FROM run_command_on_all_nodes('SELECT pg_reload_conf()');

DROP SCHEMA blocking_move_idle_timeout CASCADE;