Skip to content

Commit 380aea0

Browse files
committed
Corrections after reworking synchronization of ON CLUSTER BACKUPs/RESTOREs #2.
1 parent 19bcc55 commit 380aea0

2 files changed

Lines changed: 20 additions & 14 deletions

File tree

src/Backups/BackupCoordinationStageSync.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,7 @@ BackupCoordinationStageSync::BackupCoordinationStageSync(
121121

122122
try
123123
{
124-
concurrency_check.emplace(is_restore, /* on_cluster = */ true, zookeeper_path, allow_concurrency, concurrency_counters_);
125-
createStartAndAliveNodes();
124+
createStartAndAliveNodesAndCheckConcurrency(concurrency_counters_);
126125
startWatchingThread();
127126
}
128127
catch (...)
@@ -221,7 +220,7 @@ void BackupCoordinationStageSync::createRootNodes()
221220
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected path in ZooKeeper specified: {}", zookeeper_path);
222221
}
223222

224-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::createRootNodes", WithRetries::kInitialization);
223+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::createRootNodes", WithRetries::kInitialization);
225224
holder.retries_ctl.retryLoop(
226225
[&, &zookeeper = holder.faulty_zookeeper]()
227226
{
@@ -232,18 +231,22 @@ void BackupCoordinationStageSync::createRootNodes()
232231
}
233232

234233

235-
void BackupCoordinationStageSync::createStartAndAliveNodes()
234+
void BackupCoordinationStageSync::createStartAndAliveNodesAndCheckConcurrency(BackupConcurrencyCounters & concurrency_counters_)
236235
{
237-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::createStartAndAliveNodes", WithRetries::kInitialization);
236+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::createStartAndAliveNodes", WithRetries::kInitialization);
238237
holder.retries_ctl.retryLoop([&, &zookeeper = holder.faulty_zookeeper]()
239238
{
240239
with_retries.renewZooKeeper(zookeeper);
241-
createStartAndAliveNodes(zookeeper);
240+
createStartAndAliveNodesAndCheckConcurrency(zookeeper);
242241
});
242+
243+
/// The local concurrency check should be done here after BackupCoordinationStageSync::checkConcurrency() checked that
244+
/// there are no 'alive' nodes corresponding to other backups or restores.
245+
local_concurrency_check.emplace(is_restore, /* on_cluster = */ true, zookeeper_path, allow_concurrency, concurrency_counters_);
243246
}
244247

245248

246-
void BackupCoordinationStageSync::createStartAndAliveNodes(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper)
249+
void BackupCoordinationStageSync::createStartAndAliveNodesAndCheckConcurrency(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper)
247250
{
248251
/// The "num_hosts" node keeps the number of hosts which started (created the "started" node)
249252
/// but not yet finished (not created the "finished" node).
@@ -464,7 +467,7 @@ void BackupCoordinationStageSync::watchingThread()
464467
try
465468
{
466469
/// Recreate the 'alive' node if necessary and read a new state from ZooKeeper.
467-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::watchingThread");
470+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::watchingThread");
468471
auto & zookeeper = holder.faulty_zookeeper;
469472
with_retries.renewZooKeeper(zookeeper);
470473

@@ -496,6 +499,9 @@ void BackupCoordinationStageSync::watchingThread()
496499
tryLogCurrentException(log, "Caught exception while watching");
497500
}
498501

502+
if (should_stop())
503+
return;
504+
499505
zk_nodes_changed->tryWait(sync_period_ms.count());
500506
}
501507
}
@@ -769,7 +775,7 @@ void BackupCoordinationStageSync::setStage(const String & stage, const String &
769775
stopWatchingThread();
770776
}
771777

772-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::setStage");
778+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::setStage");
773779
holder.retries_ctl.retryLoop([&, &zookeeper = holder.faulty_zookeeper]()
774780
{
775781
with_retries.renewZooKeeper(zookeeper);
@@ -938,7 +944,7 @@ bool BackupCoordinationStageSync::finishImpl(bool throw_if_error, WithRetries::K
938944

939945
try
940946
{
941-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::finish", retries_kind);
947+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::finish", retries_kind);
942948
holder.retries_ctl.retryLoop([&, &zookeeper = holder.faulty_zookeeper]()
943949
{
944950
with_retries.renewZooKeeper(zookeeper);
@@ -1309,7 +1315,7 @@ bool BackupCoordinationStageSync::setError(const Exception & exception, bool thr
13091315
}
13101316
}
13111317

1312-
auto holder = with_retries.createRetriesControlHolder("BackupStageSync::setError", WithRetries::kErrorHandling);
1318+
auto holder = with_retries.createRetriesControlHolder("BackupCoordinationStageSync::setError", WithRetries::kErrorHandling);
13131319
holder.retries_ctl.retryLoop([&, &zookeeper = holder.faulty_zookeeper]()
13141320
{
13151321
with_retries.renewZooKeeper(zookeeper);

src/Backups/BackupCoordinationStageSync.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ class BackupCoordinationStageSync
7272
void createRootNodes();
7373

7474
/// Atomically creates both 'start' and 'alive' nodes and also checks that there is no concurrent backup or restore if `allow_concurrency` is false.
75-
void createStartAndAliveNodes();
76-
void createStartAndAliveNodes(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
75+
void createStartAndAliveNodesAndCheckConcurrency(BackupConcurrencyCounters & concurrency_counters_);
76+
void createStartAndAliveNodesAndCheckConcurrency(Coordination::ZooKeeperWithFaultInjection::Ptr zookeeper);
7777

7878
/// Deserialize the version of a node stored in the 'start' node.
7979
int parseStartNode(const String & start_node_contents, const String & host) const;
@@ -171,7 +171,7 @@ class BackupCoordinationStageSync
171171
const String alive_node_path;
172172
const String alive_tracker_node_path;
173173

174-
std::optional<BackupConcurrencyCheck> concurrency_check;
174+
std::optional<BackupConcurrencyCheck> local_concurrency_check;
175175

176176
std::shared_ptr<Poco::Event> zk_nodes_changed;
177177

0 commit comments

Comments
 (0)