Skip to content

Commit 830229d

Browse files
committed
[server] Coordinator Server Supports High-Available
1 parent de4f118 commit 830229d

16 files changed

Lines changed: 434 additions & 20 deletions

File tree

fluss-common/src/main/java/com/alibaba/fluss/metrics/MetricNames.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public class MetricNames {
3636
// metrics for coordinator server
3737
// --------------------------------------------------------------------------------------------
3838
public static final String ACTIVE_COORDINATOR_COUNT = "activeCoordinatorCount";
39+
public static final String ALIVE_COORDINATOR_COUNT = "aliveCoordinatorCount";
3940
public static final String ACTIVE_TABLET_SERVER_COUNT = "activeTabletServerCount";
4041
public static final String OFFLINE_BUCKET_COUNT = "offlineBucketCount";
4142
public static final String TABLE_COUNT = "tableCount";

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/CoordinatorContext.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public class CoordinatorContext {
6666
// a success deletion.
6767
private final Map<TableBucketReplica, Integer> failDeleteNumbers = new HashMap<>();
6868

69+
private final Set<Integer> liveCoordinatorServers = new HashSet<>();
6970
private final Map<Integer, ServerInfo> liveTabletServers = new HashMap<>();
7071

7172
// a map from the table bucket to the state of the bucket.
@@ -110,6 +111,24 @@ public int getCoordinatorEpoch() {
110111
return coordinatorEpoch;
111112
}
112113

114+
public Set<Integer> getLiveCoordinatorServers() {
115+
return liveCoordinatorServers;
116+
}
117+
118+
@VisibleForTesting
119+
public void setLiveCoordinatorServers(Set<Integer> servers) {
120+
liveCoordinatorServers.clear();
121+
liveCoordinatorServers.addAll(servers);
122+
}
123+
124+
public void addLiveCoordinatorServer(int serverId) {
125+
this.liveCoordinatorServers.add(serverId);
126+
}
127+
128+
public void removeLiveCoordinatorServer(int serverId) {
129+
this.liveCoordinatorServers.remove(serverId);
130+
}
131+
113132
public Map<Integer, ServerInfo> getLiveTabletServers() {
114133
return liveTabletServers;
115134
}

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/CoordinatorEventProcessor.java

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,17 @@
5050
import com.alibaba.fluss.server.coordinator.event.CoordinatorEventManager;
5151
import com.alibaba.fluss.server.coordinator.event.CreatePartitionEvent;
5252
import com.alibaba.fluss.server.coordinator.event.CreateTableEvent;
53+
import com.alibaba.fluss.server.coordinator.event.DeadCoordinatorServerEvent;
5354
import com.alibaba.fluss.server.coordinator.event.DeadTabletServerEvent;
5455
import com.alibaba.fluss.server.coordinator.event.DeleteReplicaResponseReceivedEvent;
5556
import com.alibaba.fluss.server.coordinator.event.DropPartitionEvent;
5657
import com.alibaba.fluss.server.coordinator.event.DropTableEvent;
5758
import com.alibaba.fluss.server.coordinator.event.EventProcessor;
5859
import com.alibaba.fluss.server.coordinator.event.FencedCoordinatorEvent;
60+
import com.alibaba.fluss.server.coordinator.event.NewCoordinatorServerEvent;
5961
import com.alibaba.fluss.server.coordinator.event.NewTabletServerEvent;
6062
import com.alibaba.fluss.server.coordinator.event.NotifyLeaderAndIsrResponseReceivedEvent;
63+
import com.alibaba.fluss.server.coordinator.event.watcher.CoordinatorServerChangeWatcher;
6164
import com.alibaba.fluss.server.coordinator.event.watcher.TableChangeWatcher;
6265
import com.alibaba.fluss.server.coordinator.event.watcher.TabletServerChangeWatcher;
6366
import com.alibaba.fluss.server.coordinator.statemachine.ReplicaState;
@@ -92,6 +95,7 @@
9295
import javax.annotation.concurrent.NotThreadSafe;
9396

9497
import java.util.ArrayList;
98+
import java.util.Arrays;
9599
import java.util.Collections;
96100
import java.util.HashSet;
97101
import java.util.List;
@@ -128,6 +132,7 @@ public class CoordinatorEventProcessor implements EventProcessor {
128132
private final LakeTableTieringManager lakeTableTieringManager;
129133
private final TableChangeWatcher tableChangeWatcher;
130134
private final CoordinatorChannelManager coordinatorChannelManager;
135+
private final CoordinatorServerChangeWatcher coordinatorServerChangeWatcher;
131136
private final TabletServerChangeWatcher tabletServerChangeWatcher;
132137
private final CoordinatorMetadataCache serverMetadataCache;
133138
private final CoordinatorRequestBatch coordinatorRequestBatch;
@@ -137,6 +142,7 @@ public class CoordinatorEventProcessor implements EventProcessor {
137142
private final CompletedSnapshotStoreManager completedSnapshotStoreManager;
138143

139144
// metrics
145+
private volatile int aliveCoordinatorServerCount;
140146
private volatile int tabletServerCount;
141147
private volatile int offlineBucketCount;
142148
private volatile int tableCount;
@@ -183,6 +189,8 @@ public CoordinatorEventProcessor(
183189
replicaStateMachine,
184190
tableBucketStateMachine,
185191
new RemoteStorageCleaner(conf, ioExecutor));
192+
this.coordinatorServerChangeWatcher =
193+
new CoordinatorServerChangeWatcher(zooKeeperClient, coordinatorEventManager);
186194
this.tableChangeWatcher = new TableChangeWatcher(zooKeeperClient, coordinatorEventManager);
187195
this.tabletServerChangeWatcher =
188196
new TabletServerChangeWatcher(zooKeeperClient, coordinatorEventManager);
@@ -203,6 +211,8 @@ public CoordinatorEventProcessor(
203211

204212
private void registerMetrics() {
205213
coordinatorMetricGroup.gauge(MetricNames.ACTIVE_COORDINATOR_COUNT, () -> 1);
214+
coordinatorMetricGroup.gauge(
215+
MetricNames.ALIVE_COORDINATOR_COUNT, () -> aliveCoordinatorServerCount);
206216
coordinatorMetricGroup.gauge(
207217
MetricNames.ACTIVE_TABLET_SERVER_COUNT, () -> tabletServerCount);
208218
coordinatorMetricGroup.gauge(MetricNames.OFFLINE_BUCKET_COUNT, () -> offlineBucketCount);
@@ -219,6 +229,7 @@ public CoordinatorEventManager getCoordinatorEventManager() {
219229
public void startup() {
220230
coordinatorContext.setCoordinatorServerInfo(getCoordinatorServerInfo());
221231
// start watchers first so that we won't miss node in zk;
232+
coordinatorServerChangeWatcher.start();
222233
tabletServerChangeWatcher.start();
223234
tableChangeWatcher.start();
224235
LOG.info("Initializing coordinator context.");
@@ -257,7 +268,7 @@ public void shutdown() {
257268
private ServerInfo getCoordinatorServerInfo() {
258269
try {
259270
return zooKeeperClient
260-
.getCoordinatorAddress()
271+
.getCoordinatorLeaderAddress()
261272
.map(
262273
coordinatorAddress ->
263274
// TODO we set id to 0 as that CoordinatorServer don't support
@@ -285,6 +296,11 @@ public int getCoordinatorEpoch() {
285296

286297
private void initCoordinatorContext() throws Exception {
287298
long start = System.currentTimeMillis();
299+
// get all coordinator servers
300+
int[] currentCoordinatorServers = zooKeeperClient.getCoordinatorServerList();
301+
coordinatorContext.setLiveCoordinatorServers(
302+
Arrays.stream(currentCoordinatorServers).boxed().collect(Collectors.toSet()));
303+
288304
// get all tablet server's
289305
int[] currentServers = zooKeeperClient.getSortedTabletServerList();
290306
List<ServerInfo> tabletServerInfos = new ArrayList<>();
@@ -441,6 +457,7 @@ private void onShutdown() {
441457
tableManager.shutdown();
442458

443459
// then stop watchers
460+
coordinatorServerChangeWatcher.stop();
444461
tableChangeWatcher.stop();
445462
tabletServerChangeWatcher.stop();
446463
}
@@ -461,6 +478,10 @@ public void process(CoordinatorEvent event) {
461478
(NotifyLeaderAndIsrResponseReceivedEvent) event);
462479
} else if (event instanceof DeleteReplicaResponseReceivedEvent) {
463480
processDeleteReplicaResponseReceived((DeleteReplicaResponseReceivedEvent) event);
481+
} else if (event instanceof NewCoordinatorServerEvent) {
482+
processNewCoordinatorServer((NewCoordinatorServerEvent) event);
483+
} else if (event instanceof DeadCoordinatorServerEvent) {
484+
processDeadCoordinatorServer((DeadCoordinatorServerEvent) event);
464485
} else if (event instanceof NewTabletServerEvent) {
465486
processNewTabletServer((NewTabletServerEvent) event);
466487
} else if (event instanceof DeadTabletServerEvent) {
@@ -505,6 +526,7 @@ public void process(CoordinatorEvent event) {
505526
}
506527

507528
private void updateMetrics() {
529+
aliveCoordinatorServerCount = coordinatorContext.getLiveCoordinatorServers().size();
508530
tabletServerCount = coordinatorContext.getLiveTabletServers().size();
509531
tableCount = coordinatorContext.allTables().size();
510532
bucketCount = coordinatorContext.bucketLeaderAndIsr().size();
@@ -754,10 +776,33 @@ private void onReplicaBecomeOffline(Set<TableBucketReplica> offlineReplicas) {
754776
replicaStateMachine.handleStateChanges(offlineReplicas, OfflineReplica);
755777
}
756778

779+
private void processNewCoordinatorServer(NewCoordinatorServerEvent newCoordinatorServerEvent) {
780+
int coordinatorServerId = newCoordinatorServerEvent.getServerId();
781+
if (coordinatorContext.getLiveCoordinatorServers().contains(coordinatorServerId)) {
782+
return;
783+
}
784+
785+
// process new coordinator server
786+
LOG.info("New coordinator server callback for coordinator server {}", coordinatorServerId);
787+
788+
coordinatorContext.addLiveCoordinatorServer(coordinatorServerId);
789+
}
790+
791+
private void processDeadCoordinatorServer(
792+
DeadCoordinatorServerEvent deadCoordinatorServerEvent) {
793+
int coordinatorServerId = deadCoordinatorServerEvent.getServerId();
794+
if (!coordinatorContext.getLiveCoordinatorServers().contains(coordinatorServerId)) {
795+
return;
796+
}
797+
// process dead coordinator server
798+
LOG.info("Coordinator server failure callback for {}.", coordinatorServerId);
799+
coordinatorContext.removeLiveCoordinatorServer(coordinatorServerId);
800+
}
801+
757802
private void processNewTabletServer(NewTabletServerEvent newTabletServerEvent) {
758803
// NOTE: we won't need to detect bounced tablet servers like Kafka as we won't
759804
// miss the event of tablet server un-register and register again since we can
760-
// listener the children created and deleted in zk node.
805+
// listen the children created and deleted in zk node.
761806

762807
// Also, Kafka use broker epoch to make it can reject the LeaderAndIsrRequest,
763808
// UpdateMetadataRequest and StopReplicaRequest
@@ -776,7 +821,7 @@ private void processNewTabletServer(NewTabletServerEvent newTabletServerEvent) {
776821
// it may happen during coordinator server initiation, the watcher watch a new tablet
777822
// server register event and put it to event manager, but after that, the coordinator
778823
// server read
779-
// all tablet server nodes registered which contain the tablet server a; in this case,
824+
// all tablet server nodes registered which contain the tablet server; in this case,
780825
// we can ignore it.
781826
return;
782827
}

fluss-server/src/main/java/com/alibaba/fluss/server/coordinator/CoordinatorLeaderElection.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,23 +23,43 @@
2323
import com.alibaba.fluss.shaded.curator5.org.apache.curator.framework.recipes.leader.LeaderSelector;
2424
import com.alibaba.fluss.shaded.curator5.org.apache.curator.framework.recipes.leader.LeaderSelectorListener;
2525
import com.alibaba.fluss.shaded.curator5.org.apache.curator.framework.state.ConnectionState;
26+
import com.alibaba.fluss.utils.concurrent.ExecutorThreadFactory;
2627

2728
import org.slf4j.Logger;
2829
import org.slf4j.LoggerFactory;
2930

31+
import java.util.concurrent.Executors;
32+
import java.util.concurrent.ScheduledExecutorService;
33+
import java.util.concurrent.TimeUnit;
34+
3035
/** Using by coordinator server. Coordinator servers listen ZK node and elect leadership. */
3136
public class CoordinatorLeaderElection {
3237
private static final Logger LOG = LoggerFactory.getLogger(CoordinatorLeaderElection.class);
3338

3439
private final CuratorFramework zkClient;
3540
private final int serverId;
41+
private final ScheduledExecutorService executor;
3642

3743
public CoordinatorLeaderElection(CuratorFramework zkClient, int serverId) {
44+
this(
45+
zkClient,
46+
serverId,
47+
Executors.newSingleThreadScheduledExecutor(
48+
new ExecutorThreadFactory("fluss-coordinator-leader-election")));
49+
}
50+
51+
protected CoordinatorLeaderElection(
52+
CuratorFramework zkClient, int serverId, ScheduledExecutorService executor) {
3853
this.zkClient = zkClient;
3954
this.serverId = serverId;
55+
this.executor = executor;
4056
}
4157

42-
public void startElectLeader(Runnable startLeaderServices) {
58+
public void startElectLeader(Runnable initLeaderServices) {
59+
executor.schedule(() -> electLeader(initLeaderServices), 0, TimeUnit.MILLISECONDS);
60+
}
61+
62+
private void electLeader(Runnable initLeaderServices) {
4363
LeaderSelector leaderSelector =
4464
new LeaderSelector(
4565
zkClient,
@@ -50,7 +70,15 @@ public void takeLeadership(CuratorFramework client) {
5070
LOG.info(
5171
"Coordinator server {} win the leader in election now.",
5272
serverId);
53-
startLeaderServices.run();
73+
initLeaderServices.run();
74+
75+
// Do not return, otherwise the leader will be released immediately.
76+
while (true) {
77+
try {
78+
Thread.sleep(1000);
79+
} catch (InterruptedException e) {
80+
}
81+
}
5482
}
5583

5684
@Override
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package com.alibaba.fluss.server.coordinator.event;
19+
20+
import java.util.Objects;
21+
22+
/** An event for coordinator server became dead. */
23+
public class DeadCoordinatorServerEvent implements CoordinatorEvent {
24+
25+
private final int serverId;
26+
27+
public DeadCoordinatorServerEvent(int serverId) {
28+
this.serverId = serverId;
29+
}
30+
31+
public int getServerId() {
32+
return serverId;
33+
}
34+
35+
@Override
36+
public boolean equals(Object o) {
37+
if (this == o) {
38+
return true;
39+
}
40+
if (o == null || getClass() != o.getClass()) {
41+
return false;
42+
}
43+
DeadCoordinatorServerEvent that = (DeadCoordinatorServerEvent) o;
44+
return serverId == that.serverId;
45+
}
46+
47+
@Override
48+
public int hashCode() {
49+
return Objects.hash(serverId);
50+
}
51+
52+
@Override
53+
public String toString() {
54+
return "DeadCoordinatorServerEvent{" + "serverId=" + serverId + '}';
55+
}
56+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package com.alibaba.fluss.server.coordinator.event;
19+
20+
import java.util.Objects;
21+
22+
/** An event for new coordinator server. */
23+
public class NewCoordinatorServerEvent implements CoordinatorEvent {
24+
25+
private final int serverId;
26+
27+
public NewCoordinatorServerEvent(int serverId) {
28+
this.serverId = serverId;
29+
}
30+
31+
public int getServerId() {
32+
return serverId;
33+
}
34+
35+
@Override
36+
public boolean equals(Object o) {
37+
if (this == o) {
38+
return true;
39+
}
40+
if (o == null || getClass() != o.getClass()) {
41+
return false;
42+
}
43+
NewCoordinatorServerEvent that = (NewCoordinatorServerEvent) o;
44+
return serverId == that.serverId;
45+
}
46+
47+
@Override
48+
public int hashCode() {
49+
return Objects.hash(serverId);
50+
}
51+
52+
@Override
53+
public String toString() {
54+
return "NewCoordinatorServerEvent{" + "serverId=" + serverId + '}';
55+
}
56+
}

0 commit comments

Comments
 (0)