Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,12 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
static final int CONTAINER_CLOSE_THREADS_DEFAULT = 3;
static final int BLOCK_DELETE_THREADS_DEFAULT = 5;

public static final String GRPC_MAX_CONNECTIONS_KEY = "hdds.datanode.grpc.max.connections";
public static final int GRPC_MAX_CONNECTIONS_DEFAULT = 5000;

public static final String GRPC_SO_BACKLOG_KEY = "hdds.datanode.grpc.so.backlog";
public static final int GRPC_SO_BACKLOG_DEFAULT = 4096;

public static final String BLOCK_DELETE_COMMAND_WORKER_INTERVAL =
"hdds.datanode.block.delete.command.worker.interval";
public static final Duration BLOCK_DELETE_COMMAND_WORKER_INTERVAL_DEFAULT = Duration.ofSeconds(2);
Expand All @@ -154,6 +160,33 @@ public class DatanodeConfiguration extends ReconfigurableConfig {
)
private int numReadThreadPerVolume = 10;

/**
* Maximum number of concurrent gRPC connections to the Datanode server.
*/
@Config(key = "hdds.datanode.grpc.max.connections",
type = ConfigType.INT,
defaultValue = "5000",
tags = {DATANODE},
description = "Maximum number of concurrent gRPC connections to the " +
"Datanode server. This helps prevent file descriptor exhaustion. " +
"Set to 0 to disable the limit."
)
private int grpcMaxConnections = GRPC_MAX_CONNECTIONS_DEFAULT;

/**
* SO_BACKLOG value for the gRPC server socket.
*/
@Config(key = "hdds.datanode.grpc.so.backlog",
type = ConfigType.INT,
defaultValue = "4096",
tags = {DATANODE},
description = "The SO_BACKLOG value for the Datanode gRPC server socket. " +
"This limits the number of pending connections in the kernel's " +
"accept queue. When this limit is reached, the kernel will reject " +
"new connection attempts with SYN drops."
)
private int grpcSoBacklog = GRPC_SO_BACKLOG_DEFAULT;

/**
* The maximum number of threads used to delete containers on a datanode
* simultaneously.
Expand Down Expand Up @@ -1212,4 +1245,20 @@ static long getDefaultFreeSpace() {
final StorageSize measure = StorageSize.parse(HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_DEFAULT);
return Math.round(measure.getUnit().toBytes(measure.getValue()));
}

/**
 * Returns the maximum number of concurrent gRPC connections allowed on the
 * Datanode server. A value of zero or less means the limit is disabled.
 *
 * @return the configured connection limit
 */
public int getGrpcMaxConnections() {
return grpcMaxConnections;
}

/**
 * Sets the maximum number of concurrent gRPC connections allowed on the
 * Datanode server. Use zero (or a negative value) to disable the limit.
 *
 * @param grpcMaxConnections the new connection limit
 */
public void setGrpcMaxConnections(int grpcMaxConnections) {
this.grpcMaxConnections = grpcMaxConnections;
}

/**
 * Returns the SO_BACKLOG value applied to the Datanode gRPC server socket,
 * i.e. the size of the kernel's pending-connection accept queue.
 *
 * @return the configured SO_BACKLOG value
 */
public int getGrpcSoBacklog() {
return grpcSoBacklog;
}

/**
 * Sets the SO_BACKLOG value for the Datanode gRPC server socket.
 *
 * @param grpcSoBacklog the new SO_BACKLOG value
 */
public void setGrpcSoBacklog(int grpcSoBacklog) {
this.grpcSoBacklog = grpcSoBacklog;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.ozone.container.common.transport.server;

import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.ratis.thirdparty.io.grpc.Attributes;
import org.apache.ratis.thirdparty.io.grpc.ServerTransportFilter;
import org.apache.ratis.thirdparty.io.grpc.Status;
import org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A gRPC ServerTransportFilter that limits the number of concurrent
* connections to the Datanode gRPC server.
*
* <p>When the connection limit is reached, new connections are rejected
* with a RESOURCE_EXHAUSTED status, preventing file descriptor exhaustion.
*/
public class GrpcConnectionLimitFilter extends ServerTransportFilter {

private static final Logger LOG =
LoggerFactory.getLogger(GrpcConnectionLimitFilter.class);

private final int maxConnections;
private final AtomicInteger activeConnections = new AtomicInteger(0);
private final AtomicLong totalAcceptedConnections = new AtomicLong(0);
private final AtomicLong totalRejectedConnections = new AtomicLong(0);

/**
* Creates a new connection limit filter.
*
* @param maxConnections the maximum number of concurrent connections allowed.
* If zero or negative, no limit is enforced.
*/
public GrpcConnectionLimitFilter(int maxConnections) {
this.maxConnections = maxConnections;
}

/**
* {@inheritDoc}
*/
@Override
public Attributes transportReady(Attributes transportAttrs) {
if (maxConnections <= 0) {
return super.transportReady(transportAttrs);
}

int current = activeConnections.incrementAndGet();
if (current > maxConnections) {
activeConnections.decrementAndGet();
totalRejectedConnections.incrementAndGet();
LOG.warn("Connection rejected: limit {} reached, current active: {}",
maxConnections, current - 1);
throw new StatusRuntimeException(
Status.RESOURCE_EXHAUSTED.withDescription(
"Datanode connection limit exceeded: " + maxConnections));
}

totalAcceptedConnections.incrementAndGet();
if (LOG.isDebugEnabled()) {
LOG.debug("Connection accepted: active connections: {}/{}",
current, maxConnections);
}
return super.transportReady(transportAttrs);
}

/**
* {@inheritDoc}
*/
@Override
public void transportTerminated(Attributes transportAttrs) {
if (maxConnections > 0) {
int current = activeConnections.decrementAndGet();
if (LOG.isDebugEnabled()) {
LOG.debug("Connection terminated: active connections: {}/{}",
current, maxConnections);
}
}
super.transportTerminated(transportAttrs);
}

/**
* Returns the current number of active connections.
* @return the number of active connections
*/
public int getActiveConnections() {
return activeConnections.get();
}

/**
* Returns the total number of connections accepted since server start.
* @return the total accepted connections count
*/
public long getTotalAcceptedConnections() {
return totalAcceptedConnections.get();
}

/**
* Returns the total number of connections rejected since server start.
* @return the total rejected connections count
*/
public long getTotalRejectedConnections() {
return totalRejectedConnections.get();
}

/**
* Returns the configured maximum number of connections.
* @return the maximum connections limit
*/
public int getMaxConnections() {
return maxConnections;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors;
import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts;
import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder;
import org.apache.ratis.thirdparty.io.netty.channel.ChannelOption;
import org.apache.ratis.thirdparty.io.netty.channel.EventLoopGroup;
import org.apache.ratis.thirdparty.io.netty.channel.ServerChannel;
import org.apache.ratis.thirdparty.io.netty.channel.epoll.Epoll;
Expand Down Expand Up @@ -97,11 +98,17 @@ public XceiverServerGrpc(DatanodeDetails datanodeDetails,
this.port = 0;
}

final int threadCountPerDisk =
conf.getObject(DatanodeConfiguration.class).getNumReadThreadPerVolume();
DatanodeConfiguration dnConf = conf.getObject(DatanodeConfiguration.class);
final int threadCountPerDisk = dnConf.getNumReadThreadPerVolume();
final int numberOfDisks =
HddsServerUtil.getDatanodeStorageDirs(conf).size();
final int poolSize = threadCountPerDisk * numberOfDisks;
final int maxConnections = dnConf.getGrpcMaxConnections();
final int soBacklog = dnConf.getGrpcSoBacklog();
GrpcConnectionLimitFilter connectionLimitFilter =
new GrpcConnectionLimitFilter(maxConnections);
LOG.info("Datanode gRPC server max connections: {}, SO_BACKLOG: {}",
maxConnections > 0 ? maxConnections : "unlimited", soBacklog);

readExecutors = new ThreadPoolExecutor(poolSize, poolSize,
60, TimeUnit.SECONDS,
Expand Down Expand Up @@ -133,10 +140,12 @@ public XceiverServerGrpc(DatanodeDetails datanodeDetails,
.bossEventLoopGroup(eventLoopGroup)
.workerEventLoopGroup(eventLoopGroup)
.channelType(channelType)
.withOption(ChannelOption.SO_BACKLOG, soBacklog)
.executor(readExecutors)
.addService(ServerInterceptors.intercept(
xceiverService.bindServiceWithZeroCopy(),
new GrpcServerInterceptor()));
new GrpcServerInterceptor()))
.addTransportFilter(connectionLimitFilter);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@YutaLin , thanks for working on this.

Could you try using NettyServerBuilder's withChildOption to set ChannelOption.SO_BACKLOG?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @ChenSammi, thanks for the review,
I've added SO_BACKLOG to absorb bursts of incoming connections, and kept the connection limit filter to control sustained high connection counts.


SecurityConfig secConf = new SecurityConfig(conf);
if (secConf.isSecurityEnabled() && secConf.isGrpcTlsEnabled()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_DB_VOLUMES_TOLERATED_KEY;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_METADATA_VOLUMES_TOLERATED_KEY;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.FAILED_VOLUMES_TOLERATED_DEFAULT;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.GRPC_MAX_CONNECTIONS_DEFAULT;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.GRPC_MAX_CONNECTIONS_KEY;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.GRPC_SO_BACKLOG_DEFAULT;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.GRPC_SO_BACKLOG_KEY;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT_DEFAULT;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT;
import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.PERIODIC_DISK_CHECK_INTERVAL_MINUTES_KEY;
Expand Down Expand Up @@ -283,4 +287,74 @@ static void assertWaitTimeMin(TimeDuration expected,
assertEquals(expected, t,
RaftServerConfigKeys.Log.Appender.WAIT_TIME_MIN_KEY);
}

@Test
void testGrpcMaxConnectionsDefault() {
  // A pristine configuration must expose the documented default limit.
  DatanodeConfiguration dnConf =
      new OzoneConfiguration().getObject(DatanodeConfiguration.class);

  assertEquals(GRPC_MAX_CONNECTIONS_DEFAULT, dnConf.getGrpcMaxConnections());
}

@Test
void testGrpcMaxConnectionsCustomValue() {
  // An explicitly configured limit must override the default.
  final int configuredLimit = 10000;
  OzoneConfiguration ozoneConf = new OzoneConfiguration();
  ozoneConf.setInt(GRPC_MAX_CONNECTIONS_KEY, configuredLimit);

  DatanodeConfiguration dnConf =
      ozoneConf.getObject(DatanodeConfiguration.class);

  assertEquals(configuredLimit, dnConf.getGrpcMaxConnections());
}

@Test
void testGrpcMaxConnectionsZeroDisablesLimit() {
  // Zero is the sentinel for "no limit"; it must survive config loading.
  OzoneConfiguration ozoneConf = new OzoneConfiguration();
  ozoneConf.setInt(GRPC_MAX_CONNECTIONS_KEY, 0);

  assertEquals(0,
      ozoneConf.getObject(DatanodeConfiguration.class).getGrpcMaxConnections());
}

@Test
void testGrpcMaxConnectionsSetter() {
  // The programmatic setter must be reflected by the getter.
  DatanodeConfiguration dnConf =
      new OzoneConfiguration().getObject(DatanodeConfiguration.class);
  dnConf.setGrpcMaxConnections(8000);

  assertEquals(8000, dnConf.getGrpcMaxConnections());
}

@Test
void testGrpcSoBacklogDefault() {
  // A pristine configuration must expose the documented SO_BACKLOG default.
  DatanodeConfiguration dnConf =
      new OzoneConfiguration().getObject(DatanodeConfiguration.class);

  assertEquals(GRPC_SO_BACKLOG_DEFAULT, dnConf.getGrpcSoBacklog());
}

@Test
void testGrpcSoBacklogCustomValue() {
  // An explicitly configured backlog must override the default.
  final int configuredBacklog = 256;
  OzoneConfiguration ozoneConf = new OzoneConfiguration();
  ozoneConf.setInt(GRPC_SO_BACKLOG_KEY, configuredBacklog);

  DatanodeConfiguration dnConf =
      ozoneConf.getObject(DatanodeConfiguration.class);

  assertEquals(configuredBacklog, dnConf.getGrpcSoBacklog());
}

@Test
void testGrpcSoBacklogSetter() {
  // The programmatic setter must be reflected by the getter.
  DatanodeConfiguration dnConf =
      new OzoneConfiguration().getObject(DatanodeConfiguration.class);
  dnConf.setGrpcSoBacklog(512);

  assertEquals(512, dnConf.getGrpcSoBacklog());
}
}
Loading