Skip to content

Commit 2c0925c

Browse files
authored
Merge pull request #10293 from Icinga/graceful-tls-disconnect-214
Add a dedicated method for disconnecting TLS connections
2 parents ee98a9e + 2bc1c8e commit 2c0925c

12 files changed

Lines changed: 345 additions & 114 deletions

lib/base/io-engine.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,14 @@ void AsioConditionVariable::Wait(boost::asio::yield_context yc)
146146
m_Timer.async_wait(yc[ec]);
147147
}
148148

149+
/**
150+
* Cancels any pending timeout callback.
151+
*
152+
* Must be called in the strand in which the callback was scheduled!
153+
*/
149154
void Timeout::Cancel()
150155
{
151-
m_Cancelled.store(true);
156+
m_Cancelled->store(true);
152157

153158
boost::system::error_code ec;
154159
m_Timer.cancel(ec);

lib/base/io-engine.hpp

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
#ifndef IO_ENGINE_H
44
#define IO_ENGINE_H
55

6+
#include "base/atomic.hpp"
7+
#include "base/debug.hpp"
68
#include "base/exception.hpp"
79
#include "base/lazy-init.hpp"
810
#include "base/logger.hpp"
9-
#include "base/shared-object.hpp"
11+
#include "base/shared.hpp"
1012
#include <atomic>
1113
#include <exception>
1214
#include <memory>
@@ -163,51 +165,80 @@ class AsioConditionVariable
163165
/**
164166
* I/O timeout emulator
165167
*
168+
* This class provides a workaround for Boost.ASIO's lack of built-in timeout support.
169+
* While Boost.ASIO handles asynchronous operations, it does not natively support timeouts for these operations.
170+
* This class uses a boost::asio::deadline_timer to emulate a timeout by scheduling a callback to be triggered
171+
* after a specified duration, effectively adding timeout behavior where none exists.
172+
* The callback is executed within the provided strand, ensuring thread-safety.
173+
*
174+
* The constructor returns immediately after scheduling the timeout callback.
175+
* The callback itself is invoked asynchronously when the timeout occurs.
176+
* This allows the caller to continue execution while the timeout is running in the background.
177+
*
178+
* The class provides a Cancel() method to unschedule any pending callback. If the callback has already been run,
179+
* calling Cancel() has no effect. This method can be used to abort the timeout early if the monitored operation
180+
* completes before the callback has been run. The Timeout destructor also automatically cancels any pending callback.
181+
* A callback is considered pending even if the timeout has already expired,
182+
* but the callback has not been executed yet due to a busy strand.
183+
*
166184
* @ingroup base
167185
*/
168-
class Timeout : public SharedObject
186+
class Timeout
169187
{
170188
public:
171-
DECLARE_PTR_TYPEDEFS(Timeout);
172-
173-
template<class Executor, class TimeoutFromNow, class OnTimeout>
174-
Timeout(boost::asio::io_context& io, Executor& executor, TimeoutFromNow timeoutFromNow, OnTimeout onTimeout)
175-
: m_Timer(io)
189+
using Timer = boost::asio::deadline_timer;
190+
191+
/**
192+
* Schedules onTimeout to be triggered after timeoutFromNow on strand.
193+
*
194+
* @param strand The strand in which the callback will be executed.
195+
* The caller must also run in this strand, as well as Cancel() and the destructor!
196+
* @param timeoutFromNow The duration after which the timeout callback will be triggered.
197+
* @param onTimeout The callback to invoke when the timeout occurs.
198+
*/
199+
template<class OnTimeout>
200+
Timeout(boost::asio::io_context::strand& strand, const Timer::duration_type& timeoutFromNow, OnTimeout onTimeout)
201+
: m_Timer(strand.context(), timeoutFromNow), m_Cancelled(Shared<Atomic<bool>>::Make(false))
176202
{
177-
Ptr keepAlive (this);
178-
179-
m_Cancelled.store(false);
180-
m_Timer.expires_from_now(std::move(timeoutFromNow));
181-
182-
IoEngine::SpawnCoroutine(executor, [this, keepAlive, onTimeout](boost::asio::yield_context yc) {
183-
if (m_Cancelled.load()) {
184-
return;
185-
}
203+
VERIFY(strand.running_in_this_thread());
186204

187-
{
188-
boost::system::error_code ec;
189-
190-
m_Timer.async_wait(yc[ec]);
191-
192-
if (ec) {
193-
return;
205+
m_Timer.async_wait(boost::asio::bind_executor(
206+
strand, [cancelled = m_Cancelled, onTimeout = std::move(onTimeout)](boost::system::error_code ec) {
207+
if (!ec && !cancelled->load()) {
208+
onTimeout();
194209
}
195210
}
211+
));
212+
}
196213

197-
if (m_Cancelled.load()) {
198-
return;
199-
}
200-
201-
auto f (onTimeout);
202-
f(std::move(yc));
203-
});
214+
Timeout(const Timeout&) = delete;
215+
Timeout(Timeout&&) = delete;
216+
Timeout& operator=(const Timeout&) = delete;
217+
Timeout& operator=(Timeout&&) = delete;
218+
219+
/**
220+
* Cancels any pending timeout callback.
221+
*
222+
* Must be called in the strand in which the callback was scheduled!
223+
*/
224+
~Timeout()
225+
{
226+
Cancel();
204227
}
205228

206229
void Cancel();
207230

208231
private:
209-
boost::asio::deadline_timer m_Timer;
210-
std::atomic<bool> m_Cancelled;
232+
Timer m_Timer;
233+
234+
/**
235+
* Indicates whether the Timeout has been cancelled.
236+
*
237+
* This must be Shared<> between the lambda in the constructor and Cancel() for the case
238+
* the destructor calls Cancel() while the lambda is already queued in the strand.
239+
* The whole Timeout instance can't be kept alive by the lambda because this would delay the destructor.
240+
*/
241+
Shared<Atomic<bool>>::Ptr m_Cancelled;
211242
};
212243

213244
}

lib/base/tlsstream.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include "base/logger.hpp"
88
#include "base/configuration.hpp"
99
#include "base/convert.hpp"
10+
#include "base/defer.hpp"
11+
#include "base/io-engine.hpp"
1012
#include <boost/asio/ssl/context.hpp>
1113
#include <boost/asio/ssl/verify_context.hpp>
1214
#include <boost/asio/ssl/verify_mode.hpp>
@@ -103,3 +105,62 @@ void UnbufferedAsioTlsStream::BeforeHandshake(handshake_type type)
103105
}
104106
#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */
105107
}
108+
109+
/**
110+
* Forcefully close the connection, typically (details are up to the operating system) using a TCP RST.
111+
*/
112+
void AsioTlsStream::ForceDisconnect()
113+
{
114+
if (!lowest_layer().is_open()) {
115+
// Already disconnected, nothing to do.
116+
return;
117+
}
118+
119+
boost::system::error_code ec;
120+
121+
// Close the socket. In case the connection wasn't shut down cleanly by GracefulDisconnect(), the operating system
122+
// will typically terminate the connection with a TCP RST. Otherwise, this just releases the file descriptor.
123+
lowest_layer().close(ec);
124+
}
125+
126+
/**
127+
* Try to cleanly shut down the connection. This involves sending a TLS close_notify shutdown alert and terminating the
128+
* underlying TCP connection. Sending these additional messages can block, hence the method takes a yield context and
129+
* internally implements a timeout of 10 seconds for the operation after which the connection is forcefully terminated
130+
* using ForceDisconnect().
131+
*
132+
* @param strand Asio strand used for other operations on this connection.
133+
* @param yc Yield context for Asio coroutines
134+
*/
135+
void AsioTlsStream::GracefulDisconnect(boost::asio::io_context::strand& strand, boost::asio::yield_context& yc)
136+
{
137+
if (!lowest_layer().is_open()) {
138+
// Already disconnected, nothing to do.
139+
return;
140+
}
141+
142+
{
143+
Timeout shutdownTimeout (strand, boost::posix_time::seconds(10),
144+
[this] {
145+
// Forcefully terminate the connection if async_shutdown() blocked more than 10 seconds.
146+
ForceDisconnect();
147+
}
148+
);
149+
150+
// Close the TLS connection, effectively uses SSL_shutdown() to send a close_notify shutdown alert to the peer.
151+
boost::system::error_code ec;
152+
next_layer().async_shutdown(yc[ec]);
153+
}
154+
155+
if (!lowest_layer().is_open()) {
156+
// Connection got closed in the meantime, most likely by the timeout, so nothing more to do.
157+
return;
158+
}
159+
160+
// Shut down the TCP connection.
161+
boost::system::error_code ec;
162+
lowest_layer().shutdown(lowest_layer_type::shutdown_both, ec);
163+
164+
// Clean up the connection (closes the file descriptor).
165+
ForceDisconnect();
166+
}

lib/base/tlsstream.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ class AsioTlsStream : public boost::asio::buffered_stream<UnbufferedAsioTlsStrea
111111
{
112112
}
113113

114+
void ForceDisconnect();
115+
void GracefulDisconnect(boost::asio::io_context::strand& strand, boost::asio::yield_context& yc);
116+
114117
private:
115118
inline
116119
AsioTlsStream(UnbufferedAsioTlsStreamParams init)

lib/icingadb/redisconnection.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
318318
auto conn (Shared<AsioTlsStream>::Make(m_Strand.context(), *m_TLSContext, m_Host));
319319
auto& tlsConn (conn->next_layer());
320320
auto connectTimeout (MakeTimeout(conn));
321-
Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
322321

323322
icinga::Connect(conn->lowest_layer(), m_Host, Convert::ToString(m_Port), yc);
324323
tlsConn.async_handshake(tlsConn.client, yc);
@@ -348,7 +347,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
348347

349348
auto conn (Shared<TcpConn>::Make(m_Strand.context()));
350349
auto connectTimeout (MakeTimeout(conn));
351-
Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
352350

353351
icinga::Connect(conn->next_layer(), m_Host, Convert::ToString(m_Port), yc);
354352
Handshake(conn, yc);
@@ -361,7 +359,6 @@ void RedisConnection::Connect(asio::yield_context& yc)
361359

362360
auto conn (Shared<UnixConn>::Make(m_Strand.context()));
363361
auto connectTimeout (MakeTimeout(conn));
364-
Defer cancelTimeout ([&connectTimeout]() { connectTimeout->Cancel(); });
365362

366363
conn->next_layer().async_connect(Unix::endpoint(m_Path.CStr()), yc);
367364
Handshake(conn, yc);

lib/icingadb/redisconnection.hpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ namespace icinga
222222
void Handshake(StreamPtr& stream, boost::asio::yield_context& yc);
223223

224224
template<class StreamPtr>
225-
Timeout::Ptr MakeTimeout(StreamPtr& stream);
225+
Timeout MakeTimeout(StreamPtr& stream);
226226

227227
String m_Path;
228228
String m_Host;
@@ -509,15 +509,12 @@ void RedisConnection::Handshake(StreamPtr& strm, boost::asio::yield_context& yc)
509509
* @param stream Redis server connection
510510
*/
511511
template<class StreamPtr>
512-
Timeout::Ptr RedisConnection::MakeTimeout(StreamPtr& stream)
512+
Timeout RedisConnection::MakeTimeout(StreamPtr& stream)
513513
{
514-
Ptr keepAlive (this);
515-
516-
return new Timeout(
517-
m_Strand.context(),
514+
return Timeout(
518515
m_Strand,
519516
boost::posix_time::microseconds(intmax_t(m_ConnectTimeout * 1000000)),
520-
[keepAlive, stream](boost::asio::yield_context yc) {
517+
[stream] {
521518
boost::system::error_code ec;
522519
stream->lowest_layer().cancel(ec);
523520
}

lib/methods/ifwapichecktask.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ static void DoIfwNetIo(
102102
}
103103

104104
{
105+
// Using async_shutdown() instead of AsioTlsStream::GracefulDisconnect() as this whole function
106+
// is already guarded by a timeout based on the check timeout.
105107
boost::system::error_code ec;
106108
sslConn.async_shutdown(yc[ec]);
107109
}
@@ -454,8 +456,8 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
454456
IoEngine::SpawnCoroutine(
455457
*strand,
456458
[strand, checkable, cr, psCommand, psHost, expectedSan, psPort, conn, req, checkTimeout, reportResult = std::move(reportResult)](asio::yield_context yc) {
457-
Timeout::Ptr timeout = new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(checkTimeout * 1e6)),
458-
[&conn, &checkable](boost::asio::yield_context yc) {
459+
Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(checkTimeout * 1e6)),
460+
[&conn, &checkable] {
459461
Log(LogNotice, "IfwApiCheckTask")
460462
<< "Timeout while checking " << checkable->GetReflectionType()->GetName()
461463
<< " '" << checkable->GetName() << "', cancelling attempt";
@@ -465,8 +467,6 @@ void IfwApiCheckTask::ScriptFunc(const Checkable::Ptr& checkable, const CheckRes
465467
}
466468
);
467469

468-
Defer cancelTimeout ([&timeout]() { timeout->Cancel(); });
469-
470470
DoIfwNetIo(yc, cr, psCommand, psHost, expectedSan, psPort, *conn, *req);
471471

472472
cr->SetExecutionEnd(Utility::GetTime());

lib/remote/apilistener.cpp

Lines changed: 12 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -534,16 +534,15 @@ void ApiListener::ListenerCoroutineProc(boost::asio::yield_context yc, const Sha
534534
auto strand (Shared<asio::io_context::strand>::Make(io));
535535

536536
IoEngine::SpawnCoroutine(*strand, [this, strand, sslConn, remoteEndpoint](asio::yield_context yc) {
537-
Timeout::Ptr timeout(new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
538-
[sslConn, remoteEndpoint](asio::yield_context yc) {
537+
Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
538+
[sslConn, remoteEndpoint] {
539539
Log(LogWarning, "ApiListener")
540540
<< "Timeout while processing incoming connection from " << remoteEndpoint;
541541

542542
boost::system::error_code ec;
543543
sslConn->lowest_layer().cancel(ec);
544544
}
545-
));
546-
Defer cancelTimeout([timeout]() { timeout->Cancel(); });
545+
);
547546

548547
NewClientHandler(yc, strand, sslConn, String(), RoleServer);
549548
});
@@ -585,17 +584,16 @@ void ApiListener::AddConnection(const Endpoint::Ptr& endpoint)
585584

586585
lock.unlock();
587586

588-
Timeout::Ptr timeout(new Timeout(strand->context(), *strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
589-
[sslConn, endpoint, host, port](asio::yield_context yc) {
587+
Timeout timeout (*strand, boost::posix_time::microseconds(int64_t(GetConnectTimeout() * 1e6)),
588+
[sslConn, endpoint, host, port] {
590589
Log(LogCritical, "ApiListener")
591590
<< "Timeout while reconnecting to endpoint '" << endpoint->GetName() << "' via host '" << host
592591
<< "' and port '" << port << "', cancelling attempt";
593592

594593
boost::system::error_code ec;
595594
sslConn->lowest_layer().cancel(ec);
596595
}
597-
));
598-
Defer cancelTimeout([&timeout]() { timeout->Cancel(); });
596+
);
599597

600598
Connect(sslConn->lowest_layer(), host, port, yc);
601599

@@ -683,19 +681,16 @@ void ApiListener::NewClientHandlerInternal(
683681
boost::system::error_code ec;
684682

685683
{
686-
Timeout::Ptr handshakeTimeout (new Timeout(
687-
strand->context(),
684+
Timeout handshakeTimeout (
688685
*strand,
689686
boost::posix_time::microseconds(intmax_t(Configuration::TlsHandshakeTimeout * 1000000)),
690-
[strand, client](asio::yield_context yc) {
687+
[client] {
691688
boost::system::error_code ec;
692689
client->lowest_layer().cancel(ec);
693690
}
694-
));
691+
);
695692

696693
sslConn.async_handshake(role == RoleClient ? sslConn.client : sslConn.server, yc[ec]);
697-
698-
handshakeTimeout->Cancel();
699694
}
700695

701696
if (ec) {
@@ -719,6 +714,9 @@ void ApiListener::NewClientHandlerInternal(
719714
// Ignore the error, but do not throw an exception being swallowed at all cost.
720715
// https://github.com/Icinga/icinga2/issues/7351
721716
boost::system::error_code ec;
717+
718+
// Using async_shutdown() instead of AsioTlsStream::GracefulDisconnect() as this whole function
719+
// is already guarded by a timeout based on the connect timeout.
722720
sslConn.async_shutdown(yc[ec]);
723721
}
724722
});

0 commit comments

Comments
 (0)