Skip to content

Commit 89a5def

Browse files
authored
Fix NetAcceptAction::cancel() use-after-free race condition (#12803)
Fix a race condition between NetAcceptAction::cancel() and NetAccept::acceptEvent() where the server pointer could be dereferenced after the NetAccept object was deleted.

The race occurred as follows:
1. Thread T4 calls NetAcceptAction::cancel(), sets cancelled=true
2. Thread T3 running acceptEvent() sees cancelled==true
3. Thread T3 deletes the NetAccept (including the embedded Server)
4. Thread T4 tries to call server->close() on freed memory

The fix uses std::atomic<Server*> with atomic exchange to ensure only one thread can successfully obtain and use the server pointer. Both cancel() and the cleanup paths that run before `delete this` atomically exchange the pointer with nullptr - whichever succeeds first closes the server, the other becomes a no-op.

This addresses the TODO comment that was in the code: "// TODO fix race between cancel accept and call back"

ASAN report that this fixes (seen intermittently on rocky CI builds):

==8850==ERROR: AddressSanitizer: heap-use-after-free on address 0x616000028cb4 at pc 0x000001346739 bp 0x7fa40fd2f580 sp 0x7fa40fd2f570
WRITE of size 4 at 0x616000028cb4 thread T4 ([ET_NET 2])
    #0 0x1346738 in UnixSocket::close() ../src/iocore/eventsystem/UnixSocket.cc:138
    #1 0x12b44ed in Server::close() ../src/iocore/net/Server.cc:88
    #2 0x121fb95 in NetAcceptAction::cancel(Continuation*) ../src/iocore/net/P_NetAccept.h:71
    #3 0x7fa41686d082 in TSActionCancel(tsapi_action*) ../src/api/InkAPI.cc:5828
    ...
0x616000028cb4 is located 308 bytes inside of 576-byte region [0x616000028b80,0x616000028dc0)
freed by thread T3 ([ET_NET 1]) here:
    #0 0x7fa416d2036f in operator delete(void*, unsigned long)
    #1 0x12593c4 in NetAccept::~NetAccept() ../src/iocore/net/P_NetAccept.h:128
    #2 0x12bebf0 in NetAccept::acceptEvent(int, void*) ../src/iocore/net/UnixNetAccept.cc:484
    ...
1 parent a5363d2 commit 89a5def

4 files changed

Lines changed: 24 additions & 16 deletions

File tree

src/iocore/net/P_NetAccept.h

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "iocore/net/NetAcceptEventIO.h"
4444
#include "Server.h"
4545

46+
#include <atomic>
4647
#include <vector>
4748

4849
struct NetAccept;
@@ -60,21 +61,29 @@ AcceptFunction net_accept;
6061

6162
class UnixNetVConnection;
6263

63-
// TODO fix race between cancel accept and call back
6464
struct NetAcceptAction : public Action, public RefCountObjInHeap {
65-
Server *server;
65+
std::atomic<Server *> server{nullptr};
6666

67-
void
68-
cancel(Continuation *cont = nullptr) override
67+
NetAcceptAction(Continuation *cont, Server *s)
6968
{
70-
Action::cancel(cont);
71-
server->close();
69+
continuation = cont;
70+
if (cont != nullptr) {
71+
mutex = cont->mutex;
72+
}
73+
server.store(s, std::memory_order_release);
7274
}
7375

74-
Continuation *
75-
operator=(Continuation *acont)
76+
void
77+
cancel(Continuation *cont = nullptr) override
7678
{
77-
return Action::operator=(acont);
79+
// Close the server before setting the cancelled flag. This ensures that
80+
// when acceptEvent() sees cancelled == true, the server close is already
81+
// complete, preventing use-after-free races.
82+
Server *s = server.exchange(nullptr, std::memory_order_acq_rel);
83+
if (s != nullptr) {
84+
s->close();
85+
}
86+
Action::cancel(cont);
7887
}
7988

8089
~NetAcceptAction() override

src/iocore/net/QUICNetProcessor.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -251,9 +251,7 @@ QUICNetProcessor::main_accept(Continuation *cont, SOCKET fd, AcceptOptions const
251251
na->server.sock = UnixSocket{fd};
252252
ats_ip_copy(&na->server.accept_addr, &accept_ip);
253253

254-
na->action_ = new NetAcceptAction();
255-
*na->action_ = cont;
256-
na->action_->server = &na->server;
254+
na->action_ = new NetAcceptAction(cont, &na->server);
257255
na->init_accept();
258256

259257
return na->action_.get();

src/iocore/net/UnixNetAccept.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@ NetAccept::acceptEvent(int event, void *ep)
479479
MUTEX_TRY_LOCK(lock, m, e->ethread);
480480
if (lock.is_locked()) {
481481
if (action_->cancelled) {
482+
// Server was already closed by whoever called cancel().
482483
e->cancel();
483484
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
484485
delete this;
@@ -487,6 +488,7 @@ NetAccept::acceptEvent(int event, void *ep)
487488

488489
int res;
489490
if ((res = net_accept(this, e, false)) < 0) {
491+
action_->cancel();
490492
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
491493
/* INKqa11179 */
492494
Warning("Accept on port %d failed with error no %d", ats_ip_port_host_order(&server.addr), res);
@@ -637,7 +639,7 @@ NetAccept::acceptFastEvent(int event, void *ep)
637639
return EVENT_CONT;
638640

639641
Lerror:
640-
server.close();
642+
action_->cancel();
641643
e->cancel();
642644
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
643645
delete this;
@@ -656,6 +658,7 @@ NetAccept::acceptLoopEvent(int event, Event *e)
656658
}
657659

658660
// Don't think this ever happens ...
661+
action_->cancel();
659662
Metrics::Gauge::decrement(net_rsb.accepts_currently_open);
660663
delete this;
661664
return EVENT_DONE;

src/iocore/net/UnixNetProcessor.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,7 @@ UnixNetProcessor::accept_internal(Continuation *cont, int fd, AcceptOptions cons
133133
na->proxyPort = sa ? sa->proxyPort : nullptr;
134134
na->snpa = dynamic_cast<SSLNextProtocolAccept *>(cont);
135135

136-
na->action_ = new NetAcceptAction();
137-
*na->action_ = cont;
138-
na->action_->server = &na->server;
136+
na->action_ = new NetAcceptAction(cont, &na->server);
139137

140138
if (opt.frequent_accept) { // true
141139
if (accept_threads > 0 && listen_per_thread == 0) {

0 commit comments

Comments
 (0)