From e29f6546d051c0d473bca08221cbe47f8b4df79b Mon Sep 17 00:00:00 2001 From: bneradt Date: Fri, 8 May 2026 17:03:53 -0500 Subject: [PATCH] Fix AIO callback from_api completion lifetime The AIOCallback io_complete handler used a member variable, from_api, to determine whether to delete itself. The problem is that in the situation where in the course of processing the function `this` was already deleted, the use of the from_api variable was, by definition, a use after free. If built under ASan, this resulted in a use-after-free assertion. Concretely, we were seeing this in docs during cache stripe initialization after an unclean shutdown. If the on-disk cache directory was dirty, startup recovery scanned the data area, cleared directory entries for the uncertain range, and wrote the repaired directory back out. The temporary AIO callbacks for that recovery wrote live in StripeInitInfo. When the recovery write completion was delivered, StripeSM::handle_recover_write_dir() deleted StripeInitInfo, which destroyed the AIOCallback object whose AIOCallback::io_complete() frame was still returning. At that point, the use of from_api was a use after free. This snapshots the API-owned callback flag before dispatching the completion and uses that local value for the post-callback cleanup. This also adds a focused regression test for completion handlers that release the callback owner before AIOCallback::io_complete() returns. Introduced in #13027 --- include/iocore/aio/AIO.h | 4 +- src/iocore/aio/AIO.cc | 7 +- src/iocore/aio/CMakeLists.txt | 4 ++ src/iocore/aio/unit_tests/test_AIOCallback.cc | 67 +++++++++++++++++++ 4 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 src/iocore/aio/unit_tests/test_AIOCallback.cc diff --git a/include/iocore/aio/AIO.h b/include/iocore/aio/AIO.h index e2b3bbf301a..22396753ecb 100644 --- a/include/iocore/aio/AIO.h +++ b/include/iocore/aio/AIO.h @@ -82,8 +82,8 @@ struct AIOCallback : public Continuation { int64_t aio_result = 0; AIO_Reqs *aio_req = nullptr; ink_hrtime sleep_time = 0; - bool from_api = false; - SLINK(AIOCallback, alink); /* for AIO_Reqs::aio_temp_list */ + bool from_api = false; ///< Whether the TS API created this callback. + SLINK(AIOCallback, alink); /* for AIO_Reqs::aio_temp_list */ #if TS_USE_LINUX_IO_URING iovec iov = {}; // this is to support older kernels that only support readv/writev AIOCallback *this_op = nullptr; diff --git a/src/iocore/aio/AIO.cc b/src/iocore/aio/AIO.cc index f8d63d3f3b1..251cb2b84b1 100644 --- a/src/iocore/aio/AIO.cc +++ b/src/iocore/aio/AIO.cc @@ -85,6 +85,11 @@ AIOCallback::io_complete(int event, void *data) { (void)event; (void)data; + // Store from_api's value ahead of time because handling the event in the + // midst of this function may delete `this`, making using from_api itself a + // use-after-free later. + bool const should_delete_self = from_api; + if (aio_err_callback && !ok()) { AIOCallback *err_op = new AIOCallback(); err_op->aiocb.aio_fildes = this->aiocb.aio_fildes; @@ -99,7 +104,7 @@ AIOCallback::io_complete(int event, void *data) if (!action.cancelled && action.continuation) { action.continuation->handleEvent(AIO_EVENT_DONE, this); } - if (from_api) { + if (should_delete_self) { delete this; } return EVENT_DONE; diff --git a/src/iocore/aio/CMakeLists.txt b/src/iocore/aio/CMakeLists.txt index 3d06b67ae17..8040a311189 100644 --- a/src/iocore/aio/CMakeLists.txt +++ b/src/iocore/aio/CMakeLists.txt @@ -33,6 +33,10 @@ if(BUILD_TESTING) COMMAND $ WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/src/iocore/aio ) + + add_executable(test_AIOCallback unit_tests/test_AIOCallback.cc) + target_link_libraries(test_AIOCallback PRIVATE ts::aio Catch2::Catch2WithMain) + add_catch2_test(NAME test_AIOCallback COMMAND test_AIOCallback) endif() clang_tidy_check(aio) diff --git a/src/iocore/aio/unit_tests/test_AIOCallback.cc b/src/iocore/aio/unit_tests/test_AIOCallback.cc new file mode 100644 index 00000000000..0fcd1313094 --- /dev/null +++ b/src/iocore/aio/unit_tests/test_AIOCallback.cc @@ -0,0 +1,67 @@ +/** @file + + Catch based unit tests for AIOCallback. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + */ + +#include + +#include "iocore/aio/AIO.h" +#include "iocore/eventsystem/Event.h" + +namespace +{ + +struct AIOCompletionOwner : Continuation { + AIOCallback callback; + bool *completed = nullptr; + + explicit AIOCompletionOwner(bool &completion_flag) : Continuation(nullptr), completed(&completion_flag) + { + SET_HANDLER(&AIOCompletionOwner::handle_aio_complete); + + callback.action = this; + callback.aiocb.aio_nbytes = 0; + callback.aio_result = 0; + } + + int + handle_aio_complete(int event, void *data) + { + CHECK(event == AIO_EVENT_DONE); + CHECK(data == &callback); + + *completed = true; + delete this; + return EVENT_DONE; + } +}; + +} // namespace + +TEST_CASE("AIOCallback completion tolerates owner deletion", "[iocore][aio]") +{ + bool completed = false; + auto owner = new AIOCompletionOwner(completed); + auto callback = &owner->callback; + + CHECK(callback->io_complete(EVENT_NONE, nullptr) == EVENT_DONE); + CHECK(completed); +}