From 46bc3e7391c7beee22a89dedd87c5aa6dd75626d Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 9 Apr 2026 10:33:44 -0700 Subject: [PATCH 1/3] Clean up deadlock regression test - Run entire test on a separate thread so gtest can detect deadlock via timeout without hanging the process - Install hook after initialization instead of using enable flag - Use plain bool for hookSignaled (single-thread access) - Move unique_ptr into test thread (single owner) - Update comments and flow diagram to match code Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 130 ++++++++++++++++-------------- 1 file changed, 69 insertions(+), 61 deletions(-) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index e09da4fd..f3b9ddda 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -131,82 +131,90 @@ TEST(Console, Log) TEST(AppRuntime, DestroyDoesNotDeadlock) { - // Deterministic test for the race condition in the AppRuntime destructor. + // Regression test verifying AppRuntime destruction doesn't deadlock. + // Uses a global arcana hook to sleep while holding the queue mutex + // before wait(), ensuring the worker is in the vulnerable window + // when the destructor fires. See #147 for details on the bug and fix. // - // A global hook sleeps WHILE HOLDING the queue mutex, right before - // condition_variable::wait(). We synchronize so the worker is definitely - // in the hook before triggering destruction. + // The entire test runs on a separate thread so the gtest thread can + // detect a deadlock via timeout without hanging the process. // - // Old (broken) code: cancel() + notify_all() fire without the mutex, - // so the notification is lost while the worker sleeps → deadlock. - // Fixed code: cancel() + Append(no-op), where push() NEEDS the mutex, - // so it blocks until the worker enters wait() → notification delivered. - - // Shared state for hook synchronization - std::atomic hookEnabled{false}; - std::atomic hookSignaled{false}; + // Test flow: + // + // Test Thread Worker Thread + // ----------- ------------- + // 1. Create AppRuntime Worker starts, enters blocking_tick + // Wait for init to complete + // 2. Install hook + // Dispatch(no-op) Worker wakes, runs no-op, + // returns to blocking_tick + // Hook fires: + // signal workerInHook + // sleep 200ms (holding mutex!) + // 3. workerInHook.wait() + // Worker is sleeping in hook + // 4. ~AppRuntime(): + // cancel() + // Append(no-op): + // push() blocks ------> (worker holds mutex) + // 200ms sleep ends + // wait(lock) releases mutex + // push() acquires mutex + // pushes, notifies ---> wakes up! + // join() waits drains no-op, cancelled -> exit + // join() returns <----- thread exits + // 5. destroy completes -> PASS + + bool hookSignaled{false}; std::promise workerInHook; - // Set the callback. It checks hookEnabled so we control - // when it actually sleeps. - arcana::set_before_wait_callback([&]() { - if (hookEnabled.load() && !hookSignaled.exchange(true)) - { - workerInHook.set_value(); - } - if (hookEnabled.load()) - { + // Run the full lifecycle on a separate thread so the gtest thread + // can detect a deadlock via timeout. + std::promise testDone; + std::thread testThread([&]() { + auto runtime = std::make_unique(); + + // Wait for the runtime to fully initialize. The constructor dispatches + // CreateForJavaScript which must complete before we install the hook + // so the worker is idle and ready to enter the hook on the next wait. + std::promise ready; + runtime->Dispatch([&ready](Napi::Env) { + ready.set_value(); + }); + ready.get_future().wait(); + + // Install the hook and dispatch a no-op to wake the worker, + // ensuring it cycles through the hook on its way back to idle. + arcana::set_before_wait_callback([&]() { + if (!hookSignaled) + { + hookSignaled = true; + workerInHook.set_value(); + } std::this_thread::sleep_for(std::chrono::milliseconds(200)); - } - }); + }); + runtime->Dispatch([](Napi::Env) {}); - auto runtime = std::make_unique(); + // Wait for the worker to be in the hook (holding mutex, sleeping) + workerInHook.get_future().wait(); - // Dispatch work and wait for completion - std::promise ready; - runtime->Dispatch([&ready](Napi::Env) { - ready.set_value(); + // Destroy — if the fix works, the destructor completes. + // If broken, it deadlocks and the timeout detects it. + runtime.reset(); + testDone.set_value(); }); - ready.get_future().wait(); - // Enable the hook and dispatch a no-op to wake the worker, - // ensuring it cycles through the hook on its way back to idle - hookEnabled.store(true); - runtime->Dispatch([](Napi::Env) {}); + auto status = testDone.get_future().wait_for(std::chrono::seconds(5)); - // Wait for the worker to be in the hook (holding mutex, sleeping) - auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5)); - if (hookStatus == std::future_status::timeout) - { - // Hook didn't fire — no deadlock risk, clean up normally - arcana::set_before_wait_callback([]() {}); - FAIL() << "Worker thread did not enter before-wait hook"; - } - - // Worker is in the hook (holding mutex, sleeping). Destroy on a - // detachable thread so the test doesn't hang if the destructor deadlocks. - auto runtimePtr = std::make_shared>(std::move(runtime)); - std::promise destroyDone; - auto destroyFuture = destroyDone.get_future(); - std::thread destroyThread([runtimePtr, &destroyDone]() { - runtimePtr->reset(); - destroyDone.set_value(); - }); + arcana::set_before_wait_callback([]() {}); - auto status = destroyFuture.wait_for(std::chrono::seconds(5)); if (status == std::future_status::timeout) { - destroyThread.detach(); - } - else - { - destroyThread.join(); + testThread.detach(); + FAIL() << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; } - arcana::set_before_wait_callback([]() {}); - - ASSERT_NE(status, std::future_status::timeout) - << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds"; + testThread.join(); } int RunTests() From f0ab95d215cc72fb8d753be11cff623a21c23c02 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 9 Apr 2026 13:36:10 -0700 Subject: [PATCH 2/3] Add comment explaining non-deterministic sleep window Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Tests/UnitTests/Shared/Shared.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index f3b9ddda..e0849e91 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -191,6 +191,11 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) hookSignaled = true; workerInHook.set_value(); } + // This sleep is not truly deterministic — it creates a timing window + // during which the destructor's push() will contend for the mutex. + // The sleep holds the mutex, so push() blocks until it ends and the + // worker enters wait(). This is sufficient for testing but relies on + // the destructor firing within this window. std::this_thread::sleep_for(std::chrono::milliseconds(200)); }); runtime->Dispatch([](Napi::Env) {}); From 2e2ec555ae9dffb5cddb8f496c8ffdb8fbc94536 Mon Sep 17 00:00:00 2001 From: Gary Hsu Date: Thu, 9 Apr 2026 17:12:29 -0700 Subject: [PATCH 3/3] Update arcana.cpp to use renamed test hooks Bump arcana.cpp GIT_TAG to b9bf9d85fce37d5fc9dbfc4a4dc5e1531bee215a which renames ARCANA_TESTING_HOOKS to ARCANA_TEST_HOOKS and moves hooks to arcana::test_hooks::blocking_concurrent_queue namespace. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- CMakeLists.txt | 4 ++-- Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt | 2 +- Tests/UnitTests/Shared/Shared.cpp | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 116efa6b..b74afd1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ FetchContent_Declare(AndroidExtensions EXCLUDE_FROM_ALL) FetchContent_Declare(arcana.cpp GIT_REPOSITORY https://github.com/microsoft/arcana.cpp.git - GIT_TAG d5dd03cc6dd138fc17c277f61abbe2bc388444af + GIT_TAG b9bf9d85fce37d5fc9dbfc4a4dc5e1531bee215a EXCLUDE_FROM_ALL) FetchContent_Declare(asio GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git @@ -114,7 +114,7 @@ endif() # -------------------------------------------------- if(JSRUNTIMEHOST_TESTS) - add_compile_definitions(ARCANA_TESTING_HOOKS) + add_compile_definitions(ARCANA_TEST_HOOKS) endif() FetchContent_MakeAvailable_With_Message(arcana.cpp) diff --git a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt index ac5032ed..2e310406 100644 --- a/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt +++ b/Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt @@ -22,7 +22,7 @@ add_library(UnitTestsJNI SHARED ${UNIT_TESTS_DIR}/Shared/Shared.cpp) target_compile_definitions(UnitTestsJNI PRIVATE JSRUNTIMEHOST_PLATFORM="${JSRUNTIMEHOST_PLATFORM}") -target_compile_definitions(UnitTestsJNI PRIVATE ARCANA_TESTING_HOOKS) +target_compile_definitions(UnitTestsJNI PRIVATE ARCANA_TEST_HOOKS) target_include_directories(UnitTestsJNI PRIVATE ${UNIT_TESTS_DIR}) diff --git a/Tests/UnitTests/Shared/Shared.cpp b/Tests/UnitTests/Shared/Shared.cpp index e0849e91..8973575c 100644 --- a/Tests/UnitTests/Shared/Shared.cpp +++ b/Tests/UnitTests/Shared/Shared.cpp @@ -185,7 +185,7 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) // Install the hook and dispatch a no-op to wake the worker, // ensuring it cycles through the hook on its way back to idle. - arcana::set_before_wait_callback([&]() { + arcana::test_hooks::blocking_concurrent_queue::set_before_wait_callback([&]() { if (!hookSignaled) { hookSignaled = true; @@ -211,7 +211,7 @@ TEST(AppRuntime, DestroyDoesNotDeadlock) auto status = testDone.get_future().wait_for(std::chrono::seconds(5)); - arcana::set_before_wait_callback([]() {}); + arcana::test_hooks::blocking_concurrent_queue::set_before_wait_callback([]() {}); if (status == std::future_status::timeout) {