Skip to content

Commit d90e7b8

Browse files
bghgaryCopilot
andcommitted
Fix WorkQueue destructor deadlock by merging into AppRuntime
WorkQueue::~WorkQueue() had a race condition where cancel() + notify_all() fired without the queue mutex, so the signal could be lost if the worker thread hadn't entered condition_variable::wait() yet, causing join() to hang forever. This change merges WorkQueue into AppRuntime, eliminating split-lifetime issues, and dispatches cancellation as a work item via Append(). Since push() acquires the queue mutex, it blocks until the worker enters wait(), guaranteeing the notification is delivered. Changes: - Merge WorkQueue members into AppRuntime (thread, dispatcher, cancel source, env, suspension lock) - Remove WorkQueue.h and WorkQueue.cpp - Update AppRuntime_JSI.cpp TaskRunnerAdapter to use AppRuntime::Dispatch - Add deterministic regression test using arcana testing hooks - Fix member declaration order so m_options outlives worker thread The regression test uses arcana::set_before_wait_callback() to sleep while holding the queue mutex before wait(), deterministically triggering the race. See BabylonJS#146 for the test running against the old broken code. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 08b5ecc commit d90e7b8

File tree

11 files changed

+143
-132
lines changed

11 files changed

+143
-132
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ FetchContent_Declare(AndroidExtensions
1414
GIT_TAG 2d5af72259cc73e5f249d3c99bee2010be9cb042
1515
EXCLUDE_FROM_ALL)
1616
FetchContent_Declare(arcana.cpp
17-
GIT_REPOSITORY https://github.com/microsoft/arcana.cpp.git
18-
GIT_TAG 7c6be8aaa29effbc8ee1a2217388fb6e399150d2
17+
GIT_REPOSITORY https://github.com/bghgary/arcana.cpp.git
18+
GIT_TAG 8328898f159c1778cb16fa41c7ad57994961e5e8
1919
EXCLUDE_FROM_ALL)
2020
FetchContent_Declare(asio
2121
GIT_REPOSITORY https://github.com/chriskohlhoff/asio.git

Core/AppRuntime/CMakeLists.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ set(SOURCES
99
"Include/Babylon/AppRuntime.h"
1010
"Source/AppRuntime.cpp"
1111
"Source/AppRuntime_${NAPI_JAVASCRIPT_ENGINE}.cpp"
12-
"Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}"
13-
"Source/WorkQueue.cpp"
14-
"Source/WorkQueue.h")
12+
"Source/AppRuntime_${JSRUNTIMEHOST_PLATFORM}.${IMPL_EXT}")
1513

1614
add_library(AppRuntime ${SOURCES})
1715
warnings_as_errors(AppRuntime)

Core/AppRuntime/Include/Babylon/AppRuntime.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,17 @@
66

77
#include <napi/utilities.h>
88

9+
#include <arcana/threading/dispatcher.h>
10+
911
#include <memory>
1012
#include <functional>
1113
#include <exception>
14+
#include <optional>
15+
#include <mutex>
16+
#include <thread>
1217

1318
namespace Babylon
1419
{
15-
class WorkQueue;
16-
1720
class AppRuntime final
1821
{
1922
public:
@@ -43,6 +46,23 @@ namespace Babylon
4346
static void BABYLON_API DefaultUnhandledExceptionHandler(const Napi::Error& error);
4447

4548
private:
49+
template<typename CallableT>
50+
void Append(CallableT callable)
51+
{
52+
if constexpr (std::is_copy_constructible<CallableT>::value)
53+
{
54+
m_dispatcher.queue([this, callable = std::move(callable)]() {
55+
callable(m_env.value());
56+
});
57+
}
58+
else
59+
{
60+
m_dispatcher.queue([this, callablePtr = std::make_shared<CallableT>(std::move(callable))]() {
61+
(*callablePtr)(m_env.value());
62+
});
63+
}
64+
}
65+
4666
// These three methods are the mechanism by which platform- and JavaScript-specific
4767
// code can be "injected" into the execution of the JavaScript thread. These three
4868
// functions are implemented in separate files, thus allowing implementations to be
@@ -61,7 +81,11 @@ namespace Babylon
6181
// extra logic around the invocation of a dispatched callback.
6282
void Execute(Dispatchable<void()> callback);
6383

64-
std::unique_ptr<WorkQueue> m_workQueue;
6584
Options m_options;
85+
std::optional<Napi::Env> m_env{};
86+
std::optional<std::scoped_lock<std::mutex>> m_suspensionLock{};
87+
arcana::cancellation_source m_cancelSource{};
88+
arcana::manual_dispatcher<128> m_dispatcher{};
89+
std::thread m_thread;
6690
};
6791
}

Core/AppRuntime/Source/AppRuntime.cpp

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include "AppRuntime.h"
2-
#include "WorkQueue.h"
32
#include <cassert>
43

54
namespace Babylon
@@ -10,8 +9,8 @@ namespace Babylon
109
}
1110

1211
AppRuntime::AppRuntime(Options options)
13-
: m_workQueue{std::make_unique<WorkQueue>([this] { RunPlatformTier(); })}
14-
, m_options{std::move(options)}
12+
: m_options{std::move(options)}
13+
, m_thread{[this] { RunPlatformTier(); }}
1514
{
1615
Dispatch([this](Napi::Env env) {
1716
JsRuntime::CreateForJavaScript(env, [this](auto func) { Dispatch(std::move(func)); });
@@ -20,26 +19,53 @@ namespace Babylon
2019

2120
AppRuntime::~AppRuntime()
2221
{
22+
if (m_suspensionLock.has_value())
23+
{
24+
m_suspensionLock.reset();
25+
}
26+
27+
// Dispatch cancellation as a work item so the worker thread processes it
28+
// naturally via blocking_tick, avoiding the race condition where an external
29+
// cancel signal can be missed by the condition variable wait.
30+
Append([this](Napi::Env) {
31+
m_cancelSource.cancel();
32+
});
33+
34+
m_thread.join();
2335
}
2436

2537
void AppRuntime::Run(Napi::Env env)
2638
{
27-
m_workQueue->Run(env);
39+
m_env = std::make_optional(env);
40+
41+
m_dispatcher.set_affinity(std::this_thread::get_id());
42+
43+
while (!m_cancelSource.cancelled())
44+
{
45+
m_dispatcher.blocking_tick(m_cancelSource);
46+
}
47+
48+
// The dispatcher can be non-empty if something is dispatched after cancellation.
49+
m_dispatcher.clear();
2850
}
2951

3052
void AppRuntime::Suspend()
3153
{
32-
m_workQueue->Suspend();
54+
auto suspensionMutex = std::make_shared<std::mutex>();
55+
m_suspensionLock.emplace(*suspensionMutex);
56+
Append([suspensionMutex{std::move(suspensionMutex)}](Napi::Env) {
57+
std::scoped_lock lock{*suspensionMutex};
58+
});
3359
}
3460

3561
void AppRuntime::Resume()
3662
{
37-
m_workQueue->Resume();
63+
m_suspensionLock.reset();
3864
}
3965

4066
void AppRuntime::Dispatch(Dispatchable<void(Napi::Env)> func)
4167
{
42-
m_workQueue->Append([this, func{std::move(func)}](Napi::Env env) mutable {
68+
Append([this, func{std::move(func)}](Napi::Env env) mutable {
4369
Execute([this, env, func{std::move(func)}]() mutable {
4470
try
4571
{

Core/AppRuntime/Source/AppRuntime_JSI.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include "AppRuntime.h"
2-
#include "WorkQueue.h"
32

43
#include <napi/env.h>
54
#include <V8JsiRuntime.h>
@@ -10,24 +9,23 @@ namespace
109
class TaskRunnerAdapter : public v8runtime::JSITaskRunner
1110
{
1211
public:
13-
TaskRunnerAdapter(Babylon::WorkQueue& workQueue)
14-
: m_workQueue(workQueue)
12+
TaskRunnerAdapter(Babylon::AppRuntime& appRuntime)
13+
: m_appRuntime(appRuntime)
1514
{
1615
}
1716

1817
void postTask(std::unique_ptr<v8runtime::JSITask> task) override
1918
{
20-
std::shared_ptr<v8runtime::JSITask> shared_task(task.release());
21-
m_workQueue.Append([shared_task2 = std::move(shared_task)](Napi::Env) {
22-
shared_task2->run();
19+
m_appRuntime.Dispatch([task = std::shared_ptr<v8runtime::JSITask>(std::move(task))](Napi::Env) {
20+
task->run();
2321
});
2422
}
2523

2624
private:
2725
TaskRunnerAdapter(const TaskRunnerAdapter&) = delete;
2826
TaskRunnerAdapter& operator=(const TaskRunnerAdapter&) = delete;
2927

30-
Babylon::WorkQueue& m_workQueue;
28+
Babylon::AppRuntime& m_appRuntime;
3129
};
3230
}
3331

@@ -37,7 +35,7 @@ namespace Babylon
3735
{
3836
v8runtime::V8RuntimeArgs args{};
3937
args.inspectorPort = 5643;
40-
args.foreground_task_runner = std::make_shared<TaskRunnerAdapter>(*m_workQueue);
38+
args.foreground_task_runner = std::make_shared<TaskRunnerAdapter>(*this);
4139
const auto runtime = v8runtime::makeV8Runtime(std::move(args));
4240

4341
const auto env = Napi::Attach(*runtime);

Core/AppRuntime/Source/WorkQueue.cpp

Lines changed: 0 additions & 49 deletions
This file was deleted.

Core/AppRuntime/Source/WorkQueue.h

Lines changed: 0 additions & 58 deletions
This file was deleted.

Tests/UnitTests/Android/app/src/main/cpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ add_library(UnitTestsJNI SHARED
2121
${UNIT_TESTS_DIR}/Shared/Shared.cpp)
2222

2323
target_compile_definitions(UnitTestsJNI PRIVATE JSRUNTIMEHOST_PLATFORM="${JSRUNTIMEHOST_PLATFORM}")
24+
target_compile_definitions(UnitTestsJNI PRIVATE ARCANA_TESTING_HOOKS)
2425

2526
target_include_directories(UnitTestsJNI
2627
PRIVATE ${UNIT_TESTS_DIR})

Tests/UnitTests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ endif()
4545

4646
add_executable(UnitTests ${SOURCES} ${SCRIPTS} ${TYPE_SCRIPTS} ${ASSETS})
4747
target_compile_definitions(UnitTests PRIVATE JSRUNTIMEHOST_PLATFORM="${JSRUNTIMEHOST_PLATFORM}")
48+
target_compile_definitions(UnitTests PRIVATE ARCANA_TESTING_HOOKS)
4849

4950
if(ENABLE_SANITIZERS)
5051
target_compile_definitions(UnitTests PRIVATE JSRUNTIMEHOST_SANITIZERS_ENABLED=1)

0 commit comments

Comments
 (0)