Skip to content

Commit 15bb2f1

Browse files
bghgaryCopilot
andcommitted
Revert WorkQueue destructor to old broken code + updated test
Reverts the deadlock fix to demonstrate the test detects it. Test updated with cleanup from BabylonJS#151. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 598f004 commit 15bb2f1

2 files changed

Lines changed: 70 additions & 71 deletions

File tree

Core/AppRuntime/Source/WorkQueue.cpp

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,8 @@ namespace Babylon
1414
Resume();
1515
}
1616

17-
// Cancel immediately so pending work is dropped promptly, then append
18-
// a no-op work item to wake the worker thread from blocking_tick. The
19-
// no-op goes through push() which acquires the queue mutex, avoiding
20-
// the race where a bare notify_all() can be missed by wait().
21-
//
22-
// NOTE: This preserves the existing shutdown behavior where pending
23-
// callbacks are dropped on cancellation. A more complete solution
24-
// would add cooperative shutdown (e.g. NotifyDisposing/Rundown) so
25-
// consumers can finish cleanup work before the runtime is destroyed.
2617
m_cancelSource.cancel();
27-
Append([](Napi::Env) {});
18+
m_dispatcher.cancelled();
2819

2920
m_thread.join();
3021
}

Tests/UnitTests/Shared/Shared.cpp

Lines changed: 69 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -131,82 +131,90 @@ TEST(Console, Log)
131131

132132
TEST(AppRuntime, DestroyDoesNotDeadlock)
133133
{
134-
// Deterministic test for the race condition in the AppRuntime destructor.
134+
// Regression test verifying AppRuntime destruction doesn't deadlock.
135+
// Uses a global arcana hook to sleep while holding the queue mutex
136+
// before wait(), ensuring the worker is in the vulnerable window
137+
// when the destructor fires. See #147 for details on the bug and fix.
135138
//
136-
// A global hook sleeps WHILE HOLDING the queue mutex, right before
137-
// condition_variable::wait(). We synchronize so the worker is definitely
138-
// in the hook before triggering destruction.
139+
// The entire test runs on a separate thread so the gtest thread can
140+
// detect a deadlock via timeout without hanging the process.
139141
//
140-
// Old (broken) code: cancel() + notify_all() fire without the mutex,
141-
// so the notification is lost while the worker sleeps → deadlock.
142-
// Fixed code: cancel() + Append(no-op), where push() NEEDS the mutex,
143-
// so it blocks until the worker enters wait() → notification delivered.
144-
145-
// Shared state for hook synchronization
146-
std::atomic<bool> hookEnabled{false};
147-
std::atomic<bool> hookSignaled{false};
142+
// Test flow:
143+
//
144+
// Test Thread Worker Thread
145+
// ----------- -------------
146+
// 1. Create AppRuntime Worker starts, enters blocking_tick
147+
// Wait for init to complete
148+
// 2. Install hook
149+
// Dispatch(no-op) Worker wakes, runs no-op,
150+
// returns to blocking_tick
151+
// Hook fires:
152+
// signal workerInHook
153+
// sleep 200ms (holding mutex!)
154+
// 3. workerInHook.wait()
155+
// Worker is sleeping in hook
156+
// 4. ~AppRuntime():
157+
// cancel()
158+
// Append(no-op):
159+
// push() blocks ------> (worker holds mutex)
160+
// 200ms sleep ends
161+
// wait(lock) releases mutex
162+
// push() acquires mutex
163+
// pushes, notifies ---> wakes up!
164+
// join() waits drains no-op, cancelled -> exit
165+
// join() returns <----- thread exits
166+
// 5. destroy completes -> PASS
167+
168+
bool hookSignaled{false};
148169
std::promise<void> workerInHook;
149170

150-
// Set the callback. It checks hookEnabled so we control
151-
// when it actually sleeps.
152-
arcana::set_before_wait_callback([&]() {
153-
if (hookEnabled.load() && !hookSignaled.exchange(true))
154-
{
155-
workerInHook.set_value();
156-
}
157-
if (hookEnabled.load())
158-
{
171+
// Run the full lifecycle on a separate thread so the gtest thread
172+
// can detect a deadlock via timeout.
173+
std::promise<void> testDone;
174+
std::thread testThread([&]() {
175+
auto runtime = std::make_unique<Babylon::AppRuntime>();
176+
177+
// Wait for the runtime to fully initialize. The constructor dispatches
178+
// CreateForJavaScript which must complete before we install the hook
179+
// so the worker is idle and ready to enter the hook on the next wait.
180+
std::promise<void> ready;
181+
runtime->Dispatch([&ready](Napi::Env) {
182+
ready.set_value();
183+
});
184+
ready.get_future().wait();
185+
186+
// Install the hook and dispatch a no-op to wake the worker,
187+
// ensuring it cycles through the hook on its way back to idle.
188+
arcana::set_before_wait_callback([&]() {
189+
if (!hookSignaled)
190+
{
191+
hookSignaled = true;
192+
workerInHook.set_value();
193+
}
159194
std::this_thread::sleep_for(std::chrono::milliseconds(200));
160-
}
161-
});
195+
});
196+
runtime->Dispatch([](Napi::Env) {});
162197

163-
auto runtime = std::make_unique<Babylon::AppRuntime>();
198+
// Wait for the worker to be in the hook (holding mutex, sleeping)
199+
workerInHook.get_future().wait();
164200

165-
// Dispatch work and wait for completion
166-
std::promise<void> ready;
167-
runtime->Dispatch([&ready](Napi::Env) {
168-
ready.set_value();
201+
// Destroy — if the fix works, the destructor completes.
202+
// If broken, it deadlocks and the timeout detects it.
203+
runtime.reset();
204+
testDone.set_value();
169205
});
170-
ready.get_future().wait();
171206

172-
// Enable the hook and dispatch a no-op to wake the worker,
173-
// ensuring it cycles through the hook on its way back to idle
174-
hookEnabled.store(true);
175-
runtime->Dispatch([](Napi::Env) {});
207+
auto status = testDone.get_future().wait_for(std::chrono::seconds(5));
176208

177-
// Wait for the worker to be in the hook (holding mutex, sleeping)
178-
auto hookStatus = workerInHook.get_future().wait_for(std::chrono::seconds(5));
179-
if (hookStatus == std::future_status::timeout)
180-
{
181-
// Hook didn't fire — no deadlock risk, clean up normally
182-
arcana::set_before_wait_callback([]() {});
183-
FAIL() << "Worker thread did not enter before-wait hook";
184-
}
185-
186-
// Worker is in the hook (holding mutex, sleeping). Destroy on a
187-
// detachable thread so the test doesn't hang if the destructor deadlocks.
188-
auto runtimePtr = std::make_shared<std::unique_ptr<Babylon::AppRuntime>>(std::move(runtime));
189-
std::promise<void> destroyDone;
190-
auto destroyFuture = destroyDone.get_future();
191-
std::thread destroyThread([runtimePtr, &destroyDone]() {
192-
runtimePtr->reset();
193-
destroyDone.set_value();
194-
});
209+
arcana::set_before_wait_callback([]() {});
195210

196-
auto status = destroyFuture.wait_for(std::chrono::seconds(5));
197211
if (status == std::future_status::timeout)
198212
{
199-
destroyThread.detach();
200-
}
201-
else
202-
{
203-
destroyThread.join();
213+
testThread.detach();
214+
FAIL() << "Deadlock detected: AppRuntime destructor did not complete within 5 seconds";
204215
}
205216

206-
arcana::set_before_wait_callback([]() {});
207-
208-
ASSERT_NE(status, std::future_status::timeout)
209-
<< "Deadlock detected: AppRuntime destructor did not complete within 5 seconds";
217+
testThread.join();
210218
}
211219

212220
int RunTests()

0 commit comments

Comments
 (0)