Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 47 additions & 10 deletions .github/workflows/android-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,53 @@ jobs:
-noaudio -no-boot-anim
disable-animations: false
script: |
# Wait for system services after snapshot restore —
# sys.boot_completed can flip before the `input` service publishes,
# which makes emulator-runner's own `input keyevent 82` crash the
# emulator. We run this loop ourselves and skip the keyevent
# (disable-animations: false).
adb wait-for-device shell 'while [[ -z $(getprop sys.boot_completed) ]]; do sleep 1; done'
# Disable animations via settings (more reliable than input keyevent)
adb shell settings put global window_animation_scale 0.0
adb shell settings put global transition_animation_scale 0.0
adb shell settings put global animator_duration_scale 0.0
# Post-snapshot service-readiness wait.
#
# `sys.boot_completed` flips as soon as zygote is alive, which on
# a snapshot restore happens before `system_server` finishes
# binding the `settings`, `package`, and `activity` services.
# The previous wait-for-device + `getprop` loop returned almost
# instantly because the property was already set, and the very
# next `adb shell settings put` then crashed with
# `cmd: Failure calling service settings: Broken pipe (32)` and
# exit-code 224 — the failure mode this is fixing.
#
# We poll each service we're about to call until it actually
# responds, with a hard 120s ceiling. Single retry on each
# `settings put` afterwards covers any final transient hiccup.
wait_for_emulator() {
local deadline=$((SECONDS + 120))
while [[ $SECONDS -lt $deadline ]]; do
# adb 'tr -d \r' strips the CRLF Windows-style line endings
# the device sends back, otherwise the [[ == "1" ]] check
# fails on a stray \r.
bc=$(adb shell getprop sys.boot_completed 2>/dev/null | tr -d '\r')
if [[ "$bc" == "1" ]] \
&& adb shell 'pm path android' >/dev/null 2>&1 \
&& adb shell 'cmd settings list global' >/dev/null 2>&1; then
return 0
fi
sleep 1
done
echo "::error::Emulator services did not become ready within 120s" >&2
adb shell getprop | grep -E 'boot|svc' >&2 || true
return 1
}
wait_for_emulator
# Disable animations via settings (more reliable than input
# keyevent — emulator-runner's keyevent path crashes on some
# snapshots, which is why disable-animations is left at
# `false` and we drive the settings here directly).
#
# One retry on each `settings put` because the service can
# still hiccup briefly even after the readiness probe passes
# (the probe verifies it answers a `list`, but `put` opens a
# write transaction which can race with first-time
# initialisation).
for s in window_animation_scale transition_animation_scale animator_duration_scale; do
adb shell settings put global "$s" 0.0 \
|| { sleep 2; adb shell settings put global "$s" 0.0; }
done
# -i prints per-test STARTED/PASSED/FAILED so a hanging test is
# identifiable
cd apps/example/android && ./gradlew :comapeo-core-react-native:connectedDebugAndroidTest :app:connectedDebugAndroidTest --no-daemon -i -PreactNativeArchitectures=x86_64
Expand Down
70 changes: 70 additions & 0 deletions android/src/androidTest/java/com/comapeo/core/NodeJSIPCTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,76 @@ class NodeJSIPCTest {
Thread.sleep(500)
}

/**
* `close()` is the synchronous teardown path used from
* `ComapeoCoreModule.OnDestroy` (the Expo lifecycle hook that fires on
* React Native JS context reload). The contract — distinct from
* `disconnect()` — is that the underlying socket FD is released on the
* calling thread before `close()` returns, so the peer (the Node.js
* backend over AF_UNIX) observes EOF before the next `OnCreate` opens a
* fresh connection. That ordering is what lets the backend's
* per-connection rpc-reflector cleanup run against the prior session
* rather than the new one.
*
* This test pins that contract: after `close()` returns, the server's
* blocking read on the same connection unblocks immediately with EOF
* (`-1` from `DataInputStream.read()`).
*/
@Test
fun closeReleasesSocketSynchronously() {
val connected = CountDownLatch(1)
val readResult = java.util.concurrent.atomic.AtomicInteger(Int.MIN_VALUE)
val readReturned = CountDownLatch(1)

startMockServer { input, _ ->
connected.countDown()
// Block until the client closes its end. Without
// synchronous close on the client, this read would only
// unblock after the OS GC's the orphaned fd — racy and
// user-visible-slow.
try {
val n = input.read()
readResult.set(n)
} catch (e: IOException) {
// Some kernels surface peer-close as IOException rather
// than EOF; either is a valid EOF signal — treat both
// as "peer closed".
readResult.set(-1)
}
readReturned.countDown()
}

val ipc = NodeJSIPC(socketFile) { msg -> receivedMessages.add(msg) }
assertTrue("Should connect within 10s", connected.await(10, TimeUnit.SECONDS))
// Let the IPC settle into Connected and start its receive loop
// before we close — otherwise we're testing a connect-cancel
// race rather than steady-state synchronous close.
Thread.sleep(200)

// The synchronous-close contract: the moment close() returns,
// the server's blocking read must already have observed EOF.
// We give the kernel a generous 1s window to deliver FIN — far
// longer than necessary on a local AF_UNIX socket but short
// enough that a regression to the old fire-and-forget close
// (which depends on the launched coroutine eventually running
// on Dispatchers.IO) shows up as a test failure.
ipc.close()

assertTrue(
"Server-side read must unblock with EOF within 1s of close() returning. " +
"Regression: if close() reverts to a launched-coroutine teardown, the " +
"FD release happens after the JS reload has already opened a new " +
"connection and the rpc-reflector cleanup runs against the wrong " +
"connection.",
readReturned.await(1, TimeUnit.SECONDS)
)
assertEquals(
"Server's read must return -1 (EOF) — peer closed cleanly.",
-1,
readResult.get()
)
}

@Test
fun handlesServerDisconnect() {
val connected = CountDownLatch(1)
Expand Down
24 changes: 22 additions & 2 deletions android/src/main/java/com/comapeo/core/ComapeoCoreModule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,17 @@ class ComapeoCoreModule : Module() {
}

override fun definition() = ModuleDefinition {
// OnCreate / OnDestroy are bound to the Expo `AppContext`, whose
// lifetime is the React Native JS runtime — not the Android
// Activity. They fire on every JS context tear-down/rebuild
// (dev reload, `DevSettings.reload()`, fast-refresh full
// reload), which is exactly the reconnect boundary we want for
// the RPC sockets: the previous JS session's rpc-reflector
// client is gone, but its event-listener subscriptions are
// still attached on the backend's MapeoManager. Closing the
// socket here forces the backend to observe the disconnect and
// tear those subscriptions down before the next JS session
// opens a fresh connection from a new OnCreate.
OnCreate {
val socketFile =
File(appContext.persistentFilesDirectory, ComapeoCoreService.COMAPEO_SOCKET_FILENAME)
Expand Down Expand Up @@ -190,8 +201,17 @@ class ComapeoCoreModule : Module() {
}

OnDestroy {
ipc.disconnect()
controlIpc.disconnect()
// Synchronous close so the AF_UNIX socket FD is released on
// this thread before OnDestroy returns. The fire-and-forget
// disconnect() would let the next OnCreate open a new
// connection while the old socket is still alive in a
// launched coroutine; a synchronous close means the backend
// sees EOF on the previous session before it accepts the
// new connection, so rpc-reflector's per-connection cleanup
// path (server.close → removeListener for every prior
// subscription) runs against the right connection.
ipc.close()
controlIpc.close()
}

OnActivityEntersForeground {
Expand Down
35 changes: 35 additions & 0 deletions android/src/main/java/com/comapeo/core/NodeJSIPC.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.Job
import kotlinx.coroutines.SupervisorJob
import kotlinx.coroutines.cancel
import kotlinx.coroutines.cancelAndJoin
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.delay
Expand Down Expand Up @@ -231,6 +232,40 @@ class NodeJSIPC(
connect()
sendChannel.trySend(message)
}

/**
* Synchronous, terminal teardown for module-destroy lifecycle.
*
* Unlike [disconnect] — which launches a coroutine and returns
* immediately — [close] closes the underlying [LocalSocket] on the
* calling thread before returning, so the peer (the Node.js backend
* over the AF_UNIX socket) observes EOF before [close] returns.
* That ordering is the point: it lets a caller running in a tight
* lifecycle window (e.g. the Expo module's `OnDestroy`, immediately
* before a fresh `OnCreate` opens a new connection) guarantee the
* old socket is gone before the new one is opened, so the backend's
* per-connection state (notably any rpc-reflector event-listener
* subscriptions registered against the long-lived handler) is torn
* down on the same reload that starts a fresh client.
*
* After [close] the instance must not be reused: the scope is
* cancelled and a fresh [NodeJSIPC] should be constructed.
*/
fun close() {
// Cancel the scope first so the receive/send coroutines stop
// touching the streams as we close them. socket.close() will
// also unblock any in-flight read/write with an IOException;
// the cancelled scope means the coroutines exit without
// re-entering disconnect().
scope.cancel()
sendChannel.close()
try { dataOutputStream?.close() } catch (_: Exception) {}
try { dataInputStream?.close() } catch (_: Exception) {}
if (::socket.isInitialized) {
try { socket.close() } catch (_: Exception) {}
}
state.value = State.Disconnected
}
}

private suspend fun connectWithRetry(
Expand Down
11 changes: 11 additions & 0 deletions backend/lib/message-port.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,16 @@ export class SocketMessagePort extends TypedEmitter {
this.#state = "closed";
this.#queue.length = 0;
this.#framedStream.destroy();
// Emit after `#state = "closed"` and `#framedStream.destroy()` so a
// listener that synchronously inspects state or writes a final
// postMessage observes the "closed" state and the destroyed
// stream. Consumers (notably `ComapeoRpcServer`, which wires
// `messagePort.on("close", () => server.close())` to remove every
// RPC event-listener subscription registered against
// `MapeoManager` for this connection) rely on this event firing
// exactly once when the underlying socket goes away — without it
// the rpc-reflector cleanup never runs, and listeners accumulate
// across client reconnects (e.g. on every React Native JS reload).
this.emit("close");
}
}
1 change: 1 addition & 0 deletions backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"type": "module",
"scripts": {
"build": "rollup -c",
"test": "node --test \"test/*.test.js\"",
"types": "tsc"
},
"keywords": [],
Expand Down
Loading
Loading