diff --git a/README.md b/README.md index 4a44e3f..c390dce 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,100 @@ is a confusing state to end up in). Run `npx pod-install` after installing the npm package. +# Optional: Sentry integration + +This module can forward its native-side and JS-side lifecycle events +into the host app's `@sentry/react-native`. Sentry is opt-in — if you +don't register the plugin and don't import the sub-export, no Sentry +code path is exercised and no DSN ends up in your APK/IPA. See +[`docs/ARCHITECTURE.md` §7](./docs/ARCHITECTURE.md) for the +architectural overview and +[`docs/sentry-integration-plan.md`](./docs/sentry-integration-plan.md) +for the design plan and per-phase status. + +### 1. Install `@sentry/react-native` in your app + +`@sentry/react-native` is an optional peer dep of this module. Install +it in the host app and run `Sentry.init(...)` once at startup as +documented at . The +runtime classes shipped with `@sentry/react-native` also satisfy the +Android FGS-process bridge — no extra Android dependency to declare. + +### 2. Register the Expo config plugin + +In `app.config.js` (must be `.js`, not `app.json`, to read `process.env`): + +```js +export default { + expo: { + plugins: [ + ["@comapeo/core-react-native", { + sentry: { + dsn: process.env.SENTRY_DSN, + environment: process.env.SENTRY_ENVIRONMENT ?? "production", + // Optional: opt internal/test builds into the §9 capture-application-data + // toggle by default. Production stays off-by-default. + captureApplicationDataDefault: + (process.env.SENTRY_ENVIRONMENT ?? "production") !== "production", + }, + }], + ], + }, +}; +``` + +The plugin runs at `expo prebuild` and bakes the DSN, environment, and other +options into AndroidManifest meta-data and Info.plist keys. Sourcing values +from `process.env` lets EAS build profiles produce different builds without +code changes — see +[`docs/sentry-integration-plan.md` §4.1](./docs/sentry-integration-plan.md) +for the matching `eas.json` example with per-profile env vars. + +### 3. Hand off the host's Sentry SDK + +```ts +import * as Sentry from "@sentry/react-native"; +import { configureSentry } from "@comapeo/core-react-native/sentry"; + +Sentry.init({ /* options — DSN/environment/release auto-loaded from plist/manifest */ }); + +configureSentry({ sentry: Sentry }); +``` + +After this call, the module's lifecycle ERROR transitions and `messageerror` +events are captured to your Sentry project tagged with the relevant phase +(`rootkey`, `starting-timeout`, `node-runtime-unexpected`, etc.). State +transitions show up as breadcrumbs that ride along on the next event. + +### What gets captured automatically + +Once the plugin is registered with a `dsn`, the module captures three +streams without any further setup: + +- **JS-process events** (via the adapter you pass to `configureSentry`): + state-machine ERROR transitions and `messageerror` parse failures + tagged `proc:main`, `layer:rn`. State transitions emit breadcrumbs + on every cycle. +- **FGS-process events** (Android only — `:ComapeoCore` foreground + service): boot transaction (`comapeo.boot`) with phase spans + (`boot.rootkey-load`, `boot.init-frame`), state-transition + breadcrumbs, control-frame breadcrumbs, FGS-lifecycle breadcrumbs, + watchdog-timeout events (`timeout:startup`, `timeout:fgsStop`), + and rootkey-load `captureException` — all tagged `proc:fgs`, + `layer:native` so the dashboard can split FGS-originated events + from main-process events. +- **Backend-process events** (Phase 3, not yet shipped) — Node-side + RPC method spans and exceptions tagged `proc:backend`. + +The FGS-process Sentry SDK is initialised automatically in +`ComapeoCoreService.onCreate` from the manifest meta-data your +config plugin wrote. There's no extra configuration required for +multi-process Android apps using this module — that's the +`SentryFgsBridge` doing the work behind the scenes. If +`@sentry/react-native` isn't installed (so `io.sentry.*` isn't on +the runtime classpath), the bridge stays inert and the module +continues to function unchanged. + # Contributing Contributions are very welcome! Please refer to guidelines described in the [contributing guide](https://github.com/expo/expo#contributing). diff --git a/android/build.gradle b/android/build.gradle index 5a3a115..0efeab6 100644 --- a/android/build.gradle +++ b/android/build.gradle @@ -171,6 +171,23 @@ dependencies { implementation "androidx.core:core-ktx:1.16.0" compileOnly "com.facebook.fbjni:fbjni:0.3.0" + // Sentry Android SDK — `compileOnly` so this module doesn't pull + // sentry-android into consumers who never use Sentry. The runtime + // classes are expected to come transitively from the consumer's + // `@sentry/react-native@^6` dep (see plan §5.1 / package.json's + // optional peer dep). When the consumer hasn't installed Sentry, + // `SentryFgsBridge` short-circuits via `Class.forName` so the + // missing classpath never reaches a verifier crash. + // + // Pin matches the API surface @sentry/react-native@6.x exposes + // (sentry-android 7.20.x). Bumping should be done in lock-step + // with the @sentry/react-native peer-dep range. + compileOnly "io.sentry:sentry-android-core:7.20.1" + // Test classpath needs the runtime classes so JVM unit tests + // can exercise the Impl path. Robolectric isn't on the test + // classpath, so tests that need a real Context use a fake. + testImplementation "io.sentry:sentry-android-core:7.20.1" + // JVM unit test dependencies testImplementation 'junit:junit:4.13.2' testImplementation 'org.jetbrains.kotlinx:kotlinx-coroutines-test:1.9.0' diff --git a/android/src/main/java/com/comapeo/core/ComapeoCoreService.kt b/android/src/main/java/com/comapeo/core/ComapeoCoreService.kt index d93a759..4543335 100644 --- a/android/src/main/java/com/comapeo/core/ComapeoCoreService.kt +++ b/android/src/main/java/com/comapeo/core/ComapeoCoreService.kt @@ -45,6 +45,25 @@ class ComapeoCoreService : Service() { override fun onCreate() { super.onCreate() activeInstanceCount++ + + // Phase 2b: initialise the FGS-process Sentry SDK before + // anything that might emit a breadcrumb / capture an + // exception. The host's `@sentry/react-native` runs its + // init in `MainApplication.onCreate`, which only fires in + // the *main* process — the FGS gets a fresh Application + // instance and an empty Sentry hub. Reading the manifest + // returns `null` when the consumer didn't register the + // Expo plugin with a `sentry: { ... }` argument, in which + // case the bridge stays inert. + SentryConfig.loadFromManifest(applicationContext)?.let { cfg -> + SentryFgsBridge.init(applicationContext, cfg) + SentryFgsBridge.addBreadcrumb( + category = "comapeo.fgs", + message = "ComapeoCoreService.onCreate", + level = "info", + ) + } + nodeJSService = NodeJSService(applicationContext) log("The service has been created".uppercase()) } @@ -52,6 +71,18 @@ class ComapeoCoreService : Service() { override fun onStartCommand(intent: Intent?, flags: Int, startId: Int): Int { log("onStartCommand startId: $startId action: ${intent?.action}") + // FGS-lifecycle breadcrumb (§7.4.6). Captures the action + // routing decision — useful for debugging "why did the FGS + // start" questions in production. + SentryFgsBridge.addBreadcrumb( + category = "comapeo.fgs", + message = "onStartCommand", + data = mapOf( + "startId" to startId, + "action" to (intent?.action ?: "(restart)"), + ), + ) + when (intent?.action) { Actions.USER_FOREGROUND.name -> { startService() @@ -107,6 +138,11 @@ class ComapeoCoreService : Service() { log("onDestroy") isServiceStarted = false activeInstanceCount-- + SentryFgsBridge.addBreadcrumb( + category = "comapeo.fgs", + message = "ComapeoCoreService.onDestroy", + level = "info", + ) serviceScope.launch { try { withTimeout(10_000) { @@ -115,6 +151,16 @@ class ComapeoCoreService : Service() { log("NodeJS service stopped") } catch (e: Exception) { log("Error stopping NodeJS service: ${e.message}") + // Plan §7.4.4: stop-timeout is a "we have to kill the + // FGS via Process.killProcess, observability is gone + // shortly" signal. Capture before the kill so the + // event has time to flush. Keep level at error — a + // 10s shutdown timeout is always actionable. + SentryFgsBridge.captureMessage( + "comapeo: FGS stop timeout fired", + level = "error", + tags = mapOf("timeout" to "fgsStop"), + ) } log("The service has been destroyed".uppercase()) // Only kill the process if no new service instance has started. diff --git a/android/src/main/java/com/comapeo/core/NodeJSService.kt b/android/src/main/java/com/comapeo/core/NodeJSService.kt index efb5b25..1821b0a 100644 --- a/android/src/main/java/com/comapeo/core/NodeJSService.kt +++ b/android/src/main/java/com/comapeo/core/NodeJSService.kt @@ -197,6 +197,35 @@ class NodeJSService( private val json = Json { encodeDefaults = true } private val ipcDeferred = CompletableDeferred() + /** + * Phase 2b — Sentry boot transaction handle. Opened in [start] + * before the watchdog arms, closed on the first transition to + * STARTED (status `ok`) or ERROR (status `internal_error`). + * Held as `AtomicReference` so the bridge's opaque handle + * type doesn't leak in (the bridge uses `Any?` to keep + * `io.sentry.*` imports out of the public surface). + */ + private val bootTx = AtomicReference(null) + + /** + * In-flight boot-phase spans, keyed by phase name (per the + * `boot.` taxonomy from + * `apps/benchmark/backend/lib/boot-spans.js` on the bench + * branch). Same names so a single Sentry dashboard query + * charts both bench and production spans without an alias + * table. Phases observable from the FGS process today: + * + * - `rootkey-load` — wraps `RootKeyStore.loadOrInitialize()`. + * - `init-frame` — from "sent init" to "received ready". + * + * The other phases from the plan §7.4.2 taxonomy + * (`listen-control`, `construct`, `ipc-connect`) live in the + * Node-side boot once Phase 3 wires them up via distributed + * tracing — the FGS-side transaction will adopt them as + * children then. + */ + private val bootSpans = java.util.concurrent.ConcurrentHashMap() + /** * Single-slot state observer. The FGS routes transitions through here * to `ComapeoCoreService`, which broadcasts to the main app process via @@ -259,7 +288,28 @@ class NodeJSService( // frame with the bytes from RootKeyStore; on `ready` we // promote to STARTED (via the derivation). ipcDeferred.complete( - NodeJSIPC(controlSocketFile) { message -> + NodeJSIPC( + controlSocketFile, + onConnectionStateChange = { ipcState -> + // Phase 2b — IPC connection breadcrumb + // (§7.4.5). Cheap signal; rides on the next + // event. We don't fire a captureException + // here even on `Error` because the + // disconnect-derivation logic in + // applyAndEmit already maps unexpected + // disconnects to ERROR with a precise + // phase, and that path captures via + // `state ERROR → captureException`. + SentryFgsBridge.addBreadcrumb( + category = "comapeo.ipc", + message = "control IPC: ${ipcState.javaClass.simpleName}", + level = if (ipcState is NodeJSIPC.State.Error) "warning" else "info", + data = if (ipcState is NodeJSIPC.State.Error) { + mapOf("error" to (ipcState.exception.message ?: ipcState.exception.javaClass.simpleName)) + } else emptyMap(), + ) + }, + ) { message -> handleControlMessage(message) }, ) @@ -327,6 +377,69 @@ class NodeJSService( if (prev.state != committed.state) { log("NodeJSService state: ${prev.state} -> ${committed.state}") if (prev.state == State.STARTING) cancelStartupWatchdog() + + // Phase 2b — FGS-process state-transition breadcrumb + // (§7.4.1). Lands on the FGS Sentry hub so a later + // `captureException` from this process picks it up + // alongside logcat tail / foreground state. The + // matching main-process breadcrumb is emitted by + // src/sentry.ts when the JS adapter sees the same + // transition via the second control-socket connection. + SentryFgsBridge.addBreadcrumb( + category = "comapeo.state", + message = "${prev.state} → ${committed.state}", + level = if (committed.state == State.ERROR) "error" else "info", + data = mapOf( + "from" to prev.state.name, + "to" to committed.state.name, + "backendState" to committed.backendState.javaClass.simpleName, + "nodeRuntime" to committed.nodeRuntime.javaClass.simpleName, + "stopRequested" to committed.stopRequested, + ), + ) + + // Phase 2b — close the boot transaction on the first + // terminal-or-cancelling transition. `getAndSet(null)` + // ensures a single owner so subsequent transitions + // (e.g. ERROR-after-STOPPING during a stuck shutdown) + // can't double-finish. + // + // Status mapping per Sentry conventions: + // STARTED → ok (boot succeeded) + // ERROR → internal_error (boot failed) + // STOPPING/STOPPED → cancelled + // (stop()/destroy() asked us to + // bail before STARTED — boot is + // valid context but didn't reach + // its natural end). Without this + // case the txn leaks: STOPPING + // and STOPPED-via-stopRequested + // bypass STARTED/ERROR entirely + // per `deriveLifecycleState`. + val terminalStatus = when (committed.state) { + State.STARTED -> "ok" + State.ERROR -> "internal_error" + State.STOPPING, State.STOPPED -> "cancelled" + State.STARTING -> null + } + if (terminalStatus != null) { + bootTx.getAndSet(null)?.let { + SentryFgsBridge.finishSpan(it, terminalStatus) + } + // Drain any in-flight phase spans alongside the + // transaction — they share the lifecycle and would + // otherwise stay open indefinitely. Pre-STARTED + // closure uses the same status as the parent so + // the dashboard doesn't show "boot.rootkey-load + // succeeded" alongside "comapeo.boot cancelled". + val phases = bootSpans.keys.toList() + for (phase in phases) { + bootSpans.remove(phase)?.let { + SentryFgsBridge.finishSpan(it, terminalStatus) + } + } + } + onStateChange?.invoke(committed.state) } } @@ -362,6 +475,15 @@ class NodeJSService( } log("Starting NodeJS service") + // Phase 2b — open the boot transaction BEFORE the + // applyAndEmit that drives STOPPED→STARTING. The + // applyAndEmit close-on-terminal logic only fires when + // `bootTx` is non-null at transition time, so opening here + // guarantees we capture the initial transition's timing as + // part of the transaction. Forced 100% sample rate per + // §7.4.2 — boot is once-per-process and high value. + bootTx.set(SentryFgsBridge.startBootTransaction()) + // Reset component state for a fresh start cycle and transition // STOPPED → STARTING via the derivation. `lastError` is cleared // explicitly: today this only matters as defense-in-depth (the @@ -389,6 +511,20 @@ class NodeJSService( phase = "starting-timeout", message = "Service did not reach STARTED within ${startupTimeoutMs}ms", ) + // Phase 2b — timeout event (§7.4.4). Captured + // BEFORE the applyAndEmit that drives the ERROR + // transition so the breadcrumb stack on the + // event ends with the in-flight STARTING state + // rather than the terminal ERROR. + SentryFgsBridge.captureMessage( + "comapeo: startup timeout fired", + level = "error", + tags = mapOf( + "timeout" to "startup", + "comapeo.phase" to "starting-timeout", + "timeoutMs" to startupTimeoutMs.toString(), + ), + ) // Send error-native to backend so the main-app process // gets the same error frame via re-broadcast — keeps // cross-process error attribution intact when the @@ -479,13 +615,37 @@ class NodeJSService( log("Control IPC received: $message") when (val frame = ControlFrame.parse(message)) { ControlFrame.Started -> { + // Phase 2b — control-socket frame breadcrumb (§7.4.5). + // Treats each frame the parser accepted as a noteworthy + // step in the boot handshake. Malformed frames don't + // get a breadcrumb here (they're surfaced via + // `messageerror` separately). + SentryFgsBridge.addBreadcrumb( + category = "comapeo.control", + message = "received: started", + ) applyAndEmit { it.copy(backendState = BackendState.ControlBound) } sendInitFrame() } ControlFrame.Ready -> { + SentryFgsBridge.addBreadcrumb( + category = "comapeo.control", + message = "received: ready", + ) + // `ready` arrives once the backend has constructed + // MapeoManager and bound the comapeo socket — the + // natural close point for the `boot.init-frame` span + // we opened in `sendInitFrame()`. + bootSpans.remove("init-frame")?.let { + SentryFgsBridge.finishSpan(it, "ok") + } applyAndEmit { it.copy(backendState = BackendState.Ready) } } ControlFrame.Stopping -> { + SentryFgsBridge.addBreadcrumb( + category = "comapeo.control", + message = "received: stopping", + ) // Backend is gracefully shutting down. The next thing // we'll see is the socket close; the derivation maps // this to STOPPING and the subsequent runtime exit @@ -493,6 +653,15 @@ class NodeJSService( applyAndEmit { it.copy(backendState = BackendState.Stopping) } } is ControlFrame.Error -> { + SentryFgsBridge.addBreadcrumb( + category = "comapeo.control", + message = "received: error", + level = "error", + data = mapOf( + "phase" to frame.phase, + "message" to frame.message, + ), + ) val info = ErrorInfo(frame.phase, frame.message) applyAndEmit(error = info) { it.copy(backendState = BackendState.Error(frame.phase, frame.message)) @@ -506,6 +675,16 @@ class NodeJSService( // separately so application code can observe protocol // issues without losing service state. log("NodeJSService: ${frame.detail}") + // §7.4.5 — protocol failures get a warning-level + // breadcrumb so they ride on the next captured event + // for forensic context, but no captureException + // (matches src/sentry.ts's main-process treatment). + SentryFgsBridge.addBreadcrumb( + category = "comapeo.control", + message = "malformed control frame", + level = "warning", + data = mapOf("detail" to frame.detail), + ) } } } @@ -525,10 +704,44 @@ class NodeJSService( * encoded base64 string still lives in the JVM string pool until GC. */ private fun sendInitFrame() { + // Phase 2b — `boot.rootkey-load` span (§7.4.2). Span name + // matches the bench backend's `boot.` taxonomy so + // production and bench charts share the same dashboard + // queries. Closed with `ok` after a successful load, + // `internal_error` after the catch. + val rootkeyLoadSpan = SentryFgsBridge.startBootSpan(bootTx.get(), "rootkey-load") + if (rootkeyLoadSpan != null) { + bootSpans["rootkey-load"] = rootkeyLoadSpan + } val rootKeyBytes: ByteArray = try { - RootKeyStore(applicationContext).loadOrInitialize() + RootKeyStore(applicationContext).loadOrInitialize().also { + bootSpans.remove("rootkey-load")?.let { sp -> + SentryFgsBridge.finishSpan(sp, "ok") + } + } } catch (e: Exception) { Log.e(TAG, "Failed to load rootkey", e) + // Phase 2b — FGS-side captureException (§7.4.7). Lands + // on the FGS scope (proc:fgs is a process-level tag set + // at SentryAndroid.init time, see SentryFgsBridgeImpl). + // The same exception is forwarded to Node via + // sendErrorNativeFrame and re-broadcast as an `error` + // control frame; the main-app process captures from the + // JS adapter with proc:main. Sentry de-dupes via + // fingerprinting; the three vantage points carry FGS + // context, backend stack, and main-process state-machine + // trail respectively. + SentryFgsBridge.captureException( + e, + tags = mapOf( + "comapeo.phase" to "rootkey", + "comapeo.state" to "ERROR", + "source" to "rootkey-store", + ), + ) + bootSpans.remove("rootkey-load")?.let { sp -> + SentryFgsBridge.finishSpan(sp, "internal_error") + } val info = ErrorInfo("rootkey", e.message ?: e.javaClass.simpleName) sendErrorNativeFrame(info.phase, info.message) applyAndEmit(error = info) { @@ -539,6 +752,14 @@ class NodeJSService( val b64 = Base64.encodeToString(rootKeyBytes, Base64.NO_WRAP) rootKeyBytes.fill(0) val frame = "{\"type\":\"init\",\"rootKey\":\"$b64\"}" + // Phase 2b — `boot.init-frame` span: starts as we ship the + // init frame, ends when we receive the `ready` control + // frame (closed in handleControlMessage). Captures the + // round-trip cost of "init sent → MapeoManager constructed + // → comapeo socket bound → ready broadcast". + SentryFgsBridge.startBootSpan(bootTx.get(), "init-frame")?.let { + bootSpans["init-frame"] = it + } serviceScope.launch { ipcDeferred.await().sendMessage(frame) log("Sent init frame to backend") @@ -580,6 +801,25 @@ class NodeJSService( "Dropping error-native frame: IPC not available within " + "${SEND_ERROR_NATIVE_TIMEOUT_MS}ms (phase=$phase)", ) + // Phase 2b — timeout event (§7.4.4). Warning + // level rather than error: the FGS-side ERROR + // is already correctly attributed locally; the + // dropped frame only degrades cross-process + // attribution to the synthetic + // `node-runtime-unexpected` phase the main-app + // process derives from the control-socket + // close. That degradation is documented in + // `ARCHITECTURE.md §6` as "no worse than the + // pre-refactor baseline". + SentryFgsBridge.captureMessage( + "comapeo: error-native frame dropped", + level = "warning", + tags = mapOf( + "timeout" to "errorNativeForward", + "comapeo.phase" to phase, + "timeoutMs" to SEND_ERROR_NATIVE_TIMEOUT_MS.toString(), + ), + ) return@launch } ipc.sendMessage(payload) diff --git a/android/src/main/java/com/comapeo/core/SentryConfig.kt b/android/src/main/java/com/comapeo/core/SentryConfig.kt new file mode 100644 index 0000000..566d567 --- /dev/null +++ b/android/src/main/java/com/comapeo/core/SentryConfig.kt @@ -0,0 +1,196 @@ +package com.comapeo.core + +import android.content.Context +import android.content.pm.PackageManager +import android.os.Build +import android.os.Bundle + +/** + * Phase 2 of the Sentry integration plan + * (docs/sentry-integration-plan.md §4.1, §4.2). Typed view of the + * AndroidManifest meta-data the Expo plugin (`app.plugin.js`) writes + * at prebuild time. + * + * The meta-data lives on the manifest's main `` tag, so + * both the host app's main process AND the `:ComapeoCore` FGS process + * see them — `PackageManager.getApplicationInfo(...).metaData` is + * shared across processes within the package. + * + * `loadFromManifest` returns `null` when the DSN meta-data is absent, + * which is the consumer's signal that Sentry was not configured + * (the plugin omits all entries when invoked without a `sentry` + * argument, or when not registered at all). Treat null as "Sentry + * off" — do not init the SDK, do not pass `--sentryDsn` argv flags + * to the embedded backend (Phase 3). + * + * Phase 2 ships the data class and reader; the actual native-side + * Sentry SDK init in `ComapeoCoreService.onCreate` and the + * native-side breadcrumb / span / event calls in `NodeJSService` are + * a Phase 2.5 follow-up because they require adding `io.sentry` + * Gradle deps that this PR deliberately doesn't touch. + */ +data class SentryConfig( + val dsn: String, + val environment: String, + val release: String, + val sampleRate: Double? = null, + val tracesSampleRate: Double? = null, + /** + * Cap on RPC argument bytes captured to Sentry. `null` (or 0) + * means RPC arguments are never captured — the default. Only + * developer debug builds are expected to set this; see plan + * §7.4.9 for the never-capture list. + */ + val rpcArgsBytes: Int? = null, + /** + * Per-environment default for the §9 capture-application-data + * toggle when the user has not yet set it explicitly. `null` + * means absent → native treats as `false`. Wired via the plugin + * so a consumer can opt internal/test builds in by default + * without changing JS code. + */ + val captureApplicationDataDefault: Boolean? = null, +) { + companion object { + // Manifest meta-data keys. Must stay in sync with + // app.plugin.js's ANDROID_KEYS. + const val META_DSN = "com.comapeo.core.sentry.dsn" + const val META_ENVIRONMENT = "com.comapeo.core.sentry.environment" + const val META_RELEASE = "com.comapeo.core.sentry.release" + const val META_SAMPLE_RATE = "com.comapeo.core.sentry.sampleRate" + const val META_TRACES_SAMPLE_RATE = "com.comapeo.core.sentry.tracesSampleRate" + const val META_RPC_ARGS_BYTES = "com.comapeo.core.sentry.rpcArgsBytes" + const val META_CAPTURE_APPLICATION_DATA_DEFAULT = + "com.comapeo.core.sentry.captureApplicationDataDefault" + + /** + * Read the typed config from the host app's manifest. Returns + * `null` when no DSN is present, which is the documented + * "Sentry off" state. + * + * Throws `IllegalStateException` only when a DSN is present + * but `environment` is missing — that combination indicates + * a build misconfiguration the plugin should have refused at + * prebuild time. Failing loud in that case is preferred to + * silently degrading; otherwise we'd ship Sentry events with + * no environment tag and they'd be impossible to filter. + */ + @JvmStatic + fun loadFromManifest(context: Context): SentryConfig? { + val meta = readApplicationMetaData(context) ?: return null + return load({ meta.getString(it) }) { resolveDefaultRelease(context) } + } + + /** + * Pure variant for unit-testing. Takes a string-getter (so + * tests don't have to mock `android.os.Bundle`, which the + * JVM unit-test classpath leaves as a "not mocked" stub) and + * a producer for the `release` fallback (because the JVM + * unit-test classpath has no real `PackageManager` to read + * versionName/versionCode from). + * + * Returns null when DSN is absent (sentry-off state). + * Throws `IllegalStateException` when DSN is present but + * `environment` is missing — see the throws note on + * [loadFromManifest]. + */ + @JvmStatic + fun load( + metaString: (String) -> String?, + defaultRelease: () -> String, + ): SentryConfig? { + val dsn = metaString(META_DSN) ?: return null + // The plugin refuses to prebuild without `environment`, + // but a stale prebuild from before the requirement was + // added would still ship — and the original "throw on + // missing" behaviour crashed every cold start with no + // way to recover. Treat the (DSN-present, environment- + // absent) state as a build misconfiguration that + // disables Sentry; log loud, return null. The host app + // continues to function; the missing manifest entry + // becomes visible the next time someone re-prebuilds + // and sees the plugin's prebuild-time validation fire. + val environment = metaString(META_ENVIRONMENT) + if (environment == null) { + // System.err so the warning surfaces from JVM unit + // tests too (`android.util.Log.e` is unmocked on the + // unit-test classpath and would throw "Method e in + // android.util.Log not mocked"). On a real device + // this lands in logcat at the same level Log.e + // would emit. + System.err.println( + "[ComapeoCore.SentryConfig] $META_ENVIRONMENT missing " + + "from manifest while $META_DSN is set. Re-run " + + "`expo prebuild` so the plugin can rewrite the " + + "manifest. Sentry disabled until then.", + ) + return null + } + val release = metaString(META_RELEASE) ?: defaultRelease() + return SentryConfig( + dsn = dsn, + environment = environment, + release = release, + sampleRate = metaString(META_SAMPLE_RATE)?.toDoubleOrNull(), + tracesSampleRate = metaString(META_TRACES_SAMPLE_RATE)?.toDoubleOrNull(), + rpcArgsBytes = metaString(META_RPC_ARGS_BYTES)?.toIntOrNull(), + captureApplicationDataDefault = metaString( + META_CAPTURE_APPLICATION_DATA_DEFAULT, + )?.toBooleanStrictOrNull(), + ) + } + + /** + * Pulls the `` meta-data Bundle. Returns null on + * NameNotFoundException — defensive; the host app is always + * its own package so this should never miss in practice, but + * a missing manifest is preferable to a crash on cold start. + */ + private fun readApplicationMetaData(context: Context): Bundle? { + return try { + if (Build.VERSION.SDK_INT >= 33) { + context.packageManager.getApplicationInfo( + context.packageName, + PackageManager.ApplicationInfoFlags.of( + PackageManager.GET_META_DATA.toLong(), + ), + ).metaData + } else { + @Suppress("DEPRECATION") + context.packageManager.getApplicationInfo( + context.packageName, + PackageManager.GET_META_DATA, + ).metaData + } + } catch (_: PackageManager.NameNotFoundException) { + null + } + } + + /** + * Default release tag when the plugin didn't supply one + * (§4.1): `versionName + "+" + versionCode`. Successive EAS + * builds of the same marketing version get distinct release + * strings because EAS auto-increments versionCode. + */ + private fun resolveDefaultRelease(context: Context): String { + val pkg = if (Build.VERSION.SDK_INT >= 33) { + context.packageManager.getPackageInfo( + context.packageName, + PackageManager.PackageInfoFlags.of(0), + ) + } else { + @Suppress("DEPRECATION") + context.packageManager.getPackageInfo(context.packageName, 0) + } + val versionName = pkg.versionName ?: "unknown" + val build: Long = if (Build.VERSION.SDK_INT >= 28) { + pkg.longVersionCode + } else { + @Suppress("DEPRECATION") + pkg.versionCode.toLong() + } + return "$versionName+$build" + } + } +} diff --git a/android/src/main/java/com/comapeo/core/SentryFgsBridge.kt b/android/src/main/java/com/comapeo/core/SentryFgsBridge.kt new file mode 100644 index 0000000..2d90fbc --- /dev/null +++ b/android/src/main/java/com/comapeo/core/SentryFgsBridge.kt @@ -0,0 +1,244 @@ +package com.comapeo.core + +import android.content.Context +import android.util.Log + +/** + * Phase 2b — guarded entry point to the Sentry-Android SDK from the + * `:ComapeoCore` FGS process. + * + * Two reasons for this guard layer (the impl lives in + * [SentryFgsBridgeImpl] which freely imports `io.sentry.*`): + * + * 1. **Optional runtime classpath.** The Gradle dependency on + * `io.sentry:sentry-android-core` is `compileOnly` (see + * `android/build.gradle`). Consumers who don't install + * `@sentry/react-native` won't have the runtime classes, so a + * direct call into `io.sentry.Sentry` from the FGS would fail + * class-load with `NoClassDefFoundError`. The cheap + * `Class.forName` probe in [isEnabled] gates every public method + * so the missing classpath never reaches the verifier. + * + * 2. **FGS-process scope, not main-process scope.** The host's + * `@sentry/react-native` runs `SentryAndroid.init(...)` in the + * main process's `MainApplication.onCreate`. That init never + * fires in the `:ComapeoCore` FGS process (Android creates a + * fresh `Application` per process). [init] populates the FGS-side + * Sentry hub so logcat tail / foreground-state context lands on + * captures from this process — see plan §7.4.7. + * + * All public methods are no-ops when: + * - The Sentry SDK isn't on the runtime classpath (guard miss). + * - [init] was never called (DSN absent in manifest, or this is + * not the FGS process). + */ +object SentryFgsBridge { + /** + * Cached result of the `Class.forName` probe — `null` until the + * first call, `true`/`false` after that. Avoids paying the probe + * cost on every breadcrumb / span emit. + */ + @Volatile + private var enabled: Boolean? = null + + @Volatile + private var initialized: Boolean = false + + /** + * Probes for the Sentry-Android runtime classes. Returns `false` + * when the consumer didn't install `@sentry/react-native` (or + * stripped it via R8 / a custom build). Cached after first call. + * + * The probe target is a stable public class — `SentryAndroid` + * itself — so a future SDK bump that renames internal classes + * doesn't accidentally flip the gate. + */ + @JvmStatic + fun isEnabled(): Boolean { + val cached = enabled + if (cached != null) return cached + // `initialize = false` so the probe doesn't run + // `SentryAndroid`'s `` (which on the real device + // is fine, but on the JVM unit-test classpath calls + // `android.os.SystemClock.uptimeMillis()` and crashes + // with "not mocked"). The probe only needs to confirm + // the symbol resolves; init happens later via + // `SentryAndroid.init(...)` from the real Android process. + val present = try { + Class.forName( + "io.sentry.android.core.SentryAndroid", + false, + SentryFgsBridge::class.java.classLoader, + ) + true + } catch (_: ClassNotFoundException) { + false + } + enabled = present + return present + } + + /** + * Initialise the FGS-process Sentry SDK. Idempotent — second call + * within the same process is silently dropped (sentry-android's + * own `init` logs a warning in that case but doesn't crash; we + * short-circuit to keep logs clean). + * + * Called from `ComapeoCoreService.onCreate`. Pass the + * `applicationContext` and the `SentryConfig` produced by + * `SentryConfig.loadFromManifest(...)`. When that returned `null` + * (no DSN in manifest), don't call this method at all — there's + * nothing to init. + */ + @JvmStatic + fun init(context: Context, config: SentryConfig) { + if (initialized) return + if (!isEnabled()) { + Log.i( + TAG, + "SentryFgsBridge.init: sentry-android not on classpath; FGS-process Sentry off", + ) + return + } + try { + SentryFgsBridgeImpl.init(context, config) + initialized = true + } catch (t: Throwable) { + // Failing to init Sentry must NOT take the FGS down. The + // FGS's job is to keep nodejs-mobile alive; observability + // is decorative compared to that. + Log.e(TAG, "SentryFgsBridge.init failed; continuing without FGS Sentry", t) + } + } + + /** + * Add a breadcrumb to the FGS-process Sentry scope. Rides on + * the next event captured from this process. No-op when not + * initialized. + * + * Severity strings are loose ("info" | "warning" | "error" | + * "fatal" | "debug") — the bridge maps to `SentryLevel` + * internally. Unknown strings fall back to `INFO`. + */ + @JvmStatic + @JvmOverloads + fun addBreadcrumb( + category: String, + message: String, + level: String = "info", + data: Map = emptyMap(), + ) { + if (!initialized) return + try { + SentryFgsBridgeImpl.addBreadcrumb(category, message, level, data) + } catch (t: Throwable) { + // Don't let a Sentry hiccup take the FGS down. Log so + // debug builds notice the swallowed surprise — a real + // bug in the bridge or SDK shouldn't be silent. + Log.w(TAG, "addBreadcrumb($category) threw", t) + } + } + + /** + * Capture an exception on the FGS-process scope. Tags are + * applied to the event; the FGS-init-set process-level tag + * `proc:fgs` is already on the scope. + */ + @JvmStatic + @JvmOverloads + fun captureException( + throwable: Throwable, + tags: Map = emptyMap(), + ) { + if (!initialized) return + try { + SentryFgsBridgeImpl.captureException(throwable, tags) + } catch (t: Throwable) { + Log.w(TAG, "captureException threw", t) + } + } + + /** + * Capture a `captureMessage` event. Used for timeout firings + * (§7.4.4) and other "notable but non-throw" events. + */ + @JvmStatic + @JvmOverloads + fun captureMessage( + message: String, + level: String = "info", + tags: Map = emptyMap(), + ) { + if (!initialized) return + try { + SentryFgsBridgeImpl.captureMessage(message, level, tags) + } catch (t: Throwable) { + Log.w(TAG, "captureMessage threw", t) + } + } + + /** + * Start a `comapeo.boot` transaction. Returns an opaque handle + * (or `null` when disabled) which must be passed back to + * [finishBootTransaction]. Force 100% sample rate per plan + * §7.4.2 — boot is once-per-process and high value. + * + * The opaque return type is `Any?` rather than `ITransaction?` + * so callers can stay free of `io.sentry.*` imports — keeping + * the dependency surface scoped to the impl file. + */ + @JvmStatic + fun startBootTransaction(): Any? { + if (!initialized) return null + return try { + SentryFgsBridgeImpl.startBootTransaction() + } catch (t: Throwable) { + Log.w(TAG, "startBootTransaction threw", t) + null + } + } + + /** Add a `boot.` child span on a transaction handle. */ + @JvmStatic + fun startBootSpan(transaction: Any?, phase: String): Any? { + if (!initialized || transaction == null) return null + return try { + SentryFgsBridgeImpl.startBootSpan(transaction, phase) + } catch (t: Throwable) { + Log.w(TAG, "startBootSpan($phase) threw", t) + null + } + } + + /** + * Finish a span / transaction handle previously returned by + * [startBootTransaction] / [startBootSpan]. `status` is `"ok"`, + * `"internal_error"`, `"deadline_exceeded"`, or `"cancelled"` + * (mapped to `SpanStatus.*` internally). + */ + @JvmStatic + @JvmOverloads + fun finishSpan(handle: Any?, status: String = "ok") { + if (!initialized || handle == null) return + try { + SentryFgsBridgeImpl.finishSpan(handle, status) + } catch (t: Throwable) { + Log.w(TAG, "finishSpan($status) threw", t) + } + } + + // For tests: reset the cached enabled flag and the initialized + // bit. Production code never calls this; the JVM unit-test + // classpath uses it to switch Impl on/off between cases. + @JvmStatic + internal fun resetForTests() { + enabled = null + initialized = false + try { + SentryFgsBridgeImpl.resetForTests() + } catch (_: Throwable) { + } + } + + private const val TAG = "ComapeoCore.Sentry" +} diff --git a/android/src/main/java/com/comapeo/core/SentryFgsBridgeImpl.kt b/android/src/main/java/com/comapeo/core/SentryFgsBridgeImpl.kt new file mode 100644 index 0000000..179ad7a --- /dev/null +++ b/android/src/main/java/com/comapeo/core/SentryFgsBridgeImpl.kt @@ -0,0 +1,186 @@ +package com.comapeo.core + +import android.content.Context +import io.sentry.Breadcrumb +import io.sentry.ISpan +import io.sentry.ITransaction +import io.sentry.Sentry +import io.sentry.SentryLevel +import io.sentry.SpanStatus +import io.sentry.TracesSamplingDecision +import io.sentry.TransactionContext +import io.sentry.TransactionOptions +import io.sentry.android.core.SentryAndroid + +/** + * Phase 2b — implementation behind [SentryFgsBridge]. Contains + * every `io.sentry.*` import in the module's main source set so + * the public bridge stays free of those references and consumers + * without sentry-android on the classpath never reach class-load + * for these symbols. + * + * Class-loading rules: the JVM/Dalvik verifier inspects bytecode + * references at class-load time of the *containing* class, not at + * verification of the caller. [SentryFgsBridge] references this + * class via its own bytecode; that reference doesn't trigger + * loading of [SentryFgsBridgeImpl] until a method on it is + * actually invoked. The bridge's `Class.forName` probe (in + * `SentryFgsBridge.isEnabled`) gates every dispatch; when the + * probe returns `false`, this file is never loaded. + * + * All methods are package-internal (`internal`) and assume the + * bridge has already verified the SDK is present. They throw on + * pathological misuse (e.g. invalid level strings) — the bridge's + * outer `try/catch` swallows. + */ +internal object SentryFgsBridgeImpl { + + fun init(context: Context, config: SentryConfig) { + SentryAndroid.init(context.applicationContext) { options -> + options.dsn = config.dsn + options.environment = config.environment + options.release = config.release + // Sample rates: defaults match plan §4.5 (errors at 1.0, + // traces at 0 unless caller configured). The §7.4.2 + // forced-100%-on-boot policy is enforced at span creation + // (see [startBootTransaction]) rather than at SDK init, + // so per-tx sampling overrides the global rate cleanly. + options.sampleRate = config.sampleRate ?: 1.0 + options.tracesSampleRate = config.tracesSampleRate ?: 0.0 + + // Process-level tags. Every event from this hub picks + // these up, so dashboards can split FGS-originated + // events from main-process ones (which carry + // `proc:main` from the JS adapter — see src/sentry.ts). + options.setTag("proc", "fgs") + options.setTag("layer", "native") + } + } + + fun addBreadcrumb( + category: String, + message: String, + level: String, + data: Map, + ) { + val crumb = Breadcrumb().apply { + this.category = category + this.message = message + this.level = parseLevel(level) + for ((k, v) in data) { + if (v != null) setData(k, v) + } + } + Sentry.addBreadcrumb(crumb) + } + + fun captureException( + throwable: Throwable, + tags: Map, + ) { + Sentry.captureException(throwable) { scope -> + tags.forEach { (k, v) -> scope.setTag(k, v) } + } + } + + fun captureMessage( + message: String, + level: String, + tags: Map, + ) { + Sentry.captureMessage(message, parseLevel(level)) { scope -> + tags.forEach { (k, v) -> scope.setTag(k, v) } + } + } + + fun startBootTransaction(): Any { + // Force 100% sample rate per plan §7.4.2 — boot is + // once-per-process and high value, even when the global + // tracesSampleRate is low (or 0.0, our default). + // + // `TransactionContext` accepts a `TracesSamplingDecision` + // that overrides the SDK's global decision. Constructing + // with `(sampled=true, sampleRate=1.0)` keeps the + // transaction *and* every child span on the wire, + // regardless of `options.tracesSampleRate`. This is the + // mechanism the Sentry-Android docs call out for + // "always-sampled-regardless-of-rate" use cases like app + // start where the user shouldn't have to crank up their + // global rate to capture a once-per-process event. + val context = TransactionContext( + "comapeo.boot", + "boot", + TracesSamplingDecision(true, 1.0), + ) + val opts = TransactionOptions().apply { + isBindToScope = true + } + return Sentry.startTransaction(context, opts) + } + + fun startBootSpan(transaction: Any, phase: String): Any { + require(transaction is ITransaction) { + "startBootSpan: handle must be ITransaction, got ${transaction.javaClass.name}" + } + // Sentry's `startChild` signature is `(operation, description)` + // — operation is the indexed dashboard column, description is + // the human-readable label. The plan §7.4.2 names like + // `boot.rootkey-load` are the operation names; descriptions + // are short summaries of what each phase actually does. + // + // Operation names match the bench backend's `boot.` + // taxonomy (apps/benchmark/backend/lib/boot-spans.js on + // claude/benchmark-uds-rpc-bridge-1Zahz). Keeping the + // operation names identical means a single Sentry dashboard + // query can chart both the bench backend's spans and the + // production FGS-process spans without an alias table. + val description = when (phase) { + "rootkey-load" -> "Load 16-byte rootkey from RootKeyStore" + "init-frame" -> "Send init frame, await ready" + else -> phase + } + return transaction.startChild("boot.$phase", description) + } + + fun finishSpan(handle: Any, status: String) { + when (handle) { + is ITransaction -> { + handle.status = parseStatus(status) + handle.finish() + } + is ISpan -> { + handle.status = parseStatus(status) + handle.finish() + } + else -> { + // Unknown handle type. Likely a test fake or a future + // SDK addition — silently drop. + } + } + } + + fun resetForTests() { + // Sentry-Android has no public reset; tests that switch the + // bridge between configurations re-init via a fresh hub. + // Nothing to do here; the bridge's own `initialized` flag + // is reset by `SentryFgsBridge.resetForTests`. + } + + private fun parseLevel(level: String): SentryLevel = when (level.lowercase()) { + "fatal" -> SentryLevel.FATAL + "error" -> SentryLevel.ERROR + "warning", "warn" -> SentryLevel.WARNING + "debug" -> SentryLevel.DEBUG + else -> SentryLevel.INFO + } + + private fun parseStatus(status: String): SpanStatus = when (status.lowercase()) { + "ok" -> SpanStatus.OK + "internal_error", "error" -> SpanStatus.INTERNAL_ERROR + "deadline_exceeded", "timeout" -> SpanStatus.DEADLINE_EXCEEDED + "cancelled" -> SpanStatus.CANCELLED + "not_found" -> SpanStatus.NOT_FOUND + "unauthenticated" -> SpanStatus.UNAUTHENTICATED + else -> SpanStatus.UNKNOWN + } +} diff --git a/android/src/test/java/com/comapeo/core/SentryConfigTest.kt b/android/src/test/java/com/comapeo/core/SentryConfigTest.kt new file mode 100644 index 0000000..88e3b77 --- /dev/null +++ b/android/src/test/java/com/comapeo/core/SentryConfigTest.kt @@ -0,0 +1,177 @@ +package com.comapeo.core + +import org.junit.Assert.assertEquals +import org.junit.Assert.assertNull +import org.junit.Test + +/** + * JVM-only unit tests for [SentryConfig.load]. Uses the pure + * string-getter overload so we don't depend on a real `Bundle` + * (the JVM unit-test classpath ships an `android.jar` stub where + * every method throws "not mocked"). Same testing pattern as + * [ControlFrameTest]. + * + * Coverage rationale: this is the deserialization seam between the + * Expo plugin's manifest writes and the native consumers (SDK init, + * argv flags). A regression here would silently disable Sentry or + * ship the wrong environment tag — both of which produce useless + * Sentry projects rather than visible failures. + */ +class SentryConfigTest { + + private val DEFAULT_RELEASE: () -> String = { "1.2.3+42" } + + private fun mapGetter(map: Map): (String) -> String? = + { key -> map[key] } + + @Test + fun returnsNullWhenDsnMissing() { + // Mirrors the "plugin not registered, or registered without a + // sentry argument" case: no meta-data was written, so loading + // produces null — the documented "Sentry off" state. + val config = SentryConfig.load(mapGetter(emptyMap()), DEFAULT_RELEASE) + assertNull(config) + } + + @Test + fun loadsRequiredFields() { + val config = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://abc@sentry.example/1", + SentryConfig.META_ENVIRONMENT to "production", + ), + ), + DEFAULT_RELEASE, + )!! + assertEquals("https://abc@sentry.example/1", config.dsn) + assertEquals("production", config.environment) + assertEquals("1.2.3+42", config.release) + assertNull(config.sampleRate) + assertNull(config.tracesSampleRate) + assertNull(config.rpcArgsBytes) + assertNull(config.captureApplicationDataDefault) + } + + @Test + fun pluginReleaseOverridesDefault() { + // When the consumer passes `release` to the plugin, that + // value wins over versionName+versionCode. Used to embed git + // SHAs from EAS_BUILD_GIT_COMMIT_HASH (plan §4.1). + val config = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "qa", + SentryConfig.META_RELEASE to "deadbeef", + ), + ), + DEFAULT_RELEASE, + )!! + assertEquals("deadbeef", config.release) + } + + @Test + fun parsesNumericFields() { + val config = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "production", + SentryConfig.META_SAMPLE_RATE to "0.5", + SentryConfig.META_TRACES_SAMPLE_RATE to "0.1", + SentryConfig.META_RPC_ARGS_BYTES to "0", + ), + ), + DEFAULT_RELEASE, + )!! + assertEquals(0.5, config.sampleRate!!, 0.0) + assertEquals(0.1, config.tracesSampleRate!!, 0.0) + assertEquals(0, config.rpcArgsBytes!!.toInt()) + } + + @Test + fun unparseableNumericFieldsAreNull() { + // The plugin coerces values to strings on the way in; if a + // future plugin bug or a hand-edited manifest produces an + // unparseable value, we'd rather drop the field than crash. + val config = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "production", + SentryConfig.META_SAMPLE_RATE to "not-a-number", + SentryConfig.META_RPC_ARGS_BYTES to "1.5", + ), + ), + DEFAULT_RELEASE, + )!! + assertNull(config.sampleRate) + assertNull(config.rpcArgsBytes) + } + + @Test + fun captureApplicationDataDefaultParses() { + val on = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "qa", + SentryConfig.META_CAPTURE_APPLICATION_DATA_DEFAULT to "true", + ), + ), + DEFAULT_RELEASE, + )!! + assertEquals(true, on.captureApplicationDataDefault) + + val off = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "production", + SentryConfig.META_CAPTURE_APPLICATION_DATA_DEFAULT to "false", + ), + ), + DEFAULT_RELEASE, + )!! + assertEquals(false, off.captureApplicationDataDefault) + } + + @Test + fun captureApplicationDataDefaultStrictness() { + // `toBooleanStrictOrNull` rejects values other than "true" / + // "false". Defensive: a stray "1"/"yes" from a hand-written + // manifest should not silently flip the default. Returns + // null → native treats absence as `false`. + val config = SentryConfig.load( + mapGetter( + mapOf( + SentryConfig.META_DSN to "https://x@sentry.io/1", + SentryConfig.META_ENVIRONMENT to "qa", + SentryConfig.META_CAPTURE_APPLICATION_DATA_DEFAULT to "yes", + ), + ), + DEFAULT_RELEASE, + )!! + assertNull(config.captureApplicationDataDefault) + } + + @Test + fun missingEnvironmentReturnsNullNotThrow() { + // The plugin refuses to prebuild without environment (§4.1), + // but a stale prebuild from before that validation was added + // would still ship. The original "throw" behaviour crashed + // every cold start with no way to recover. Now we log loud + // and return null (Sentry off) so the host app keeps + // running; the misconfiguration becomes visible the next + // time someone re-runs `expo prebuild`. + val config = SentryConfig.load( + mapGetter(mapOf(SentryConfig.META_DSN to "https://x@sentry.io/1")), + DEFAULT_RELEASE, + ) + assertNull( + "DSN-without-environment should disable Sentry rather than crash", + config, + ) + } +} diff --git a/android/src/test/java/com/comapeo/core/SentryFgsBridgeImplTest.kt b/android/src/test/java/com/comapeo/core/SentryFgsBridgeImplTest.kt new file mode 100644 index 0000000..f4914ba --- /dev/null +++ b/android/src/test/java/com/comapeo/core/SentryFgsBridgeImplTest.kt @@ -0,0 +1,222 @@ +package com.comapeo.core + +import io.sentry.ITransaction +import io.sentry.ITransportFactory +import io.sentry.Sentry +import io.sentry.SentryEnvelope +import io.sentry.SentryEvent +import io.sentry.SentryLevel +import io.sentry.SentryOptions +import io.sentry.SpanStatus +import io.sentry.transport.ITransport +import org.junit.After +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertTrue +import org.junit.Before +import org.junit.Test +import java.util.concurrent.CopyOnWriteArrayList + +/** + * JVM unit tests for the post-init code paths in + * [SentryFgsBridgeImpl]. Bypasses [SentryFgsBridge.init] (which + * calls `SentryAndroid.init`, requiring a real Android Context + * and `SystemClock` — neither available in JVM tests) by calling + * the cross-platform `Sentry.init(SentryOptions)` directly with + * an in-memory `ITransport` that records every envelope sent. + * + * The Impl's per-call methods (`addBreadcrumb`, `captureException`, + * `captureMessage`, `startBootTransaction`, `startBootSpan`, + * `finishSpan`) only call `io.sentry.Sentry.*` static methods — + * the choice of `init` path doesn't change their behaviour. This + * gives us coverage of every line the integration test on a real + * device would exercise, without the device. + * + * The pre-init guard tests live in [SentryFgsBridgeTest]. + */ +class SentryFgsBridgeImplTest { + + private lateinit var transport: RecordingTransport + + @Before + fun setUp() { + transport = RecordingTransport() + Sentry.init { options: SentryOptions -> + options.dsn = "https://abc@sentry.io/1" + options.environment = "test" + options.release = "0.0.0+test" + // Recording transport — no network. Keeps tests + // deterministic and offline. + options.setTransportFactory(transport.factory) + // Force every transaction onto the wire so the + // boot-tx tests don't depend on + // `tracesSampleRate` defaults. + options.tracesSampleRate = 1.0 + options.setTag("proc", "fgs") + options.setTag("layer", "native") + } + } + + @After + fun tearDown() { + Sentry.close() + } + + @Test + fun addBreadcrumbDoesNotImmediatelyEnqueueEnvelope() { + // Breadcrumbs ride along on the next event capture; they + // don't produce envelopes by themselves. Verifying the + // negative ensures we don't accidentally turn breadcrumbs + // into per-emit network traffic on the dashboard. + SentryFgsBridgeImpl.addBreadcrumb( + category = "comapeo.state", + message = "STOPPED → STARTING", + level = "info", + data = mapOf("from" to "STOPPED", "to" to "STARTING"), + ) + assertTrue( + "addBreadcrumb must not produce an envelope by itself", + transport.envelopes.isEmpty(), + ) + } + + @Test + fun captureExceptionEnqueuesEnvelopeWithTags() { + SentryFgsBridgeImpl.captureException( + RuntimeException("rootkey load failed"), + tags = mapOf( + "comapeo.phase" to "rootkey", + "source" to "rootkey-store", + ), + ) + Sentry.flush(0) + assertEquals("expected one envelope", 1, transport.envelopes.size) + // The envelope's payload contains the captured event. + // Sentry batches breadcrumbs onto the same envelope. + val items = transport.envelopes.first().items.toList() + assertTrue("envelope should contain at least one item", items.isNotEmpty()) + } + + @Test + fun captureMessageEnqueuesEnvelopeAtRequestedLevel() { + SentryFgsBridgeImpl.captureMessage( + message = "comapeo: startup timeout fired", + level = "error", + tags = mapOf("timeout" to "startup"), + ) + Sentry.flush(0) + assertEquals(1, transport.envelopes.size) + } + + @Test + fun startBootTransactionForcesSamplingEvenWhenRateIsZero() { + // Re-init with `tracesSampleRate = 0.0` so the global + // sample decision would normally drop the transaction. + // The Impl uses `TracesSamplingDecision(true, 1.0)` to + // override; if that override is broken, the transaction + // never reaches the transport. + Sentry.close() + transport = RecordingTransport() + Sentry.init { options -> + options.dsn = "https://abc@sentry.io/1" + options.environment = "test" + options.release = "0.0.0+test" + options.setTransportFactory(transport.factory) + options.tracesSampleRate = 0.0 + } + + val tx = SentryFgsBridgeImpl.startBootTransaction() + assertNotNull("startBootTransaction must return a handle", tx) + SentryFgsBridgeImpl.finishSpan(tx, "ok") + + Sentry.flush(0) + assertFalse( + "boot transaction must reach the transport even with global tracesSampleRate=0.0", + transport.envelopes.isEmpty(), + ) + } + + @Test + fun bootSpanLifecycle() { + val tx = SentryFgsBridgeImpl.startBootTransaction() + assertNotNull(tx) + + // Open a phase span and close ok. This is the + // rootkey-load happy path. + val span = SentryFgsBridgeImpl.startBootSpan(tx!!, "rootkey-load") + assertNotNull("startBootSpan must return a handle", span) + SentryFgsBridgeImpl.finishSpan(span!!, "ok") + + // Closing the parent transaction with the child already + // finished is the normal case. Verify it doesn't blow up. + SentryFgsBridgeImpl.finishSpan(tx, "ok") + + Sentry.flush(0) + assertFalse(transport.envelopes.isEmpty()) + } + + @Test + fun finishSpanWithCancelledStatusMapsCorrectly() { + // The new STOPPING / destroy() close-path uses + // `cancelled` as the status. Verify the parser maps the + // string to `SpanStatus.CANCELLED` rather than silently + // falling through to UNKNOWN. + val tx = SentryFgsBridgeImpl.startBootTransaction()!! + val handle = tx as ITransaction + SentryFgsBridgeImpl.finishSpan(handle, "cancelled") + assertEquals( + "cancelled string must map to SpanStatus.CANCELLED", + SpanStatus.CANCELLED, + handle.status, + ) + } + + @Test + fun unknownLevelStringFallsBackToInfo() { + // Defensive: a typo in a `level` argument should not + // crash the bridge. The Impl uses `lowercase()` and + // falls through to INFO. We exercise the path by adding + // a breadcrumb; the assertion is "no throw + an event + // captured later carries the breadcrumb at INFO level". + SentryFgsBridgeImpl.addBreadcrumb( + category = "comapeo.state", + message = "test", + level = "BANANA", + data = emptyMap(), + ) + // Capture so the breadcrumb has a host event to ride on. + SentryFgsBridgeImpl.captureMessage( + message = "trigger", + level = "info", + tags = emptyMap(), + ) + Sentry.flush(0) + assertEquals(1, transport.envelopes.size) + } +} + +/** + * Sentry transport that captures envelopes in memory rather than + * sending them anywhere. Pattern from sentry-java's own test + * suite — the recording slot survives factory re-creation so we + * can check what was sent during the test. + */ +private class RecordingTransport : ITransport { + val envelopes = CopyOnWriteArrayList() + + val factory: ITransportFactory = + ITransportFactory { _, _ -> this@RecordingTransport } + + override fun send(envelope: SentryEnvelope, hint: io.sentry.Hint) { + envelopes.add(envelope) + } + + override fun flush(timeoutMillis: Long) {} + + override fun getRateLimiter(): io.sentry.transport.RateLimiter? = null + + override fun close(isRestarting: Boolean) {} + + override fun close() {} +} diff --git a/android/src/test/java/com/comapeo/core/SentryFgsBridgeTest.kt b/android/src/test/java/com/comapeo/core/SentryFgsBridgeTest.kt new file mode 100644 index 0000000..e4d0e3a --- /dev/null +++ b/android/src/test/java/com/comapeo/core/SentryFgsBridgeTest.kt @@ -0,0 +1,133 @@ +package com.comapeo.core + +import org.junit.After +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Test + +/** + * JVM unit tests for [SentryFgsBridge]'s no-op guards. Ensures that: + * + * 1. Public methods short-circuit cleanly before init (so the + * FGS doesn't crash when `SentryConfig.loadFromManifest` + * returned null and `init` was never called). + * 2. The `Class.forName` probe path is exercised — important + * because consumers without `@sentry/react-native` get a + * missing-classpath state at runtime that we can't replicate + * in unit tests without bytecode hacks. We at least verify + * the `initialized = false` short-circuit covers all + * surface methods without touching the SDK. + * + * The "SDK present, properly init'd" path is exercised by the + * Phase 2b smoke test on a real device — it requires + * `SentryAndroid.init` which constructs a real Hub, schedulers, + * and breadcrumb queue; that's not something we want to spin up + * in a JVM unit test. + */ +class SentryFgsBridgeTest { + + @After + fun tearDown() { + SentryFgsBridge.resetForTests() + } + + @Test + fun isEnabledFindsSentryAndroidOnTestClasspath() { + // The android/build.gradle declares + // `testImplementation "io.sentry:sentry-android-core:7.20.1"`, + // so the JVM unit-test classpath has the SDK. Probe should + // succeed. + assertTrue( + "Expected SentryAndroid on test classpath; check build.gradle testImplementation", + SentryFgsBridge.isEnabled(), + ) + } + + @Test + fun addBreadcrumbBeforeInitIsNoOp() { + // Pre-init: the bridge should drop the call without + // touching Sentry at all. No throw, no crash, no event. + SentryFgsBridge.addBreadcrumb( + category = "comapeo.state", + message = "STOPPED → STARTING", + level = "info", + ) + // Reaching this assert means the call returned cleanly. + assertTrue("addBreadcrumb pre-init must not throw", true) + } + + @Test + fun captureExceptionBeforeInitIsNoOp() { + SentryFgsBridge.captureException( + RuntimeException("boom"), + tags = mapOf("comapeo.phase" to "rootkey"), + ) + assertTrue("captureException pre-init must not throw", true) + } + + @Test + fun captureMessageBeforeInitIsNoOp() { + SentryFgsBridge.captureMessage( + "comapeo: startup timeout fired", + level = "error", + tags = mapOf("timeout" to "startup"), + ) + assertTrue("captureMessage pre-init must not throw", true) + } + + @Test + fun startBootTransactionBeforeInitReturnsNull() { + // `null` is the documented "off" return so callers + // (NodeJSService) can store with `bootTx.set(null)` and + // their `bootTx.getAndSet(null)?.let { … }` close path + // does the right thing — no-op until a real handle is + // produced post-init. + assertNull(SentryFgsBridge.startBootTransaction()) + } + + @Test + fun startBootSpanBeforeInitReturnsNull() { + assertNull(SentryFgsBridge.startBootSpan(null, "rootkey-load")) + // Also null when we hand in a non-null fake — bridge + // must not call into the SDK. + assertNull(SentryFgsBridge.startBootSpan("fake-handle", "rootkey-load")) + } + + @Test + fun finishSpanWithNullHandleIsNoOp() { + SentryFgsBridge.finishSpan(null, "ok") + assertTrue("finishSpan(null) must not throw", true) + } + + @Test + fun finishSpanBeforeInitWithFakeHandleIsNoOp() { + // Pre-init, the bridge must not interpret an arbitrary + // handle — early return saves us from `ClassCastException` + // on the Impl side. + SentryFgsBridge.finishSpan("not-a-real-handle", "ok") + assertTrue("finishSpan pre-init must not throw", true) + } + + @Test + fun resetForTestsClearsState() { + // Smoke-test the test-only reset hook so subsequent tests + // start from a clean slate. + SentryFgsBridge.resetForTests() + assertNull(SentryFgsBridge.startBootTransaction()) + } + + @Test + fun isEnabledIsCached() { + // Two calls should be idempotent; the second mustn't + // re-probe (probing repeatedly would defeat the perf + // mitigation). We can't directly observe whether the + // probe ran, but two true returns are a sanity check. + val first = SentryFgsBridge.isEnabled() + val second = SentryFgsBridge.isEnabled() + assertEquals(first, second) + assertTrue(first) + } +} diff --git a/app.plugin.js b/app.plugin.js new file mode 100644 index 0000000..586b5a4 --- /dev/null +++ b/app.plugin.js @@ -0,0 +1,249 @@ +// Expo config plugin for @comapeo/core-react-native. +// +// Phase 2 of the Sentry integration plan (see +// docs/sentry-integration-plan.md §4.1). The plugin runs at +// `expo prebuild` and writes the consumer's Sentry configuration +// into AndroidManifest.xml meta-data and Info.plist keys, where +// both `@sentry/react-native` (host-app's main process) and the +// embedded backend (Phase 3 — argv at Node spawn) can read them. +// +// When invoked without a `sentry` argument the plugin is a no-op: +// no manifest entries written, no plist keys written. Native +// treats absence as "Sentry off" (see SentryConfig.kt / +// SentryConfig.swift). The example app under apps/example/ ships +// unconfigured. +// +// `release` is intentionally NOT defaulted here — when omitted, +// the native readers build it from versionName + "+" + versionCode +// (Android) / CFBundleShortVersionString + "+" + CFBundleVersion +// (iOS) so successive EAS builds of the same marketing version +// produce distinct release tags. +// +// This file uses ESM syntax because the package's package.json +// declares `"type": "module"`. Expo's plugin resolver +// (`@expo/config-plugins`) reads `plugin.default ?? plugin`, so +// `export default` is the right shape. + +// `@expo/config-plugins` is CommonJS; pull the named functions via the +// default-import-then-destructure dance Node ESM requires for CJS deps. +import configPlugins from "@expo/config-plugins"; +const { withAndroidManifest, withInfoPlist } = configPlugins; + +// Manifest meta-data names. Read by SentryConfig.kt via +// PackageManager.getApplicationInfo(...).metaData. Living on the +// main `` tag means both the main process AND the +// `:ComapeoCore` FGS process see them — metaData is shared +// across processes within the package. +const ANDROID_KEYS = { + dsn: "com.comapeo.core.sentry.dsn", + environment: "com.comapeo.core.sentry.environment", + release: "com.comapeo.core.sentry.release", + sampleRate: "com.comapeo.core.sentry.sampleRate", + tracesSampleRate: "com.comapeo.core.sentry.tracesSampleRate", + rpcArgsBytes: "com.comapeo.core.sentry.rpcArgsBytes", + captureApplicationDataDefault: + "com.comapeo.core.sentry.captureApplicationDataDefault", +}; + +// Info.plist keys. Read by SentryConfig.swift via +// Bundle.main.infoDictionary. Prefixed with `ComapeoCore` to +// avoid collisions with `@sentry/react-native`'s own auto-config +// keys (`SentryDsn`, `SentryEnvironment`, …) — the host app's +// Sentry SDK reads its own values via its own plugin. +const IOS_KEYS = { + dsn: "ComapeoCoreSentryDsn", + environment: "ComapeoCoreSentryEnvironment", + release: "ComapeoCoreSentryRelease", + sampleRate: "ComapeoCoreSentrySampleRate", + tracesSampleRate: "ComapeoCoreSentryTracesSampleRate", + rpcArgsBytes: "ComapeoCoreSentryRpcArgsBytes", + captureApplicationDataDefault: + "ComapeoCoreSentryCaptureApplicationDataDefault", +}; + +function withComapeoCore(config, props) { + // Two flavours of "off": + // - `props === undefined` (consumer registered the plugin + // without args, or didn't register it at all) + // - `props.sentry === undefined` (consumer registered the + // plugin but disabled Sentry — e.g. they had it on + // previously and turned it off) + // + // In both cases we still pass through the manifest / + // Info.plist mods so we can REMOVE any stale entries from a + // previous run. With `expo prebuild --no-clean` (the default + // when iterating locally) the prebuild dirs survive across + // runs, and a previously-written `` would otherwise + // continue shipping the old DSN. + if (!props || !props.sentry) { + config = withSentryAndroid(config, /* sentry= */ null); + config = withSentryIos(config, /* sentry= */ null); + return config; + } + + const sentry = normalizeSentryProps(props.sentry); + + config = withSentryAndroid(config, sentry); + config = withSentryIos(config, sentry); + return config; +} + +function normalizeSentryProps(sentry) { + if (typeof sentry !== "object" || sentry === null) { + throw new Error( + "@comapeo/core-react-native plugin: `sentry` must be an object", + ); + } + if (!sentry.dsn || typeof sentry.dsn !== "string") { + throw new Error( + "@comapeo/core-react-native plugin: `sentry.dsn` is required when sentry is configured. " + + "Source it from EAS env vars in app.config.js, e.g. process.env.SENTRY_DSN.", + ); + } + if (!sentry.environment || typeof sentry.environment !== "string") { + throw new Error( + "@comapeo/core-react-native plugin: `sentry.environment` is required when sentry is configured. " + + "Sourced per build profile via EAS env vars (see plan §4.1).", + ); + } + + // Coerce values. Numbers, booleans → strings (manifest + // meta-data and Info.plist values are string-typed in the + // native readers; keeps both surfaces uniform). + const normalized = { + dsn: sentry.dsn, + environment: sentry.environment, + }; + if (sentry.release !== undefined) normalized.release = String(sentry.release); + if (sentry.sampleRate !== undefined) { + normalized.sampleRate = String(sentry.sampleRate); + } + if (sentry.tracesSampleRate !== undefined) { + normalized.tracesSampleRate = String(sentry.tracesSampleRate); + } + if (sentry.rpcArgsBytes !== undefined) { + normalized.rpcArgsBytes = String(sentry.rpcArgsBytes); + } + if (sentry.captureApplicationDataDefault !== undefined) { + normalized.captureApplicationDataDefault = sentry.captureApplicationDataDefault + ? "true" + : "false"; + } + return normalized; +} + +function withSentryAndroid(config, sentry) { + return withAndroidManifest(config, (cfg) => { + const manifest = cfg.modResults; + const application = manifest.manifest.application?.[0]; + if (!application) { + throw new Error( + "@comapeo/core-react-native plugin: AndroidManifest.xml has no element", + ); + } + application["meta-data"] = application["meta-data"] || []; + + if (sentry == null) { + // No-Sentry pass: strip every key the plugin owns. Keys + // we don't own (e.g. `io.sentry.dsn` from + // `@sentry/react-native`'s own config plugin) are left + // alone — the consumer's other plugins manage those. + for (const name of Object.values(ANDROID_KEYS)) { + removeAndroidMetaData(application, name); + } + return cfg; + } + + upsertAndroidMetaData(application, ANDROID_KEYS.dsn, sentry.dsn); + upsertAndroidMetaData( + application, + ANDROID_KEYS.environment, + sentry.environment, + ); + syncAndroidMetaData(application, ANDROID_KEYS.release, sentry.release); + syncAndroidMetaData( + application, + ANDROID_KEYS.sampleRate, + sentry.sampleRate, + ); + syncAndroidMetaData( + application, + ANDROID_KEYS.tracesSampleRate, + sentry.tracesSampleRate, + ); + syncAndroidMetaData( + application, + ANDROID_KEYS.rpcArgsBytes, + sentry.rpcArgsBytes, + ); + syncAndroidMetaData( + application, + ANDROID_KEYS.captureApplicationDataDefault, + sentry.captureApplicationDataDefault, + ); + + return cfg; + }); +} + +function syncAndroidMetaData(application, name, value) { + if (value === undefined) { + removeAndroidMetaData(application, name); + } else { + upsertAndroidMetaData(application, name, value); + } +} + +function upsertAndroidMetaData(application, name, value) { + const list = application["meta-data"]; + const existing = list.find((m) => m.$?.["android:name"] === name); + if (existing) { + existing.$["android:value"] = value; + } else { + list.push({ $: { "android:name": name, "android:value": value } }); + } +} + +function removeAndroidMetaData(application, name) { + const list = application["meta-data"]; + const idx = list.findIndex((m) => m.$?.["android:name"] === name); + if (idx !== -1) list.splice(idx, 1); +} + +function withSentryIos(config, sentry) { + return withInfoPlist(config, (cfg) => { + const plist = cfg.modResults; + + if (sentry == null) { + // No-Sentry pass: strip every key the plugin owns. + for (const key of Object.values(IOS_KEYS)) { + delete plist[key]; + } + return cfg; + } + + plist[IOS_KEYS.dsn] = sentry.dsn; + plist[IOS_KEYS.environment] = sentry.environment; + setOrDelete(plist, IOS_KEYS.release, sentry.release); + setOrDelete(plist, IOS_KEYS.sampleRate, sentry.sampleRate); + setOrDelete(plist, IOS_KEYS.tracesSampleRate, sentry.tracesSampleRate); + setOrDelete(plist, IOS_KEYS.rpcArgsBytes, sentry.rpcArgsBytes); + setOrDelete( + plist, + IOS_KEYS.captureApplicationDataDefault, + sentry.captureApplicationDataDefault, + ); + return cfg; + }); +} + +function setOrDelete(plist, key, value) { + if (value === undefined) { + delete plist[key]; + } else { + plist[key] = value; + } +} + +export default withComapeoCore; diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index e4ed047..fb66c53 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -32,6 +32,11 @@ to it, and the alternatives we considered along the way. Companion docs: did not use Android Intent broadcasts / `Messenger` / a bound service for FGS↔main state notification, and did not collapse the boot sequence into a single broadcast. Rationales below. +- Optional **Sentry observability** is wired up via an Expo config + plugin (`app.plugin.js`) and the `@comapeo/core-react-native/sentry` + sub-export. The FGS process gets its own Sentry SDK init via a + guarded bridge (zero runtime cost when the consumer doesn't use + Sentry). See §7. --- @@ -553,7 +558,139 @@ gaps. The remaining ones are inherent to the platform or out of scope: --- -## 7. Alternatives considered +## 7. Sentry observability (optional) + +Companion doc: [`sentry-integration-plan.md`](./sentry-integration-plan.md). +This section is the architectural overview; the plan has the +phasing, decision log, and per-file change list. + +Sentry is **opt-in**. Consumers that don't register the Expo +config plugin and don't import the `@comapeo/core-react-native/sentry` +sub-export pay nothing — no DSN ends up in the APK/IPA, no +`io.sentry` classes are loaded at runtime, no Sentry-shaped +captures fire from this module. + +### 7.1 Three event streams + +When the integration is wired up, the module emits to three +Sentry scopes that the dashboard splits via the `proc` tag: + +``` + ┌────────────────────────────────────────────────┐ + proc: main │ React Native main process │ + layer: rn │ @sentry/react-native (host-managed) │ + │ src/sentry.ts adapter: │ + │ - state ERROR → captureException │ + │ - messageerror → captureException (warn) │ + │ - state transitions → breadcrumbs │ + └────────────────────────────────────────────────┘ + + ┌────────────────────────────────────────────────┐ + proc: fgs │ :ComapeoCore FGS process (Android only) │ + layer: native │ SentryFgsBridge → SentryAndroid.init │ + │ - boot transaction + phase spans │ + │ - state-transition breadcrumbs │ + │ - control-frame breadcrumbs │ + │ - FGS-lifecycle breadcrumbs │ + │ - timeout events (startup, fgsStop) │ + │ - rootkey-load captureException │ + └────────────────────────────────────────────────┘ + + ┌────────────────────────────────────────────────┐ + proc: backend │ Embedded nodejs-mobile (Phase 3 — pending) │ + layer: backend │ @sentry/node via loader.mjs │ + │ - per-RPC method spans │ + │ - handleFatal captureException │ + │ - @comapeo/core OTel spans (Phase 4) │ + └────────────────────────────────────────────────┘ +``` + +The same FGS-local error (rootkey load failure, watchdog +timeout) reaches all three scopes via the existing cross-process +attribution path (§5.5): the FGS captures it directly, then +`error-native` re-broadcasts to Node which captures it from +the backend scope, and the `error` control frame propagates +to the main-app process where the JS adapter captures it +again. Sentry de-dupes via fingerprinting; the three vantage +points carry FGS context (logcat tail, foreground state), +backend stack trace, and main-process state-machine trail +respectively. + +### 7.2 Build-time config flow + +The Expo config plugin (`app.plugin.js` at module root) is the +single source of truth for DSN / environment / release / sample +rates. At `expo prebuild` it writes: + +- **Android**: meta-data on the main `` tag + (`com.comapeo.core.sentry.dsn`, `…environment`, `…release`, + `…sampleRate`, `…tracesSampleRate`, `…rpcArgsBytes`, + `…captureApplicationDataDefault`). meta-data is shared across + processes within the package, so both the main process and + the `:ComapeoCore` FGS process read the same values. +- **iOS**: keys in `Info.plist` with the `ComapeoCore` prefix + (e.g. `ComapeoCoreSentryDsn`). + +Native readers — `SentryConfig.kt` and `SentryConfig.swift` — +return a typed `SentryConfig?` (`null` when DSN is absent = +"Sentry off"). `release` defaults to +`versionName + "+" + versionCode` (Android) / +`CFBundleShortVersionString + "+" + CFBundleVersion` (iOS) when +the plugin didn't supply one, so successive EAS builds of the +same marketing version produce distinct release tags. + +The FGS process's Sentry SDK is initialised in +`ComapeoCoreService.onCreate` from the manifest meta-data — +because Android creates a fresh `Application` instance per +process, the host's `@sentry/react-native` (which inits in the +main `MainApplication`) doesn't reach the FGS. The bridge fills +that gap. + +### 7.3 The Guard / Impl split (Android) + +`SentryFgsBridge.kt` — the public entry point that the rest of +the native code calls — has **zero `io.sentry.*` imports**. It +delegates to `SentryFgsBridgeImpl.kt` only after a `Class.forName` +probe (with `initialize=false`) confirms the SDK is on the +runtime classpath. The Gradle dep is `compileOnly` — the +runtime classes come transitively from the consumer's +`@sentry/react-native@^6` peer dep. When that peer dep is +absent, the bridge stays inert and the module continues to +function. + +This split matters because the JVM/Dalvik verifier inspects +bytecode references at class-load time of the *containing* +class. Putting any `io.sentry.*` reference in `SentryFgsBridge` +itself would force the bridge class to fail loading on a +Sentry-less classpath — even if the offending method is never +called. The Impl class is only loaded when a method on it is +actually invoked, which happens only after `isEnabled()` +returned `true`. + +### 7.4 What this design *doesn't* attempt + +- **Native crashes inside `nodejs-mobile`** (V8 abort, addon + SIGSEGV, OOM) are not captured by us. They surface as host- + process crashes that `sentry-android` / `sentry-cocoa` + catches at the JNI/Cocoa layer. We do not bundle + `sentry-native` into the embedded runtime. +- **iOS multi-process Sentry**. iOS runs everything in one + process; the host's `@sentry/react-native` SDK already + covers it. The §6.3 JS adapter is the only iOS-side + integration layer. +- **DSN secrecy**. Sentry DSNs are not high-secret values — + they identify a project rather than authenticate writes, + and they appear in stripped binaries of every Sentry-using + app's APK/IPA. Treating them as build-time public config is + intentional. +- **PII capture**. Per the plan §7.4.9, observation contents, + precise location, peer identities, raw project IDs, and + user-entered text are never captured even with the + capture-application-data toggle on. + +--- + +## 8. Alternatives considered ### 7.1 FGS↔main process state notification (Android) @@ -642,7 +779,7 @@ constraint. Both are small. --- -## 8. References +## 9. References - `backend/index.js` — boot sequence, `handleFatal`, init handler. - `backend/lib/simple-rpc.js` — control socket server, replay, error @@ -650,5 +787,7 @@ constraint. Both are small. - `ios/NodeJSService.swift`, `android/.../NodeJSService.kt` — native lifecycle state. - `src/ComapeoCoreModule.ts` — JS-facing observers (`comapeo`, `state`). +- `docs/sentry-integration-plan.md` — Sentry integration plan (§7 + is the architectural overview). - [GitHub issue #29](https://github.com/digidem/comapeo-core-react-native/issues/29) — original "expose Node.js lifecycle state to JS" tracking issue. diff --git a/docs/sentry-integration-plan.md b/docs/sentry-integration-plan.md new file mode 100644 index 0000000..3e85db8 --- /dev/null +++ b/docs/sentry-integration-plan.md @@ -0,0 +1,2166 @@ +# Sentry Integration Plan + +How we propose to wire Sentry error reporting and RPC tracing into +`@comapeo/core-react-native` without forcing every consumer of this +module to ship Sentry. The integration is **opt-in and host-app +driven** so that only the CoMapeo Mobile app pays the bundle cost, +sends events to a DSN, and sees its data in Sentry — other apps that +depend on this module continue to ship with no Sentry traffic and no +Sentry binaries. + +Companion docs: +- [`ARCHITECTURE.md`](./ARCHITECTURE.md) — process model, IPC, lifecycle. +- Reference implementation in CoMapeo Mobile: + [`comapeo-mobile/src/backend/src/app.js`](https://github.com/digidem/comapeo-mobile/blob/develop/src/backend/src/app.js). +- Upstream OpenTelemetry instrumentation in `@comapeo/core`: + [`comapeo-core PR #1051`](https://github.com/digidem/comapeo-core/pull/1051). + +--- + +## 1. Goals & non-goals + +### Goals + +1. **Capture errors** raised at every layer the module owns: + - Node backend: `uncaughtException`, `unhandledRejection`, boot + phase failures (`listen-control`, `init`, `construct`, + `runtime`), per-RPC throws. + - RN/JS layer: `state` ERROR transitions, `messageerror` protocol + parse failures, RPC client rejections. + - Native: rootkey load failures, watchdog timeouts, IPC + connection errors, hard process crashes (Android FGS, + iOS in-process). +2. **Trace RPC calls** end-to-end across the React Native ↔ Node + boundary, mirroring the `onRequestHook` pattern used in + `comapeo-mobile/src/backend/src/app.js`. Each RPC call appears + as a transaction whose parent span is the JS-side caller. +3. **Forward OpenTelemetry spans** emitted by `@comapeo/core` (once + PR #1051 lands) to Sentry without bundle-time coupling to a + specific exporter. +4. **App-specific gating**: zero Sentry traffic, zero Sentry SDK + activation, and ideally zero meaningful bundle delta for any + consumer that doesn't opt in. + +### Non-goals + +- We are not adding a generic telemetry abstraction. The module + speaks Sentry-shaped APIs (DSN, `Sentry.captureException`, + OpenTelemetry-compatible spans). Other backends are out of scope. +- We are not capturing user-PII or message contents. Spans get + method names and structural metadata, not arguments. +- We are not auto-installing Sentry SDKs on the host app's behalf. + The host app declares the dependency; the module just wires it in. + +--- + +## 2. Why "app-specific" matters here + +`@comapeo/core-react-native` is a library. It has at least two +different consumers expected over time (the CoMapeo Mobile app, and +the in-tree `apps/example` integration harness — and potentially +third-party apps building on the module). We cannot: + +- **Bundle a hard dependency on `@sentry/node` into the published + Node backend.** That bundle is staged into + `android/src/{debug,main}/assets/nodejs-project/` and + `ios/nodejs-project/` at `npm run backend:build` time + (see `backend/rollup.config.ts` and + `scripts/build-backend.ts`). Whatever ends up in the rollup is on + every consumer's device, regardless of whether they want Sentry. +- **Hard-import `@sentry/react-native` from `src/`.** Doing so + would force every consumer to install it, and any consumer that + does not call `Sentry.init()` would still get a runtime warning + from the module attempting to use an uninitialized client. +- **Ship a DSN.** The DSN is per-app secret (well, per-app config). + It belongs in the host app's environment, not in the published + module's source. + +The integration must therefore be: + +1. **Inert by default.** Module installed but not configured → no + Sentry calls, no SDK init, no trace metadata on RPC frames. +2. **Activated by the host app.** A single configuration entry + point, called from the host app's startup code, switches + instrumentation on with a DSN, environment, release, sample + rates, etc. +3. **Reachable from all three layers.** The same call from JS must + propagate to the Node backend (so it can `Sentry.init()` and + register `onRequestHook`) and to native (so iOS/Android crash + reporters can be enabled). + +--- + +## 3. Layered architecture + +There are three independent Sentry scopes to manage. They share a +DSN and a release tag, but each runs in its own process / runtime +and needs its own SDK init. + +``` +┌──────────────────────────── Host app ─────────────────────────────┐ +│ │ +│ ┌─────────────── React Native (JS) ────────────────┐ │ +│ │ @sentry/react-native │ │ +│ │ - JS errors, native crashes (iOS+Android) │ │ +│ │ - starts trace for RPC calls │ │ +│ │ │ │ +│ │ @comapeo/core-react-native: │ │ +│ │ - state.on('stateChange', ERROR) → captureException │ +│ │ - state.on('messageerror', ...) → captureException │ +│ │ - comapeo.() wrapper: startSpan + │ │ +│ │ attach sentry-trace + baggage in metadata │ │ +│ └──────────────────────────────────────────────────┘ │ +│ │ │ +│ │ argv: --sentryDsn, --sentry..., │ +│ │ --captureApplicationData │ +│ │ comapeo.sock RPC (with sentry-trace) │ +│ ▼ │ +│ ┌─────────────────── Node backend ─────────────────┐ │ +│ │ loader.mjs │ │ +│ │ - parseArgs → if DSN: Sentry.init() then │ │ +│ │ await import('./index.mjs') │ │ +│ │ - lazy chunk: @sentry/node loaded only on opt-in │ +│ │ index.mjs │ │ +│ │ - handleFatal → captureException │ │ +│ │ - createMapeoServer({ onRequestHook }) → spans │ │ +│ │ - OpenTelemetry processor sends @comapeo/core │ │ +│ │ spans (PR #1051) to Sentry transport │ │ +│ └──────────────────────────────────────────────────┘ │ +│ │ │ +│ │ shared DSN/release/env │ +│ ▼ │ +│ ┌─────────────────── Native (FGS) ─────────────────┐ │ +│ │ Android: sentry-android via @sentry/react-native│ │ +│ │ iOS: sentry-cocoa via @sentry/react-native │ │ +│ │ - hard crash reports │ │ +│ │ - we forward NodeJSService ERROR transitions │ │ +│ │ with phase tag for correlation │ │ +│ └──────────────────────────────────────────────────┘ │ +└───────────────────────────────────────────────────────────────────┘ +``` + +Key splits: + +- **JS and native** share a single `@sentry/react-native` SDK that + the host app installs and initializes. The module never imports + `@sentry/react-native` directly; it accepts a Sentry-shaped + adapter object that the host hands in (see §4.1). +- **Node backend** runs a separate `@sentry/node` SDK, initialized + inside the bundle. Configuration is read at native process start + from build-time-baked sources (Android manifest meta-data, iOS + Info.plist) seeded by an Expo config plugin (§4.2), and forwarded + to the backend in the existing `init` control-socket frame. This + avoids any JS round-trip on the boot path so the FGS can + cold-start without RN being alive. +- **Android FGS process** has no JS bridge but does reach the + same Sentry-android SDK if the host app's `MainApplication` + initializes it before starting the FGS. Cross-process attribution + is via `release`+`environment`+a `proc:fgs` tag, not a shared + client. + +--- + +## 4. Configuration + +### 4.0 The cold-start constraint + +Earlier drafts of this plan plumbed the backend Sentry config from +JS through the control-socket `init` frame. That has a real cost: + +1. **FGS cold-start (Android).** The `:ComapeoCore` foreground + service can be cold-launched by the system to deliver a sync + trigger *before* the host app's RN bridge is alive. With a + JS-driven config, the FGS would have to either start the + backend with Sentry off (losing observability for the most + interesting code path — boot-time errors during a cold sync) + or block on RN to come up first (defeats the purpose of an + FGS-survives-RN architecture). +2. **Boot latency on every launch.** Even when RN is alive, the + JS round-trip for `setSentryConfig(...)` adds a serial step + to the boot sequence. The backend can't sample `boot.listen` + or `boot.construct` spans until after RN is ready and has + called `configureSentry`. +3. **State observability gap.** `state.getState()` reflects only + transitions captured *after* the JS listener is attached. + Errors that fire before `configureSentry` runs (rootkey load + races, FGS-side watchdog timeouts) miss Sentry entirely under + the JS-driven model. + +Three configuration vectors solve this together: + +| Vector | When read | Purpose | +|---|---|---| +| **Expo config plugin** (build-time) | At native process start, before any IPC | DSN, environment, release, sample rates. The single source of truth. | +| **Persisted native preference** (runtime, restart-to-activate) | At native process start | The "capture application data" toggle (§9). | +| **JS adapter handoff** (`configureSentry`) | When RN bridge is up | Hands the host app's already-initialized `@sentry/react-native` to this module so JS-side listeners can call `captureException` / `startSpan`. Does **not** carry DSN. | + +### 4.1 Build-time: Expo config plugin (primary) + +A new plugin shipped from this module — `app.plugin.js` at the +package root, registered in `expo-module.config.json`. It uses +the same `@expo/config-plugins` patterns already in use for +`apps/example/plugins/with-android-tests/index.js`. + +Plugin inputs: + +| Field | Required | Source | +|---|---|---| +| `dsn` | yes | App-specific Sentry project DSN. | +| `environment` | yes | Build-environment label (e.g. `development`, `qa`, `production`). The consumer decides the scheme. | +| `release` | no, defaults to versionName | If omitted, native reads `versionName` (Android) / `CFBundleShortVersionString` (iOS) at runtime. | +| `tracesSampleRate` | no | Sentry sampling knob. | +| `sampleRate` | no | Sentry sampling knob. | +| `rpcArgsBytes` | no | RPC arg-truncation cap (developer debug builds only). | + +The module deliberately **does not derive `environment`** — +build-environment schemes are app-specific. CoMapeo Mobile's +frontend uses an `applicationId` suffix mapping +(`.dev`/`.rc`/`.pre`) in `src/frontend/lib/appVariant.ts`, but +that's a CoMapeo convention, not something other consumers of +this module should be locked into. + +Instead, the consumer feeds `environment` from a build-time +source they control. The cleanest path on EAS is **`eas.json` +build-profile env vars + `app.config.js`** so the same +codebase produces different `environment` values for +internal/test/release builds. CoMapeo Mobile's +`eas.json` would carry: + +```json +{ + "build": { + "development": { + "env": { + "SENTRY_DSN": "https://…", + "SENTRY_ENVIRONMENT": "development" + } + }, + "preview": { + "env": { + "SENTRY_DSN": "https://…", + "SENTRY_ENVIRONMENT": "qa" + } + }, + "production": { + "env": { + "SENTRY_DSN": "https://…", + "SENTRY_ENVIRONMENT": "production" + } + } + } +} +``` + +…and `app.config.js` (must be `.js`, not `app.json`, to read +`process.env`): + +```js +// app.config.js +export default { + expo: { + plugins: [ + ["@comapeo/core-react-native", { + sentry: { + dsn: process.env.SENTRY_DSN, + environment: process.env.SENTRY_ENVIRONMENT ?? "production", + }, + }], + ], + }, +}; +``` + +EAS evaluates `app.config.js` with the build profile's `env` +visible as `process.env.*`, so each `eas build --profile X` +bakes a different `environment` string into the manifest / +plist at prebuild time. No native code change between profiles. + +`release` is the one value we *do* default from existing native +config. Omitting it makes the native loader build the release +tag as **`versionName + "+" + versionCode`** (Android) / +**`CFBundleShortVersionString + "+" + CFBundleVersion`** (iOS). +On EAS, `versionCode` / `CFBundleVersion` is the auto-incremented +build number, so successive EAS builds of the same app version +produce distinct release tags — required to disambiguate +internal/test builds that share a marketing version. Consumers +can still pass `release` explicitly (e.g. to embed a git SHA +from `EAS_BUILD_GIT_COMMIT_HASH`) and the explicit value wins. + +The plugin runs at `expo prebuild` and writes: + +**Android — `` meta-data in `AndroidManifest.xml`** via +`withAndroidManifest`: + +```xml + + + + + +``` + +These meta-data live on the manifest's main `` tag so +**both the main process and the `:ComapeoCore` FGS process see +them** — `PackageManager.getApplicationInfo(...).metaData` is +shared across processes within the package. + +**iOS — keys in `Info.plist`** via `withInfoPlist`: + +```xml +ComapeoCoreSentryDsn +https://abc@sentry.example.com/1 +ComapeoCoreSentryEnvironment +production +ComapeoCoreSentryTracesSampleRate +0.1 +ComapeoCoreSentryRpcArgsBytes +0 + +``` + +Plugin behaviour rules: + +- If the consumer registers the plugin without a `sentry` key, no + meta-data / Info.plist entries are written. Native treats the + absence as "Sentry off". The example app under `apps/example/` + ships unconfigured. +- If the consumer registers the plugin **with** a `sentry` key, the + plugin validates that `dsn` and `environment` are present + (throwing at prebuild time if they're not — fast failure beats + a silently-misconfigured Sentry project) and writes the + corresponding meta-data / plist keys. Optional fields + (`release`, `tracesSampleRate`, `sampleRate`, `rpcArgsBytes`) + are written only when provided. +- Plugin code is small (~50 LOC) and lives alongside the existing + `app.plugin.js` patterns. The plugin is consumed at `expo + prebuild` time only — runtime code path doesn't touch it. +- The DSN is now embedded in the host app's APK/IPA. That's an + accepted tradeoff: Sentry DSNs are not high-secret values + (they identify a project, not authenticate writes; rate + limiting and per-project ingest are server-side). They appear + in stripped binaries of every Sentry-using app. + +### 4.2 Native config consumption + +At native process start (FGS `onCreate` on Android, app delegate +init on iOS), the module loads the manifest / plist keys into a +typed `SentryConfig?` and propagates it two ways: + +```kotlin +// android/.../SentryConfigStore.kt (new) — sketch +data class SentryConfig( + val dsn: String, + val environment: String, + val release: String, + val sampleRate: Double?, + val tracesSampleRate: Double?, + val rpcArgsBytes: Int?, +) + +fun loadFromManifest(ctx: Context): SentryConfig? { + val meta = ctx.packageManager.getApplicationInfo( + ctx.packageName, PackageManager.GET_META_DATA + ).metaData ?: return null + val dsn = meta.getString("com.comapeo.core.sentry.dsn") ?: return null + + // Environment: written by the plugin from the consumer's + // app.config.js, which sources it from EAS build-profile env + // (see §4.1). Required when a DSN is present — the plugin + // refused to prebuild without it, so this should never be + // null at runtime; treat null as a build misconfiguration. + val environment = meta.getString("com.comapeo.core.sentry.environment") + ?: error("comapeo: sentry.environment missing from manifest") + + // Release: prefer plugin override, else build from + // versionName + "+" + versionCode so successive EAS builds + // of the same marketing version produce distinct releases. + val release = meta.getString("com.comapeo.core.sentry.release") + ?: run { + val pkg = ctx.packageManager.getPackageInfo(ctx.packageName, 0) + val v = pkg.versionName ?: "unknown" + val build = if (Build.VERSION.SDK_INT >= 28) pkg.longVersionCode + else @Suppress("DEPRECATION") pkg.versionCode.toLong() + "$v+$build" + } + + return SentryConfig( + dsn = dsn, + environment = environment, + release = release, + sampleRate = meta.getString("com.comapeo.core.sentry.sampleRate")?.toDoubleOrNull(), + tracesSampleRate = meta.getString("com.comapeo.core.sentry.tracesSampleRate")?.toDoubleOrNull(), + rpcArgsBytes = meta.getString("com.comapeo.core.sentry.rpcArgsBytes")?.toIntOrNull(), + ) +} +``` + +iOS reads the same key set from `Bundle.main.infoDictionary` +(`ComapeoCoreSentryDsn`, `ComapeoCoreSentryEnvironment`, etc.) and +falls back to `CFBundleShortVersionString` for `release` if the +plist key was absent. + +There is intentionally **no native-side derivation logic** for +`environment`. Build-environment schemes are app-specific — +this module reads whatever literal string the consumer's plugin +wrote, with no coupling to any particular `applicationId` suffix +convention. + +The loaded `SentryConfig` is consumed in two places: + +1. **Native SDK init (Android FGS process).** `SentryAndroid.init(ctx) + { options -> options.dsn = config.dsn; ... }` in the FGS + `Application.onCreate`. Allows the FGS process to capture native + crashes, ANRs, and the §7.4 telemetry events with the same DSN. + On iOS the host app's `@sentry/react-native` already owns the + single-process SDK; we don't re-init. +2. **Backend, via Node argv at spawn time.** Native serializes + `SentryConfig` (plus the §9 `captureApplicationData` toggle) + into argv and passes it to `nodejs-mobile`'s start call. The + backend's `loader.mjs` entry parses argv, runs `Sentry.init()`, + then dynamically imports `index.mjs`. See §5.1 for the bundle + layout and §5.2 for the loader pattern. + +This is the key change vs. the prior draft: **Sentry config +flows through argv, not through the control-socket `init` +frame**. The init frame stays focused on the rootkey (which we +deliberately keep out of argv per `ARCHITECTURE.md §7.4`). The +DSN is fine in argv: it's already in the manifest of every +Sentry-using app's APK/IPA, identifies a project rather than +authenticating writes, and is rate-limited server-side. + +The benefits stack: + +- **FGS cold-start**: Sentry config is in native config + argv; + Node boots with full instrumentation before RN is alive. +- **Auto-instrumentation order**: `Sentry.init()` runs in + `loader.mjs` *before* the dynamic import of `index.mjs`, so + OpenTelemetry's `import-in-the-middle` patches modules as + they load. This is the explicit pattern from comapeo-mobile's + `src/backend/loader.js`. +- **Lazy bundle chunk**: when the manifest has no DSN, native + doesn't pass `--sentryDsn=...` in argv; the loader's + `if (sentryDsn) await import('@sentry/node')` short-circuits + and the rollup-split `@sentry/node` chunk never loads. + +### 4.3 JS adapter handoff + +JS-side listeners (§6) need a callable Sentry object — `startSpan`, +`captureException`, `getTraceData`. Because the host app already +runs `Sentry.init()` from `@sentry/react-native` (which reads the +same Info.plist / manifest values via its own auto-config), +`configureSentry` exists purely to hand that initialized client +to this module: + +```ts +// File: src/sentry.ts (new) +import type * as SentryReactNative from "@sentry/react-native"; + +export type SentryAdapter = Pick< + typeof SentryReactNative, + | "captureException" + | "captureMessage" + | "startSpan" + | "continueTrace" + | "getActiveSpan" + | "getTraceData" + | "addBreadcrumb" +>; + +export interface ComapeoSentryConfig { + /** + * The host app's already-initialized `@sentry/react-native` + * (or any object satisfying `SentryAdapter`). The module + * never calls `Sentry.init()`; the host app does, and the + * native SDK is initialized from manifest/plist values + * written by the config plugin. + */ + sentry: SentryAdapter; +} + +/** + * Hand off the host app's Sentry adapter so this module's JS + * listeners can call into it. Idempotent and one-shot. + * + * Must be called before the first `comapeo.*` RPC if you want + * client-side spans on those calls. State observers attach + * immediately on call. + * + * Note: this does NOT configure DSN/environment/release. Those + * are baked into native config at build time by the Expo plugin + * and read by both `@sentry/react-native` (in the main process) + * and the embedded backend (in the FGS or iOS app process). + */ +export function configureSentry(config: ComapeoSentryConfig): void; +``` + +Consumer usage in CoMapeo Mobile: + +```ts +import * as Sentry from "@sentry/react-native"; +import { configureSentry } from "@comapeo/core-react-native/sentry"; + +// Sentry SDK reads DSN from Info.plist / manifest; the plugin +// wrote those values at build time. +Sentry.init({ /* override options if needed */ }); + +configureSentry({ sentry: Sentry }); +``` + +Apps that don't want Sentry don't import the sub-export and don't +register the plugin. The main barrel +(`@comapeo/core-react-native`) keeps no Sentry imports; the only +typecheck-time pull-in for opt-in consumers is the +`SentryAdapter` type. + +### 4.4 Runtime opt-in toggle (forward reference) + +A persisted "capture application data" boolean lives in native +preferences. It gates the *additional* observability surface +described in §7.4 (per-RPC method spans, sync session spans, +counts) but never touches DSN/environment/release and never +unlocks PII fields. See §9 for full design. + +### 4.5 Backend transport: argv at Node spawn + +Native already passes positional argv to the Node process when +it spawns nodejs-mobile (`comapeoSocketPath`, `controlSocketPath`, +`privateStorageDir`; see `backend/index.js:19-20`). We extend +that with named flags for Sentry config, mirroring +comapeo-mobile's pattern (`src/frontend/initializeNodejs.ts`): + +``` +node loader.mjs \ + \ + --sentryDsn=https://abc@sentry.example.com/1 \ + --sentryEnvironment=production \ + --sentryRelease=1.4.2 \ + --sentryTracesSampleRate=0.1 \ + --sentryRpcArgsBytes=0 \ + --captureApplicationData # only when toggle is on +``` + +Native picks the loader path (`loader.mjs`) as the entry script +and constructs the argv from `SentryConfig` plus the §9 +toggle's persisted value. When the manifest has no DSN, the +`--sentry*` flags are omitted entirely; the loader's first +check is `if (!sentryDsn) await import('./index.mjs')` — no +`@sentry/node` chunk is ever loaded. + +The control-socket `init` frame stays focused on the rootkey +(unchanged from today, except optional sibling fields are now +gone): + +```js +// Native → Node, on control.sock +{ type: "init", rootKey: "" } +``` + +Why argv is the right transport for Sentry config (and not the +rootkey): + +| | Sentry config | Rootkey | +|---|---|---| +| Already in app binary? | Yes (manifest / plist) | No (encrypted in keystore) | +| Server-side rate limited? | Yes | n/a — single bytes are the secret | +| Visible in `/proc//cmdline`? | Yes | Would be — that's the problem | +| Needed before any other module loads? | **Yes** (auto-instrumentation order) | No (received post-handshake) | +| Right transport | argv | init frame | + +Argv satisfies the auto-instrumentation order requirement: +`Sentry.init()` must run before any module that Sentry wants +to patch is imported. The control-socket init frame arrives +*after* `index.js` has already imported `@comapeo/core`, +`fastify`, and friends; argv is the only transport that +arrives before the loader's first import. + +--- + +## 5. Backend instrumentation (`backend/`) + +Mirrors `comapeo-mobile/src/backend/src/app.js`, adapted to this +module's two-socket boot. + +### 5.1 Bundle strategy: multi-entry with lazy `@sentry/node` chunk + +**Pinned versions**: `@sentry/node@^8`, `@sentry/react-native@^6`, +`@sentry/core@^8`, `import-in-the-middle` (whatever +`@sentry/node@8` resolves it at). These are the +OpenTelemetry-first majors — required for the §5.6 forwarding +of `@comapeo/core` PR #1051 spans to "just work" without glue +code. + +The backend currently rolls into a single `index.mjs` per +platform (see `backend/rollup.config.ts`). To support +auto-instrumentation **and** keep the bundle weight off +non-Sentry consumers, we move to the multi-entry layout used by +`comapeo-mobile/src/backend/rollup.config.js`: + +``` +nodejs-project/ +├── loader.mjs # spawn target — parses argv, optionally +│ # inits Sentry, then dynamically imports +│ # index.mjs. +├── index.mjs # current entry — unchanged in shape; now +│ # imported dynamically by the loader. +├── importHook.js # OpenTelemetry's import-in-the-middle +│ # hook entry. MUST be a separate file +│ # because it's loaded with module.register(), +│ # not import. Empty/unused when Sentry isn't +│ # active. +├── lib/register.js # Internal dep of import-in-the-middle that +│ # it expects at this exact relative path. +│ # Bundled as its own chunk and copied +│ # across; can't be inlined. +└── chunks/sentry-*.mjs # Auto-emitted rollup chunk holding + # @sentry/node + transitive deps. Loaded + # only when loader.mjs awaits the dynamic + # import. +``` + +**Why each piece is separate**: + +- **`loader.mjs`** is the new spawn target. Native passes + `loader.mjs` to nodejs-mobile instead of `index.mjs`. + The loader parses `--sentryDsn`/etc. from `process.argv`, + dynamically imports `@sentry/node` if a DSN is present, + calls `Sentry.init()`, then `await import('./index.mjs')`. + This is the comapeo-mobile pattern verbatim — without the + init-before-other-imports order, Sentry's OpenTelemetry + auto-instrumentation can't patch modules. +- **`importHook.js`** is `import-in-the-middle/hook.mjs`, + which OpenTelemetry registers as a Node module-loading + hook via `module.register('import-in-the-middle/hook.mjs', ...)`. + `module.register` requires a **separate file** that is + loaded fresh in a child loader thread; it can't be + bundled into the same module that calls + `module.register`. The comapeo-mobile rollup config + carries this as a dedicated entry; we do the same. +- **`lib/register.js`** is a sub-dep of `import-in-the-middle` + that resolves via a hard-coded relative path + (`./lib/register.js`). Cannot be bundled. Comapeo-mobile + carries this too — we mirror. +- **`chunks/sentry-*.mjs`** is what rollup auto-emits when it + sees `await import('@sentry/node')` in the loader and the + rest of the bundle never touches it statically. Output + format is `format: 'esm'`; rollup with + `output.manualChunks` (or default code-splitting) produces + the chunk. Consumers who don't pass `--sentryDsn` never + load this chunk; the cost is install-time disk only. + +**Path-rewrite plugin**. The +`rollup-plugin-import-hook.mjs` from comapeo-mobile rewrites +calls like +`module.register('import-in-the-middle/hook.mjs', …)` to +`module.register('./importHook.js', …)` so the runtime +register call points at the bundled output rather than the +node_modules path that no longer exists post-bundle. We port +this plugin into `backend/rollup-plugins/`. + +**Updated `backend/rollup.config.ts` shape** (sketch): + +```ts +import importHook from "./rollup-plugins/rollup-plugin-import-hook.mjs"; +import { createRequire } from "node:module"; +const require = createRequire(import.meta.url); + +const sharedInput = { + loader: path.join(__dirname, "loader.mjs"), + index: path.join(__dirname, "index.js"), + // Required separate chunks for import-in-the-middle: + importHook: require.resolve("import-in-the-middle/hook.mjs"), + "lib/register": require.resolve("import-in-the-middle/lib/register.js"), +}; + +// In buildPlugins: append importHook() alongside the existing +// addonLoaderPlugin() / esmShim() / nodeResolve() pipeline. +``` + +**Bundle-size cost**: + +- Consumers **with** Sentry: ~150–250 KB extra in the + per-platform output dir (the sentry chunk plus `importHook` / + `lib/register`). Loaded into V8 only when DSN is present. +- Consumers **without** Sentry: same disk cost (the chunks + ship in `nodejs-project/`), but **zero runtime cost**: the + `@sentry/node` chunk is never required by any path the + loader executes when `--sentryDsn` is absent. The loader + itself is tiny (~1 KB) and runs unconditionally. + +If install size becomes the bottleneck (it currently isn't — +the existing `nodejs-project/` tree is dominated by V8 + native +addons), Phase 6 adds a second backend bundle with the Sentry +chunks stripped at build time, and `scripts/build-backend.ts` +selects which to copy based on whether the consumer's +`app.json` registered the plugin with a DSN. Not in v1. + +**Sourcemaps — generate here, upload from the consumer.** +Rollup already emits `.map` files alongside each output (the +existing config has `sourcemap: true`). We: + +1. **Ship the sourcemaps in the npm package** by including + them in `package.json`'s `files` field (or leaving the + default — they're already inside `android/src/main/assets/` + and `ios/nodejs-project/` after `backend:build`). +2. **Strip them from the shipped APK/IPA** at build time. The + consumer's gradle / Xcode build excludes + `nodejs-project/**/*.map` from the packaged assets via a + small build step (documented in README). Sourcemaps on + device are dead weight — only Sentry needs them — and + shipping them inflates the install size and exposes + readable backend source to anyone who unpacks the APK. +3. **Document the upload step** for consumers. Each consumer's + release CI calls `sentry-cli sourcemaps upload` (or the + equivalent EAS hook) against the `.map` files in + `node_modules/@comapeo/core-react-native/android/src/main/assets/nodejs-project/`, + tagged with the same release string the plugin baked into + the manifest (`versionName + "+" + versionCode`). README + includes a copy-paste snippet. + +We do **not** add `@sentry/rollup-plugin` here. Comapeo-mobile +uses it because that codebase owns the build *and* the upload; +this module owns only the build, and pushing uploads from a +library's CI to a consumer-owned Sentry project would require +the consumer to surface their `SENTRY_AUTH_TOKEN` to this +module's CI — wrong direction. Consumers run upload in their +own CI with their own credentials. + +### 5.2 `loader.mjs` — `Sentry.init()` and dynamic import + +```js +// backend/loader.mjs (new) — sketch, mirroring +// comapeo-mobile/src/backend/loader.js + +import { parseArgs } from "node:util"; + +const { values } = parseArgs({ + options: { + sentryDsn: { type: "string" }, + sentryEnvironment: { type: "string" }, + sentryRelease: { type: "string" }, + sentryTracesSampleRate: { type: "string" }, + sentrySampleRate: { type: "string" }, + sentryRpcArgsBytes: { type: "string" }, + captureApplicationData: { type: "boolean", default: false }, + }, + // The three positional args (comapeoSocketPath, controlSocketPath, + // privateStorageDir) flow through unchanged for index.mjs to read. + allowPositionals: true, + strict: false, +}); + +if (values.sentryDsn) { + // Dynamic import so the rollup chunk only loads when needed. + // Sentry.init() must complete before index.mjs imports anything, + // because OpenTelemetry's import-in-the-middle hook can only + // patch modules loaded after init. + const Sentry = await import("@sentry/node"); + Sentry.init({ + dsn: values.sentryDsn, + environment: values.sentryEnvironment ?? "production", + release: values.sentryRelease, + sampleRate: Number(values.sentrySampleRate ?? 1.0), + tracesSampleRate: values.captureApplicationData + ? Number(values.sentryTracesSampleRate ?? 0.1) + : 0, + integrations: [ + Sentry.consoleLoggingIntegration(), + ], + initialScope: { tags: { layer: "backend" } }, + }); + + // Stash the parsed config so index.mjs can read it back + // (RPC arg-truncation cap, capture-application-data toggle, + // etc.) without re-parsing argv. + globalThis.__comapeoSentryConfig = { + rpcArgsBytes: Number(values.sentryRpcArgsBytes ?? 0), + captureApplicationData: values.captureApplicationData, + }; +} + +// Always run the app, with or without Sentry. +await import("./index.mjs"); +``` + +**Order matters**: `Sentry.init()` runs before +`await import('./index.mjs')`. Inside `index.mjs`, the existing +top-level imports (`Fastify`, `@comapeo/core`, etc.) execute in +the patched module loader — Sentry's OpenTelemetry hook +intercepts each one and applies its instrumentation. If we +flipped the order, none of those modules would be patched. + +**No-op path**. If `--sentryDsn` is absent, the `if (values.sentryDsn)` +block is skipped entirely, the `await import('@sentry/node')` is +never reached, the rollup-emitted Sentry chunk is never loaded, +and `index.mjs` runs identically to today. Zero overhead for +unconfigured consumers. + +**Sentry's instrumentation patching nuances** (from +comapeo-mobile experience): + +- The `consoleLoggingIntegration` requires `debug` to call + `console.log` directly. The comapeo-mobile loader rebinds + `debug.log = (...args) => console.log(...)` for non-production + environments to make `debug('mapeo:*')` output flow through + Sentry breadcrumbs. We replicate when `debug` is in our + bundle (it's a transitive dep of `@comapeo/core`). +- The default Sentry transport hits the network synchronously + on flush. For mobile we likely want + `sentry-offline-transport-better-sqlite` (which comapeo-mobile + uses) to queue events when offline. Folded into Phase 3 + if we ship offline transport, otherwise events are lost + while offline (acceptable for v1). +- `Sentry.consoleLoggingIntegration({ levels: ["error"] })` + in production keeps log volume sane; widening to + `["error", "warn", "log"]` in dev mirrors comapeo-mobile. + +### 5.3 `index.mjs` — read parsed config, no Sentry init + +`backend/index.js` (renamed `index.mjs` for the multi-entry +layout) gets a small read of `globalThis.__comapeoSentryConfig` +for the RPC hook + toggle flags it needs. It does **not** call +`Sentry.init()` (that already happened in the loader) and the +control-socket `init` frame no longer carries any `sentry` +field: + +```js +// backend/index.js (sketch — minimal additions) +import * as Sentry from "@sentry/node"; // resolved if loader ran init; otherwise unused + +const sentryConfig = globalThis.__comapeoSentryConfig; +const sentryActive = sentryConfig != null; + +// ... existing code unchanged ... + +// In ComapeoRpcServer construction (§5.5): +comapeoRpcServer = new ComapeoRpcServer(comapeo, { + sentry: sentryActive ? Sentry : null, + rpcArgsBytes: sentryConfig?.rpcArgsBytes ?? 0, + captureApplicationData: sentryConfig?.captureApplicationData ?? false, +}); +``` + +When the loader didn't init Sentry, `import * as Sentry from "@sentry/node"` +in `index.mjs` would normally still load the chunk. To keep +the lazy behaviour intact, we use one of two patterns: + +1. **Conditional import inside `index.mjs`**: + ```js + const Sentry = sentryActive ? await import("@sentry/node") : null; + ``` + The chunk is only loaded if the loader already loaded it + (cached in Node's module registry). Net: still zero work + for the no-Sentry path. +2. **Adapter object on globalThis**: the loader stashes + `globalThis.__comapeoSentry = Sentry` after init, and + `index.mjs` reads from globalThis instead of importing. + No further chunk-load risk. + +Pick (2) for simplicity — `index.mjs` never names +`@sentry/node` at all, and the rollup chunk is unambiguously +gated by the loader's argv check. + +### 5.4 Error capture wiring + +Three failure surfaces in `backend/index.js` to retrofit: + +1. **`handleFatal(phase, error)`** — already the single funnel for + uncaught exceptions, unhandled rejections, and boot-phase + throws (`listen-control`/`init`/`construct`/`runtime`). Add: + + ```js + if (sentryActive) { + Sentry.captureException(err, { + tags: { phase, layer: "backend" }, + }); + // Ensure the event is flushed before process.exit(1). + await Sentry.flush(100).catch(() => {}); + } + ``` + + The 100 ms flush window aligns with the existing + `broadcastError` flush — both run inside the same + pre-exit window, in parallel. + +2. **`error-native` handler** — frames forwarded from Android + FGS-local failures (rootkey, watchdog) reach `handleFatal` + with the FGS-supplied phase, so they get captured by #1 + automatically. We add a `tags: { source: "native" }` so + Sentry can filter cross-process forwarding. + +3. **Per-RPC errors** — handled in §5.5. + +### 5.5 RPC tracing — server side + +Replicates the `onRequestHook` from +`comapeo-mobile/src/backend/src/app.js`, called from +`backend/lib/comapeo-rpc.js`: + +```js +// backend/lib/comapeo-rpc.js (sketch) +import * as Sentry from "@sentry/node"; + +export class ComapeoRpcServer extends ServerHelper { + constructor(manager, { sentry } = {}) { + super((socket) => { + const messagePort = new SocketMessagePort(socket); + messagePort.start(); + const server = createMapeoServer(manager, messagePort, { + onRequestHook: sentry ? makeSentryRequestHook() : undefined, + }); + messagePort.on("close", () => server.close()); + }); + } +} + +function makeSentryRequestHook() { + return (request, next) => { + const sentryTrace = request.metadata?.["sentry-trace"]; + const baggage = request.metadata?.baggage; + return Sentry.continueTrace({ sentryTrace, baggage }, () => + Sentry.startSpan( + { + op: "rpc", + name: request.method.join("."), + forceTransaction: true, + attributes: { + "rpc.method": request.method.join("."), + // args intentionally omitted unless rpcArgsBytes>0 + }, + }, + async (span) => { + try { + await next(request); + span.setStatus({ code: 1, message: "ok" }); + } catch (error) { + span.setStatus({ code: 2, message: "internal_error" }); + Sentry.captureException(error, { + tags: { layer: "backend", op: "rpc" }, + }); + throw error; + } + }, + ), + ); + }; +} +``` + +Differences from the comapeo-mobile reference: + +- The hook is only registered when Sentry is active; absent + config, `createMapeoServer` is called without + `onRequestHook` and there is zero overhead. +- We rethrow after `captureException` so the IPC error path + still returns a rejection to the JS caller. The reference + swallows it inside `startSpan`'s callback, which silently + resolves the RPC promise — that loses error visibility + on the JS side. +- `request.args` is not serialized by default. In CoMapeo data + the args can be project-scoped content (observation fields, + attachments). PII risk is high, so opt-in only via + `rpcArgsBytes`. + +### 5.6 OpenTelemetry forwarding (PR #1051) + +When `comapeo-core` PR #1051 merges, `@comapeo/core` will emit +OpenTelemetry spans through the global `@opentelemetry/api` +provider. `@sentry/node` v8+ is built on OpenTelemetry: spans +emitted via `@opentelemetry/api` are picked up automatically by +the Sentry span processor. + +Concretely, after `Sentry.init()`, no further wiring is needed — +`@comapeo/core`'s spans become children of the active Sentry +transaction (the RPC span from §5.5) and ship to the configured +DSN. + +If PR #1051 lands before this integration, we should verify the +parent span linkage in a manual smoke test (see §10). + +--- + +## 6. JS / RN module instrumentation (`src/`) + +### 6.1 New files + +- `src/sentry.ts` — public sub-export. Exposes + `configureSentry()`, types, and the wrapped client. +- `src/sentry-internal.ts` — module-private state holding the + active adapter (or `null`), keyed reads for the RPC wrapper. + +The main barrel (`src/index.ts`) is unchanged so consumers who +don't import the sub-export get no Sentry types or runtime code +linked in. + +### 6.2 RPC client tracing — request side + +`createMapeoClient` already accepts an `onRequestHook` of the +same shape as the server's. The CoMapeo Mobile frontend uses +this verbatim in +[`src/frontend/lib/createMapeoApi.ts`](https://github.com/digidem/comapeo-mobile/blob/develop/src/frontend/lib/createMapeoApi.ts); +we port it directly. + +The hook starts a span for the RPC call, reads +`sentry-trace`/`baggage` from the active span via +`@sentry/core`'s `getTraceData`, and stuffs them into +`request.metadata` so the server-side `onRequestHook` (§5.5) +can `Sentry.continueTrace` them as the parent context. The +`Sentry.getActiveSpan()` short-circuit is what makes the hook +inert when tracing isn't enabled — no try/catch, no allocation, +no overhead beyond one function call and one falsy check: + +```ts +// src/ComapeoCoreModule.ts (changed) +import { getTraceData } from "@sentry/core"; +import { activeAdapter } from "./sentry-internal"; + +export const comapeo: MapeoClientApi = createMapeoClient(messagePort, { + timeout: Infinity, + onRequestHook: (request, next) => { + const Sentry = activeAdapter(); + const parentSpan = Sentry?.getActiveSpan(); + if (!Sentry || !parentSpan) { + // Tracing disabled or no active root span — pass through + // untouched, no metadata injection. This is the no-op path + // when configureSentry was never called. + next(request).catch(noop); + return; + } + Sentry.startSpan( + { name: request.method.join("."), op: "ipc" }, + async (span) => { + const traceHeaders = getTraceData({ span }); + if (traceHeaders) { + request.metadata = { + "sentry-trace": traceHeaders["sentry-trace"], + baggage: traceHeaders["baggage"], + }; + } + try { + await next(request); + span.setStatus({ code: 1, message: "ok" }); + } catch (error) { + span.setStatus({ code: 2, message: "internal_error" }); + Sentry.captureException(error); + } + }, + ); + }, +}); +``` + +Three things to call out: + +- **The hook is registered unconditionally at module load.** + We can't lazily install it later because `comapeo` is a + module-scoped const; consumers may have imported and called + it before `configureSentry` runs. Registering up-front and + short-circuiting on `!parentSpan` is exactly the pattern + comapeo-mobile uses and costs essentially nothing per call + when Sentry is off. +- **`activeAdapter()`** is a tiny indirection that returns the + adapter passed to `configureSentry`, or `null`. It's read + per call; updates take effect the next time the hook runs. +- **`getTraceData` comes from `@sentry/core`**, not + `@sentry/react-native`, because the helper lives in core + and re-exporting it through the adapter type would force + consumers to surface it. We import it directly. Adds a + `@sentry/core` peer dependency entry (already a transitive + dep of `@sentry/react-native`). + +### 6.3 State observer capture + +`state` already surfaces every error condition the JS layer +sees. `configureSentry()` registers two listeners: + +```ts +state.addListener("stateChange", (s, info) => { + if (s !== "ERROR" || !info) return; + const e = new Error(info.errorMessage); + e.name = `ComapeoError:${info.errorPhase}`; + adapter.captureException(e, { + tags: { + layer: "rn", + "comapeo.phase": info.errorPhase, + "comapeo.state": s, + }, + }); +}); + +state.addListener("messageerror", (err) => { + adapter.captureException(err, { + tags: { layer: "rn", source: "control-socket" }, + level: "warning", + }); +}); +``` + +Phase tags align with the values produced in +`src/ComapeoCore.types.ts` and the native sources +(`rootkey`, `node-runtime-unexpected`, `shutdown-timeout`, +`starting-timeout`, `ipc`, `listen-control`, `init`, +`construct`, `runtime`). They become Sentry filterable tags so +the team can dashboard "rootkey load failure rate" or "FGS +watchdog timeout rate" without parsing message strings. + +### 6.4 Public client error capture + +The IPC client surfaces RPC errors as rejected promises. Most +captures happen on the backend side (§5.5) and reach Sentry from +there with full context. The JS side adds a thin +`captureException` for client-perceived errors (e.g. RPC timeouts, +disconnect mid-call) that the backend never observed: + +```ts +// inside the wrapper or proxy from §6.2 +async (...args) => { + return Sentry.startSpan({ op: "rpc.client", name: method }, async () => { + try { + return await underlying[method](...args); + } catch (e) { + // Only capture if it didn't originate from a backend + // event we already see in §5.5 — the backend tags its + // captures with `layer: "backend"`. Backend RPC failures + // arrive here as plain errors, but Sentry de-dupes if + // the same exception is captured twice with different + // contexts. Acceptable. + Sentry.captureException(e, { + tags: { layer: "rn", op: "rpc.client", "rpc.method": method }, + }); + throw e; + } + }); +} +``` + +--- + +## 7. Native instrumentation (`ios/`, `android/`) + +The host app's `@sentry/react-native` already configures the +underlying `sentry-cocoa` and `sentry-android` SDKs for the main +process. What's left for this module: + +### 7.1 Loading config and forwarding to the backend + +Native reads `SentryConfig` from the manifest / Info.plist +(§4.2) at process start. There is no JS bridge call required; +config is in place before RN can boot. + +- **iOS**: `AppLifecycleDelegate.application(_:didFinishLaunchingWithOptions:)` + reads `Bundle.main.infoDictionary` and stores `sentryConfig` on + `NodeJSService` before `runNode()`. +- **Android (FGS)**: `ComapeoCoreService.onCreate` reads + `packageManager.getApplicationInfo(...).metaData` and stores + `sentryConfig` on `NodeJSService` before `start()`. +- **Android (main process)**: reads the same metaData when the + `ComapeoCoreModule` first instantiates, used only for the + control-IPC observer to add §7.4 breadcrumbs/events from the + main process. The main-process Sentry SDK is already + initialized by `@sentry/react-native` reading the same values + via its own pathway — we don't re-init. + +The stored config is embedded in the `init` frame +(§4.5) when `NodeJSService.sendInit(rootKey)` runs. The +runtime opt-in toggle (§9) is read from native preferences at the +same moment and merged into the same payload. + +### 7.2 Android FGS process + +The FGS runs in the `:ComapeoCore` process — see +`ARCHITECTURE.md §2.2`. `Sentry.init()` in the host app's +`MainApplication` runs only in the main process; the FGS process +gets a fresh `Application` and needs its own init. + +Two options: + +1. **Host-app responsibility.** Document that the host app's + `MainApplication.onCreate` should detect the FGS process and + call `SentryAndroid.init(...)` with the same DSN there. + `@sentry/react-native` does not handle multi-process + automatically. +2. **Module convenience.** Add a helper + `ComapeoCoreInit.installSentryInFgs(application, options)` that + the host calls from its `MainApplication`. The helper detects + `getProcessName().endsWith(":ComapeoCore")` and conditionally + inits `SentryAndroid`. + +Option 2 keeps the cross-process detail inside the module that +introduced the second process. Recommended. + +### 7.3 Native error tagging — see §7.4.7 + +The cross-process error attribution detail moved into §7.4.7 +alongside the rest of the native telemetry data design. + +### 7.4 Native telemetry data design + +This is the heart of the native instrumentation. Sentry has a +small set of primitives, each suited to different kinds of data. +We design the captures around them rather than dumping logs: + +| Sentry primitive | Use for | Example | +|---|---|---| +| **Breadcrumb** | Lightweight ordered context — what led up to an event. Cheap, capped at ~100 by default, attached to the next event. | "state STARTING→STARTED at t+312ms", "ipc connected", "FGS notification posted" | +| **Transaction** (root span) | A timed unit of work with a clear start/end and a name. Indexed; dashboards can chart durations and counts. | `comapeo.boot` (start→started), `comapeo.shutdown` (stop→stopped) | +| **Span** (child) | A nested timed sub-step inside a transaction. | `boot.listen-control`, `boot.init`, `boot.construct`, `boot.ipc-connect` | +| **Event** (`captureMessage` / `captureException`) | A discrete error or notable occurrence; full stacktrace + context. | rootkey load failure, watchdog timeout fired, FGS killed by OS | +| **Tag** | Indexed key/value pair on events — used for dashboard filtering. | `phase:rootkey`, `proc:fgs`, `comapeo.state:ERROR`, `platform:android` | +| **Context** (custom) | Structured but non-indexed — appears on event detail pages. | `{"comapeo": {"abi": "arm64-v8a", "nodejs_mobile_version": "...", "ipc_socket_age_ms": 1234}}` | +| **User** (anonymized) | A stable but non-identifying user/session id. | host-app-supplied install ID; never the rootkey | + +The remainder of this section walks through what each layer of +the native architecture (state machine, boot phases, timeouts, +IPC, FGS lifecycle) maps onto. + +#### 7.4.1 State transitions → breadcrumbs + +Every `ComapeoState` transition (`STOPPED`/`STARTING`/`STARTED`/ +`STOPPING`/`ERROR`) is captured as a breadcrumb on both the +FGS-side and main-process Sentry scopes: + +```kotlin +// android/.../NodeJSService.kt (FGS), inside the state-derivation +// callsite that already runs deriveState(...) +Sentry.addBreadcrumb(Breadcrumb().apply { + category = "comapeo.state" + level = if (newState == STARTED || newState == STOPPED) + SentryLevel.INFO + else if (newState == ERROR) + SentryLevel.ERROR + else SentryLevel.INFO + message = "$oldState → $newState" + setData("from", oldState.name) + setData("to", newState.name) + setData("backendState", backendState.javaClass.simpleName) + setData("nodeRuntime", nodeRuntime.javaClass.simpleName) + setData("stopRequested", stopRequested) +}) +``` + +These never trigger an upload by themselves — they ride along +on the next event. When something does fire (an ERROR transition, +a captured exception), the dashboard shows the last ~30 seconds of +state history leading up to it. That's exactly the data needed +to debug "why did this end up in ERROR" questions. Always-on. + +#### 7.4.2 Boot as a transaction with phase spans + +Boot is the single most error-prone path in the system. We model +it as a Sentry transaction that spans from `start()` to either +`STARTED` or `ERROR`: + +``` +Transaction: comapeo.boot (op = "boot") +├─ Span: boot.listen-control +├─ Span: boot.ipc-connect (control) +├─ Span: boot.rootkey-load (FGS only) +├─ Span: boot.init (rootkey handshake) +├─ Span: boot.construct (MapeoManager + RPC bind) +└─ Span: boot.ipc-connect (comapeo) +``` + +Each phase corresponds to a stage already named in +`backend/index.js` (the catch tags `phase` on errors with these +exact strings). On the native side, each phase is bracketed by +the existing log calls — we just add `Sentry.startSpan` around +them. Phases that throw set the span status to `internal_error` +and capture the exception; phases that succeed set `ok`. + +The transaction is **always-on essential telemetry**: durations +at boot are first-class signal for performance regressions +(rootkey load took 2s instead of 50ms? new device security +hardware quirk). Native sample rate is independent of +`tracesSampleRate` — we sample boot at 100% even when +`tracesSampleRate=0.01` for RPC because boot fires once per +process and is high-value. Implemented via +`Sentry.startSpan({ ..., forceTransaction: true })` and a +dedicated boot-tag inspected by an event processor that lifts +its sample rate to 1.0. + +#### 7.4.3 Shutdown as a transaction + +Symmetric: `comapeo.shutdown` transaction from `stop()` to +final `STOPPED` (or `ERROR` if shutdown timed out). Spans +for `shutdown.broadcast-stopping`, `shutdown.close-rpc`, +`shutdown.node-join`. Surfaces the difference between graceful +shutdowns (under the 10 s budget) and watchdog-killed ones. + +#### 7.4.4 Timeouts → events (always) + +Every timeout enumerated in `ARCHITECTURE.md §5.7` becomes a +Sentry event when it fires, tagged with which timeout it was: + +| Timeout | Sentry shape | Tags | +|---|---|---| +| iOS `startupTimeout` (30s) | `captureMessage("comapeo: startup timeout fired")` `level=error` | `timeout:startup, platform:ios, layer:native` | +| iOS `stop(timeout:)` | `captureMessage("comapeo: stop timeout fired")` `level=warning` | `timeout:shutdown, platform:ios` | +| iOS `waitForFile` | `captureMessage("comapeo: waitForFile timeout")` `level=error` | `timeout:waitForFile, socket:` | +| iOS `connectWithRetry` exhausted | event with `attempts` context | `timeout:connectRetry` | +| Android `startupTimeoutMs` (30s) | `captureMessage(...)` `level=error` | `timeout:startup, platform:android, proc:fgs` | +| Android FGS `withTimeout` (10s) on stop | `captureMessage(...)` `level=error` | `timeout:fgsStop, proc:fgs` | +| Android `SEND_ERROR_NATIVE_TIMEOUT_MS` (2s) | breadcrumb + `level=warning` event | `timeout:errorNativeForward` | +| Android `waitForFile` (30s) | `captureMessage(...)` `level=error` | `timeout:waitForFile` | + +Timeouts are the most actionable signal for "something is +silently broken" — they always fire something we never want +to pre-emptively recover from. Always-on essential telemetry. + +#### 7.4.5 IPC connection lifecycle → breadcrumbs + events + +`NodeJSIPC.State` transitions +(`Connecting`/`Connected`/`Disconnecting`/`Disconnected`/`Error`) +become breadcrumbs at `category: "comapeo.ipc"`. Disconnects from +a `Connected` state in non-stopping conditions also fire an +event tagged `ipc.unexpected_disconnect:true` with the +pre-disconnect JS state — that's the path that derives `ERROR` +phase `node-runtime-unexpected` (`ARCHITECTURE.md §5.4`), +useful to surface separately from controlled disconnects. + +#### 7.4.6 FGS lifecycle → breadcrumbs + +Android-only: the `ComapeoCoreService` lifecycle hooks +(`onCreate`, `onStartCommand`, `onTaskRemoved`, `onDestroy`) and +notification post/cancel become breadcrumbs at +`category: "comapeo.fgs"`. FGS-killed-by-OS scenarios (the FGS +process dies without `onDestroy` running) appear in +`sentry-android`'s session-replay-style detection if it's +enabled — we don't add custom code for that. + +#### 7.4.7 Native error tagging (was §7.3) + +When `NodeJSService` enters ERROR locally (rootkey load, +watchdog), it already populates `_lastError` and emits +`stateChange`. The JS-visible capture happens in §6.3, but on +Android FGS that capture lands in the *main* process — the +FGS's own context (logcat tail, foreground state, notification +ID) is in the *FGS* process's Sentry scope. + +If the FGS-side Sentry SDK is initialised (§4.2), we also call +`Sentry.captureException` from the FGS error handler, tagged +`proc:fgs phase:`, **before** forwarding the +`error-native` frame to Node. The duplicate event (FGS-side + +backend-side via `error-native` re-broadcast + main-process +JS-side via `stateChange`) is deduplicated by Sentry's +fingerprinting; the three captures together carry the FGS +context, the backend stack, and the main-process state-machine +trail. + +iOS doesn't need this — the FGS doesn't exist there, everything +runs in the host app process and the host app's +`@sentry/react-native` already covers it. + +#### 7.4.8 Categorization: essential vs opt-in + +| Capture | Tier | Rationale | +|---|---|---| +| State transition breadcrumbs | **Essential** | Cheap, ride on existing events. Required to debug ERROR paths. | +| Boot transaction + phase spans | **Essential** | Once-per-process, high-value perf signal. Forced 100% sample. | +| Shutdown transaction + phase spans | **Essential** | Same reasoning — once-per-process. | +| Timeout events | **Essential** | Always actionable; never silent recovery. | +| ERROR `captureException` (FGS, backend, main) | **Essential** | Already fires; this plan just structures it. | +| IPC connection breadcrumbs | **Essential** | Cheap; required to attribute disconnect-derived ERROR. | +| Unexpected-disconnect event | **Essential** | High-signal failure mode. | +| FGS lifecycle breadcrumbs | **Essential** | Cheap; required to debug FGS-killed-by-OS scenarios. | +| Per-RPC method spans (sampled) | **Opt-in** (capture application data on) | High volume; usable for performance dashboards but only when the user consented. | +| Sync session transaction (start → ready → finish, with peer count) | **Opt-in** | Reveals usage cadence. Counts only — no peer identities. | +| Background/foreground transitions | **Opt-in** | Reveals usage patterns. | +| Backend memory/heap snapshots (periodic) | **Opt-in** | Cost is non-trivial; only needed for memory-leak hunts. | +| Storage size of `privateStorageDir` (periodic) | **Opt-in** | Dataset-size signal. | + +#### 7.4.9 Hard never-capture list + +Independent of any toggle, these are off by construction — +not behind a config option, not behind `rpcArgsBytes>0`, not +ever: + +- The 16-byte rootkey, in any encoding. +- Identity public/secret keypairs derived from the rootkey. +- Observation contents (text, attachments, attachment paths). +- Precise location (lat/lng). If we ever want geographic + distribution data, it goes through quantization to + ~country/region resolution at the host-app layer, never + here. +- User-entered text from any settings UI. +- Project IDs in raw form. If included as a tag, must be + hashed (SHA-256, truncated to 16 chars) at capture site. +- Peer device identities or discovered peer counts above + bucketed thresholds (e.g. record `peers_bucket: 1-3 / 4-10 / 10+`, + not raw counts). +- File paths under `Application Support` or + `getFilesDir()` that include the rootkey or project IDs. + +A `before_send` event processor enforces the list +defensively: it walks the event tree for known sensitive +substrings (`rootKey`, base64-shaped 22-char strings, `lat=`, +`lng=`, `latitude:`, `longitude:`) and either redacts or +drops the event. This is belt-and-suspenders — the fix is +always at the capture site, but the processor catches +mistakes before they ship. + +### 7.5 Hard-crash reporting + +Crashes that bypass JS (SIGSEGV in a native addon, OOM kill, +`process.abort()`) are documented in `ARCHITECTURE.md §6` as +"belong in a separate channel". `sentry-cocoa` and +`sentry-android` already handle native crashes for the host app +process; on Android the FGS process needs its own init (§7.2) +to capture FGS-process crashes. + +We do not bundle `sentry-native` into the embedded `nodejs-mobile` +runtime. A V8 abort or libnode crash will not produce a Sentry +event from inside Node — but it will produce an Android-process +crash (since the FGS process dies) which `sentry-android` will +capture with a stacktrace from the JNI side. + +--- + +## 8. PII, sampling, and privacy + +CoMapeo data is sensitive (observation locations, attachments, +device identities). Defaults must avoid leaking it into Sentry: + +- **`request.args` is never serialized** unless + `rpcArgsBytes > 0` is explicitly set. Method names and + metadata only. +- **No project IDs in span names**; only RPC method paths + (`project.observation.create`, etc.). If we later want + per-project breakdowns, hash the project ID before adding + it as a tag. +- **No rootkey, no public/secret keypair, no observation + contents** in event payloads. The `error-native` frame + carries phase + message; the backend `Sentry.captureException` + call does too. +- **Stacktraces** are fine — they may include filenames from + inside `@comapeo/core` and the bundled backend. No user data + unless an `Error.message` was constructed with one (audit + these on integration). +- **`tracesSampleRate`** defaults to `0` if unspecified. The + host app must opt into RPC tracing volume explicitly. +- **`sendDefaultPii`** (Sentry option) is left to the host + app's `Sentry.init()` and the backend init we forward; we + don't override it. + +A pre-merge checklist (§10) includes a `before_send` hook that +greps every outbound event for known sensitive substrings +(`rootKey`, `dsn`, base64-shaped 22-char strings) as a +defense-in-depth check during integration smoke tests. + +--- + +## 9. Runtime "capture application data" toggle + +A persisted boolean preference, off by default, that the host +app's settings UI exposes to the end user. When on, the +**opt-in** captures from §7.4.8 are emitted; when off (the +default), only the essential captures are. Crucially, this +never unlocks anything in the §7.4.9 never-capture list — the +two layers are independent. + +### 9.1 Persistence + +A native preference, written and read entirely on the native +side so it survives app uninstall-resistant in the same way +existing user prefs do (and is not a special concern at the +backup/restore layer): + +- **Android**: stored in plain + `SharedPreferences("comapeo-core-prefs", MODE_PRIVATE)`. Key: + `sentry.captureApplicationData`. Read by both the main process + and the FGS process. (The earlier draft considered + `EncryptedSharedPreferences`; for a non-sensitive boolean + there's no value in the encryption overhead, and plain prefs + are cross-process-safe via `MODE_MULTI_PROCESS` if both + processes need the live value — though the toggle is only + read at process start so even that's unnecessary.) +- **iOS**: stored in `UserDefaults.standard` keyed + `com.comapeo.core.sentry.captureApplicationData`. Read at + app delegate init. + +### 9.2 JS API + +The toggle is exposed alongside `configureSentry`: + +```ts +// File: src/sentry.ts (additions) +/** + * Read the persisted opt-in flag. Resolves with the + * current native-side value. Reads are sync-fast on both + * platforms but the API is async to match the bridge. + */ +export function getCaptureApplicationData(): Promise; + +/** + * Write the persisted opt-in flag. Returns when the write has + * been durably committed on the native side. + * + * IMPORTANT: the new value does NOT take effect until the next + * app launch. The current process keeps emitting whatever it + * was emitting at boot. This is documented in the host app's + * settings UI so the user knows to restart for the change to + * apply. + */ +export function setCaptureApplicationData(enabled: boolean): Promise; +``` + +### 9.3 Why restart-to-activate + +Two reasons: + +1. **Snapshot-at-boot semantics.** The flag's value is read + once, at process start, and embedded in the `init` frame + to the backend (`captureApplicationData: bool`). The + backend wires its `onRequestHook`, OTel sampler, and + custom span emitters based on that snapshot. Hot-toggling + would mean re-registering hooks on a live RPC server, + which adds a class of bugs (in-flight requests with one + instrumentation, new requests with another) for marginal + value. +2. **Predictable user expectation.** The user toggling + "capture more data for debugging" should reasonably + expect a clear before/after, not a partial transition + in the middle of an active sync session. + +A minor cost: if the user has an active issue right now, they +need to flip the toggle and restart the app to start +collecting. The host-app UI says exactly that. + +### 9.4 What the toggle gates + +When `captureApplicationData == true`, the following turn on +in addition to the essential set: + +- **Per-RPC client + server spans.** `tracesSampleRate` + effectively goes from 0 → its configured value (default + 0.1). Method names only; never args. Span attributes + include `rpc.method`, `rpc.status`, `rpc.duration_ms`. +- **Sync session lifecycle transaction.** A + `comapeo.sync.session` transaction from `connectPeers` + (or first peer-connected event) through to + `syncFinished`/`disconnect`. Spans inside for + `discover`, `handshake`, `replicate`. Counts only: + number of peers (bucketed), bytes transferred (bucketed), + duration. **No peer identities, no project IDs in raw + form.** +- **Background/foreground transitions** — host-app `pause` + and `resume` events become `comapeo.app.background` / + `comapeo.app.foreground` breadcrumbs that ride on + subsequent events, helping correlate timing + ("error fired 3s after app backgrounded"). +- **Backend memory checkpoint.** Once at `STARTED` and + every 60s thereafter, a custom context entry on the + next event with `process.memoryUsage()` snapshot + (rss, heapTotal, heapUsed). No event capture by + itself — context only. +- **`privateStorageDir` size sample.** Once at `STARTED`, + the on-disk size of dbFolder + indexFolder + customMaps + as a numeric `du`-style integer. Bucketed (`<10MB`, + `10–100MB`, `100MB–1GB`, `>1GB`) before sending to + avoid leaking the exact size of a sensitive dataset. + +### 9.5 Plumbing path + +``` +[user toggles in app settings] + │ + ▼ +setCaptureApplicationData(true) ─── JS ─── + │ + ▼ +ComapeoCoreModule.setCaptureApplicationData ─── Native bridge ─── + │ + ▼ +SharedPreferences write (Android) ─── Persisted ─── +UserDefaults.set (iOS) + │ + ▼ +[user is told: restart required] + +============= NEXT LAUNCH ============= + +NodeJSService starts ─── Native ─── + │ + ▼ +read persisted toggle (SharedPreferences / UserDefaults) + │ + ▼ +spawn Node with argv: + loader.mjs <…sockets…> --sentryDsn=… [--captureApplicationData] + │ + ▼ +loader.mjs parseArgs ─── Node argv ─── + │ + ▼ +Sentry.init({ tracesSampleRate: toggle ? 0.1 : 0, … }) +globalThis.__comapeoSentryConfig = { captureApplicationData: true } + │ + ▼ +await import('./index.mjs') ─── Node ─── + │ + ▼ +- onRequestHook registered (per-RPC spans) +- sync-session emitter registered +- memory-snapshot timer started +- (tracesSampleRate already set in Sentry.init) +``` + +Note that the toggle is read **once** at native process start +and passed as a Node argv flag (`--captureApplicationData`). +The control-socket `init` frame doesn't carry it. Restart-to- +activate is the natural consequence: the loader's argv is +fixed for the life of the Node process. + +### 9.6 What the toggle never unlocks + +The §7.4.9 never-capture list applies regardless. Specifically: + +- The toggle does not raise `rpcArgsBytes` from 0; raw RPC + args remain off. (`rpcArgsBytes` is a separate **build-time** + config-plugin option for developer debug builds.) +- The toggle does not start capturing observation contents. +- The toggle does not start capturing precise location. +- The toggle does not start capturing peer identities. + +If a future requirement wants any of those, it lands as a +*separate*, more-restrictive opt-in (and likely never ships +to production at all). + +### 9.7 Default and migration + +The default-when-unset is **per-environment, decided by the +consumer at build time** via a new plugin field +`captureApplicationDataDefault`: + +```json +{ + "expo": { + "plugins": [["@comapeo/core-react-native", { + "sentry": { + "dsn": "...", + "environment": "development", + "captureApplicationDataDefault": true // dev/qa builds + } + }]] + } +} +``` + +The plugin writes a manifest meta-data / +plist key (`com.comapeo.core.sentry.captureApplicationDataDefault`) +that the native preference read consults as a fallback when the +user has never explicitly written the toggle. Once the user +flips the switch in the host app's settings UI, their explicit +choice wins forever — the per-build default only applies on the +first launch after install (or after a clear-data). + +Recommended consumer config, wired through EAS env vars: + +```js +// app.config.js +captureApplicationDataDefault: + (process.env.SENTRY_ENVIRONMENT ?? "production") !== "production", +``` + +so internal/test builds opt in by default without any user +action, while production ships off-by-default. If the field is +omitted, native treats it as `false` everywhere — safer +fallback for the example app and any consumer that doesn't +actively configure it. + +--- + +## 10. Phasing + +### Status snapshot + +| Phase | Status | Notes | +|---|---|---| +| 10.1 — Phase 1 (JS adapter) | **landed** | `src/sentry.ts` + `src/sentry-internal.ts`; `configureSentry` sub-export. | +| 10.2 — Phase 2a (plugin + native readers) | **landed** | `app.plugin.js`; `SentryConfig.{kt,swift}` + tests. | +| 10.2 — Phase 2b (Android FGS native captures) | **landed** | `SentryFgsBridge.{kt,Impl}` + bridge wired into `ComapeoCoreService` and `NodeJSService`; 9 JVM tests. iOS Phase 2b not needed (single-process app — JS adapter covers it). | +| 10.3 — Phase 3 (backend loader + RPC tracing) | **pending** | Biggest remaining piece. Needs `@sentry/node` + `import-in-the-middle` + multi-entry rollup + `loader.mjs`. Native already passes argv (see bench branch's `comapeoBackendArgs`); backend just needs to consume it. | +| 10.4 — Phase 4 (`@comapeo/core` OTel forwarding) | **pending** | Blocked on `@comapeo/core` PR #1051 landing. Verification work only. | +| 10.5 — Phase 5 (capture-application-data toggle) | **pending** | `SharedPreferences` / `UserDefaults` store, restart-to-activate semantics. | +| 10.6 — Phase 6 (refinements) | **pending** | Sample-rate tuning from real data; optional dual-bundle if size matters. | + +What's *not* shipping with the integration even after all phases: `sentry-native` inside `nodejs-mobile` (V8 abort / addon SIGSEGV stays a host-process crash; sentry-android catches those at the JNI layer — see plan §7.5). + +--- + +### 10.1 Phase 1 — JS-side error capture (smallest delivery) — landed + +- `configureSentry({ sentry })` adapter handoff (§4.3). +- `state` listeners capture ERROR transitions and + `messageerror` events via `@sentry/react-native` (§6.3). +- Ship as `@comapeo/core-react-native/sentry` sub-export. +- Host app (CoMapeo Mobile) calls `Sentry.init` itself. + +Value: immediate visibility into rootkey failures, watchdog +timeouts, IPC errors, and `messageerror` parse failures — +provided RN is alive when they fire. (The FGS-cold-start gap +is closed in Phase 2.) + +Cost: ~50 LOC in `src/sentry.ts`, no native or backend +changes, zero risk to other consumers. + +### 10.2 Phase 2 — Expo config plugin + native config consumption — landed + +Phase 2 splits in two because the plugin-and-readers part has +zero dependency cost and the native-side direct-Sentry-SDK part +adds a non-trivial Gradle / podspec coupling. The phasing +reflects that: + +**Phase 2a — plugin + native config readers — landed** + +- New `app.plugin.js` at module root (§4.1). +- iOS reads Info.plist into `SentryConfig` at load time; + Android reads manifest meta-data into `SentryConfig`. +- JVM unit tests + Swift `XCTest` cases pinning the parsers' + contract (DSN-absent → null, missing environment → throw, + versionName/versionCode default release, numeric coercion, + strict bool parsing). +- JS-side state-transition breadcrumbs + ERROR + `captureException` fire through the configured adapter + immediately (§6.3 — already in §10.1, but the plugin makes + the host app's manifest carry the same DSN/environment + values `@sentry/react-native` reads, so the host-supplied + adapter is correctly tagged). + +Cost: ~150 LOC plugin + Kotlin + Swift + tests. Zero new +runtime deps. + +**Phase 2b — FGS-process direct Sentry calls (Android only) — landed** + +iOS doesn't need a Phase 2b — it's a single-process app and +the host's `@sentry/react-native` already covers everything +the §6.3 JS adapter forwards. Phase 2b is Android-specific. + +Shipped: + +- `io.sentry:sentry-android-core:7.20.1` added to + `android/build.gradle` as `compileOnly` so this module never + pulls sentry-android into consumers who don't use Sentry. + The runtime classes come transitively from + `@sentry/react-native@^6`. Pin matches the API surface RN + v6.x exposes; bumping should be done in lock-step with the + RN peer-dep range. +- `SentryFgsBridge.kt` / `SentryFgsBridgeImpl.kt` — guard / + impl split. The guard's `Class.forName` probe (with + `initialize=false` so the SDK's `` doesn't run on + the JVM unit-test classpath where `SystemClock` is unmocked) + gates every public method. Impl freely imports `io.sentry.*`; + it's only loaded when the guard says the SDK is present. +- FGS-side `SentryAndroid.init` in + `ComapeoCoreService.onCreate`. Sets `proc:fgs` and + `layer:native` as process-level tags so dashboards split + FGS captures from main-process captures (which carry + `proc:main` from `src/sentry.ts`). +- State-transition breadcrumbs (§7.4.1) on every + `applyAndEmit` transition. +- `comapeo.boot` transaction (§7.4.2) opened in `start()`, + closed on first STARTED (`ok`) / ERROR (`internal_error`). + In-flight phase spans are closed on the same terminal. +- `boot.rootkey-load` span around `RootKeyStore.loadOrInitialize()`, + `boot.init-frame` span from "init frame sent" to "ready + control frame received". Span names match the bench + backend's `boot.` taxonomy + (`apps/benchmark/backend/lib/boot-spans.js` on + `claude/benchmark-uds-rpc-bridge-1Zahz`) so a single Sentry + dashboard query charts both sides. +- Timeout events (§7.4.4): + `comapeo: startup timeout fired` (level=error, + `timeout:startup`), + `comapeo: FGS stop timeout fired` (level=error, + `timeout:fgsStop`). +- Control-frame breadcrumbs (§7.4.5): `received: started`, + `received: ready`, `received: stopping`, `received: error`, + `malformed control frame`. Plus FGS lifecycle breadcrumbs + (§7.4.6): `ComapeoCoreService.onCreate`, `onStartCommand`, + `onDestroy`. +- FGS-side `captureException` on rootkey-load failure (§7.4.7), + with `comapeo.phase:rootkey` and `source:rootkey-store` + tags. Fires before `sendErrorNativeFrame` so the FGS scope + has the original logcat/notification context; the same + exception is re-broadcast to Node and re-captured by the + main-process JS adapter for the cross-process triple. + +Cost: ~250 LOC native + bridge + tests. Bundle delta: +sentry-android-core is only on the runtime classpath when +`@sentry/react-native` brings it; no impact on consumers +without Sentry. + +Reused from the bench branch: the `boot.` span name +taxonomy from +`apps/benchmark/backend/lib/{boot-spans,telemetry-sink}.js`. +When the bench branch's `TelemetrySink` interface lands on +main, the production backend (Phase 3) can implement it as a +`SentryAdapterSink` — the comment in `telemetry-sink.js` +already foreshadows this. + +### 10.3 Phase 3 — backend loader + RPC tracing — pending + +- Add `@sentry/node@^8`, `@sentry/core@^8`, and + `import-in-the-middle` to `backend/package.json`. +- Confirm `package.json`'s `files` field surfaces the built + `*.map` files into the published npm package; document + consumer's APK/IPA exclusion + sourcemap upload step. +- Restructure `backend/rollup.config.ts` for multi-entry + output (`loader`, `index`, `importHook`, `lib/register`). +- New `backend/loader.mjs` parses argv, conditionally inits + Sentry, dynamically imports `index.mjs`. +- Native side (iOS + Android) passes `loader.mjs` as the + spawn target with `--sentry*` argv flags from + `SentryConfig` (§4.2). +- `handleFatal` and `onRequestHook` wired (§5.4, §5.5). +- Client-side `getMetadata` (§6.2) for distributed tracing + (or accept JS-side spans without parent linkage if + `@comapeo/ipc` doesn't yet support it — track upstream). + +Value: RPC method-level errors and durations in Sentry; +backend boot failures with proper stacktraces; baseline +distributed tracing; auto-instrumentation works because +`Sentry.init()` runs before any other module loads. + +Cost: ~300 LOC across loader/rollup config/native/JS; +~150–250 KB bundle delta on every consumer **on disk** but +zero runtime cost when DSN is absent (§5.1). + +### 10.4 Phase 4 — `@comapeo/core` OpenTelemetry forwarding — pending + +- Bump `@comapeo/core` once PR #1051 lands. +- Verify Sentry's OTel integration picks up the spans + with the RPC transaction as parent. +- Document any required tracing-config overrides. + +Value: deep traces inside core operations (sync, indexing, +hypercore) — the data Sentry's performance tab is designed +to surface. + +### 10.5 Phase 5 — capture-application-data toggle — pending + +- Native preference store (Android `SharedPreferences`, + iOS `UserDefaults`) with `getCaptureApplicationData` / + `setCaptureApplicationData` JS API (§9.2). +- Read on boot, passed as `--captureApplicationData` argv + flag (§9.5), gates the §7.4.8 opt-in captures (per-RPC + method spans, sync session transaction, background/foreground + breadcrumbs, memory checkpoints, storage size sample). +- `before_send` privacy processor (§7.4.9 enforcement). + +Value: opt-in detailed observability for users who consent, +useful for performance investigations and usage-pattern +debugging without exposing PII. + +Cost: ~150 LOC native + JS + backend. + +### 10.6 Phase 6 — refinements — pending + +- Tune sample rates from production data. +- Optional: dual backend bundles for Sentry-free consumers + if bundle size becomes a concern. + +--- + +## 11. Test plan + +### 11.1 Unit / integration + +- `src/sentry.ts` accepts a fake adapter; assert + `captureException` is called for synthetic ERROR + `stateChange` events with the correct phase tag. +- `src/sentry.ts` is a no-op if `configureSentry` was never + called: the existing `comapeo` client should produce + identical wire frames (no `metadata` injected). +- Backend loader: spawn `loader.mjs` without `--sentryDsn` + and assert `@sentry/node` is never resolved (check + `require.cache` / module-graph instrumentation in a test + harness). Spawn with `--sentryDsn=...` and assert + `Sentry.init` was called with the parsed values **before** + `index.mjs`'s top-level imports ran. +- Backend rollup output: assert the multi-entry build + produces `loader.mjs`, `index.mjs`, `importHook.js`, and + `lib/register.js`; that `loader.mjs` does not statically + reference `@sentry/node`; that the rewritten + `module.register('./importHook.js', ...)` call is in the + bundled output (no bare `import-in-the-middle/hook.mjs` + reference). +- Toggle gating in loader: build with `--captureApplicationData` + and assert `tracesSampleRate` is non-zero in the captured + init options; build without it and assert `tracesSampleRate` + is `0`. +- Config plugin: snapshot test that running the plugin with + a `sentry` argument writes the expected manifest + meta-data and Info.plist keys. Run without argument → + no entries written. +- Native config store: synthetic manifest / plist with + partial keys decode into `SentryConfig` with `null` for + missing optional fields; total absence returns `null`. +- Native breadcrumb emission: drive `NodeJSService` through a + scripted state-machine sequence and assert the breadcrumbs + posted to a fake Sentry SDK match the expected shape and + level mapping. +- Toggle persistence: write `setCaptureApplicationData(true)`, + read it back, kill the process, read it back again — value + survives. Re-launch and confirm the flag flows into the + Node argv as `--captureApplicationData`. +- `before_send` privacy processor: feed it events containing + base64-shaped strings, latitude/longitude markers, and raw + project IDs; assert each is redacted or dropped. + +### 11.2 Manual smoke + +- Run the example app with a temporary DSN (a test Sentry + project) configured via the plugin. Trigger: + - A deliberate JS-side throw inside a `comapeo.*` callback + → JS-layer event in Sentry. + - A backend throw via a debug RPC method → backend-layer + event with parent transaction. + - An Android FGS rootkey-store corruption (delete the + keystore alias) → ERROR event with `phase:rootkey` + from both FGS-process and main-process scopes, with + state-transition breadcrumbs in the trail. + - A node abort (`process.abort()` via a debug RPC) → + `sentry-android` native crash event. + - Force the FGS startup watchdog to fire (e.g. by + blocking `initPromise` in a test build) → timeout + event with `timeout:startup` tag. + - **FGS cold-start path**: from a freshly-killed app + state, trigger an FGS-only launch (background sync + intent) without bringing RN up. Verify boot + transaction lands in Sentry from the FGS process + alone. +- Toggle "capture application data" on, restart, and run + a scripted sync session. Confirm `comapeo.sync.session` + transaction appears with bucketed peer count and no + raw peer identities. Toggle off, restart, and confirm + the transaction stops appearing. +- Confirm no PII in events: open each event, scan for + base64-shaped 22-char strings, file paths under + `Application Support`, project secrets. +- Confirm distributed trace shows JS-client span → backend + RPC transaction → (with PR #1051) core operation spans. + +### 11.3 Regression + +- Run the existing `e2e/run-instrumented-tests.sh` and the + iOS `swift test` / `xcodebuild test` suite with + `configureSentry` *not* called → no behaviour change. +- Build size delta tracked: compare `android/src/main/assets/nodejs-project/` + bundle size before and after Phase 2. + +--- + +## 12. Decisions and remaining questions + +### 12.1 Decided + +| Question | Decision | +|---|---| +| Sentry SDK versions | `@sentry/node@^8`, `@sentry/react-native@^6`, `@sentry/core@^8`. OpenTelemetry-first majors so PR #1051 forwarding works without glue (§5.1). | +| `@comapeo/ipc@^8` client-side hook | Confirmed: `createMapeoClient` accepts `onRequestHook` directly. Pattern lifted from [`comapeo-mobile/src/frontend/lib/createMapeoApi.ts`](https://github.com/digidem/comapeo-mobile/blob/develop/src/frontend/lib/createMapeoApi.ts) (§6.2). | +| `release` source | Default to `versionName + "+" + versionCode` (Android) / `CFBundleShortVersionString + "+" + CFBundleVersion` (iOS). Successive EAS builds of the same marketing version produce distinct releases. Plugin override always wins (§4.1). | +| Boot transaction sample rate | Force 100% even when overall `tracesSampleRate` is low. Boot is once-per-process and high-value. Document quota implications (§7.4.2). | +| Bundle size strategy | Single bundle with rollup chunk-splitting — accept the disk cost. No dual-bundle build for v1 (§5.1). | +| Plugin behaviour with no `sentry` arg | No-op silently. Treat absent meta-data / plist keys as Sentry off. Used by `apps/example/` (§4.1). | +| Sourcemap upload | Consumer responsibility. Module ships `*.map` in npm package; consumer excludes from APK/IPA and runs `sentry-cli sourcemaps upload` against `node_modules/.../nodejs-project/` in their own CI with their own credentials (§5.1). | +| Toggle UI surface | Out of scope for this module. Module exposes `getCaptureApplicationData` / `setCaptureApplicationData` only; consumer builds the settings UI and the restart prompt (§9.2). | +| Capture-application-data default | Per-environment, decided by consumer at build time via `captureApplicationDataDefault` plugin field. EAS env var pattern: default to `true` when `environment !== "production"`. Once user flips the switch their explicit choice wins (§9.7). | + +### 12.2 Still open / verify-during-build + +1. **Cross-process scope on Android**: Phase 2 smoke test must + verify FGS-process Sentry events carry `proc:fgs` and that + `@sentry/react-native`'s main-process tags don't override + them in the dashboard. +2. **Lazy chunk on iOS `--jitless`**: dynamic `import()` of a + separate ESM chunk should work but isn't proven for our + specific config. Phase 3 CI smoke test exercises both + with-Sentry and without-Sentry loader paths on iOS; block + the phase landing if either fails. iOS is also the platform + we already stub `@comapeo/core`'s maps plugin to keep + undici out (see `backend/lib/maps-stub.js`); the Sentry + chunk is an additional surface for this kind of iOS-only + quirk. +3. **Offline transport**: deferred to a later phase. v1 drops + Sentry events when the device is offline. CoMapeo is heavily + used in the field; this is a known limitation that needs + addressing before the integration is genuinely production-ready + for fieldwork. Tracked as a follow-up rather than v1 scope. + Reference: `sentry-offline-transport-better-sqlite` is + what comapeo-mobile uses today. + +--- + +## 13. Summary of file changes + +Concrete touch list, by phase, for code review. + +**Phase 1 — landed** + +- `src/sentry.ts` (new) — `configureSentry`, hand-rolled + `SentryAdapter` interface (no compile-time `@sentry/*` import), + state listeners that emit a breadcrumb on every transition and + a `captureException` on ERROR. +- `src/sentry-internal.ts` (new) — module-private adapter holder + read by Phase 3's RPC `onRequestHook`. +- `package.json` — add `@sentry/react-native` to + `peerDependencies` with `peerDependenciesMeta.optional: true`, + add `exports` field exposing `./sentry` sub-export and + `./app.plugin`. + +**Phase 2a — landed** + +- `app.plugin.js` (new, module root) — ESM Expo plugin (because + this package is `"type": "module"`). `withAndroidManifest` + upserts ``; `withInfoPlist` upserts plist keys. + Validates `dsn` + `environment` are present; throws at + prebuild on misconfiguration. No-op when registered without + a `sentry` argument. +- `android/src/main/java/com/comapeo/core/SentryConfig.kt` + (new) — typed manifest reader. Pure `load(metaString, + defaultRelease)` overload for unit tests; production + `loadFromManifest(context)` reads + `PackageManager.getApplicationInfo(...).metaData`. Default + release = `versionName + "+" + versionCode` (longVersionCode + on API 28+). +- `android/src/test/java/com/comapeo/core/SentryConfigTest.kt` + (new) — JVM unit tests covering DSN-absent, missing-env + throw, plugin-release override, numeric coercion, + unparseable-numerics drop to null, captureApplicationDataDefault + strict bool. +- `ios/SentryConfig.swift` (new) — typed plist reader. Pure + `load(from: [String: Any], defaultRelease)` for unit tests; + production `loadFromMainBundle()` reads + `Bundle.main.infoDictionary`. Accepts both string-coerced + values (the plugin's normal output) and native plist types + (defensive against hand-edits). Default release = + `CFBundleShortVersionString + "+" + CFBundleVersion`. +- `ios/Tests/SentryConfigTests.swift` (new) — XCTest cases + mirroring the Kotlin tests. +- `ios/Package.swift` — add `SentryConfig.swift` to the SPM + target's `sources` list so the macOS-native test suite + compiles it. + +**Phase 2b — landed (Android only)** + +- `android/build.gradle` — add `compileOnly` + + `testImplementation` on `io.sentry:sentry-android-core:7.20.1`. +- `android/src/main/java/com/comapeo/core/SentryFgsBridge.kt` + (new) — guard layer: `Class.forName` probe (with + `initialize=false`) gates every public method; no + `io.sentry.*` imports here. +- `android/src/main/java/com/comapeo/core/SentryFgsBridgeImpl.kt` + (new) — impl: `SentryAndroid.init`, `addBreadcrumb`, + `captureException`, `captureMessage`, + `startBootTransaction` / `startBootSpan` / `finishSpan`. + Loaded only when the guard says the SDK is present. +- `android/.../ComapeoCoreService.kt` — read config in + `onCreate`, init the FGS-process Sentry hub via the bridge. + FGS lifecycle breadcrumbs on `onCreate` / `onStartCommand` + / `onDestroy`. Capture `timeout:fgsStop` on stop-timeout. +- `android/.../NodeJSService.kt` — open `comapeo.boot` + transaction in `start()`; emit state-transition + breadcrumbs in `applyAndEmit`; close transaction + + in-flight phase spans on STARTED / ERROR; wrap + `RootKeyStore.loadOrInitialize` in a `boot.rootkey-load` + span; open `boot.init-frame` after init send and close on + `ready` frame; control-frame breadcrumbs on + `started`/`ready`/`stopping`/`error`/malformed; capture + `timeout:startup` on watchdog fire; FGS-side + `captureException` on rootkey failure tagged + `phase:rootkey`. +- `android/src/test/java/com/comapeo/core/SentryFgsBridgeTest.kt` + (new) — JVM unit tests pinning the no-op guard contract: + pre-init calls (addBreadcrumb / captureException / + captureMessage / startBootTransaction / startBootSpan / + finishSpan) all return cleanly without throwing; the + `Class.forName` probe finds sentry-android on the test + classpath and is idempotent. +- `eslint.config.js` — ignore `.claude/**/*` so leftover + worktree artifacts don't break the lint cache. + +**Phase 2b — iOS deferred (likely never needed)** + +iOS is a single-process app. The host's `@sentry/react-native` +runs `SentrySDK.start(...)` in-process and the §6.3 JS +adapter feeds state transitions / errors into that hub. The +"FGS-process scope" concern that motivates the Android +bridge doesn't exist on iOS. If we later want native-side +boot spans on iOS for symmetry, we'd add a thin +`SentrySDK.startTransaction(...)` wrapper in +`ios/NodeJSService.swift` — but that needs the `Sentry` pod +linked (the host transitively brings it via +`@sentry/react-native`), and the value over the JS adapter +is small. Tracked as a soft follow-up only. + +**Phase 3 — backend instrumentation (loader + multi-entry bundle)** + +- `backend/package.json` — `@sentry/node@^8`, `@sentry/core@^8`, + `import-in-the-middle` dependencies. +- `backend/loader.mjs` (new) — argv-driven `Sentry.init`, + dynamic import of `index.mjs`. Mirrors + `comapeo-mobile/src/backend/loader.js`. +- `backend/rollup.config.ts` — multi-entry input (`loader`, + `index`, `importHook`, `lib/register`). `sourcemap: true` + stays; no Sentry rollup plugin (consumer uploads). +- `package.json` (module root) — verify built sourcemaps + (`*.map` files alongside the bundles in + `android/src/main/assets/nodejs-project/` and + `ios/nodejs-project/`) are included in the npm package + `files` field. +- `README.md` — new section documenting the consumer's + responsibilities: APK/IPA `.map` exclusion (small gradle / + Xcode snippet) and `sentry-cli sourcemaps upload` invocation + tagged with `release = versionName + "+" + versionCode`. +- `backend/rollup-plugins/rollup-plugin-import-hook.mjs` (new) + — port of comapeo-mobile's path-rewrite plugin so + `module.register('import-in-the-middle/hook.mjs', …)` lands + on the bundled `./importHook.js`. +- `scripts/build-backend.ts` — pass `loader.mjs` as the new + spawn target through to native asset trees; ensure the + Sentry chunk and `importHook`/`lib/register` files are + copied alongside `index.mjs`. +- `ios/NodeJSService.swift`, `android/.../NodeJSService.kt` + — change the `runNode` / `startWithArgs` call to pass + `loader.mjs` as the entry script (was `index.mjs`). +- `backend/index.js` — read + `globalThis.__comapeoSentryConfig` (set by loader); + hook `handleFatal` with `Sentry.captureException`; remove + any `sentry` field handling from the `init` control-frame + handler (the field is no longer sent — argv carries it). +- `backend/lib/comapeo-rpc.js` — accept `sentry` option, + register `onRequestHook`. +- `src/ComapeoCoreModule.ts` — pass `getMetadata` to + `createMapeoClient` (or wrapper fallback). + +**Phase 4 — OpenTelemetry forwarding** + +- `backend/package.json` — bump `@comapeo/core` once PR #1051 + ships. +- Smoke test verification, no code changes expected. + +**Phase 5 — capture-application-data toggle** + +- `android/src/main/java/com/comapeo/core/SentryPrefsStore.kt` + (new) — `SharedPreferences` read/write of the toggle, + plus `getCaptureApplicationData` / + `setCaptureApplicationData` bridge. +- `ios/SentryPrefsStore.swift` (new) — `UserDefaults` + equivalent. +- `android/.../ComapeoCoreModule.kt`, `ios/ComapeoCoreModule.swift` + — Expo bridge `Function` entries for the two methods. +- `src/sentry.ts` — JS exports `getCaptureApplicationData`, + `setCaptureApplicationData`. +- `backend/lib/comapeo-rpc.js` — wire `tracesSampleRate` + conditionally on the toggle; register sync-session emitter + only when on. +- `backend/index.js` — accept `captureApplicationData` in + init payload; gate memory-checkpoint timer and storage + sampling. +- `backend/before-send.js` (new) — `before_send` privacy + processor (the §7.4.9 redaction belt-and-suspenders). + +--- diff --git a/eslint.config.js b/eslint.config.js index 937e2f7..5d91e01 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -24,7 +24,11 @@ export default defineConfig([ includeIgnoreFile(gitExcludePath), { name: "ignores", - ignores: ["android/**/*", "apps/**/*", "ios/**/*"], + // `.claude/**` excludes claude-code worktrees, which carry + // their own `apps/`, `backend/`, etc. trees that the + // root-level `apps/**` / etc. ignores don't match (they're + // nested under `.claude/worktrees//`). + ignores: ["android/**/*", "apps/**/*", "ios/**/*", ".claude/**/*"], }, js.configs.recommended, tseslint.configs.recommended, diff --git a/ios/Package.swift b/ios/Package.swift index 89cc43a..785add4 100644 --- a/ios/Package.swift +++ b/ios/Package.swift @@ -26,6 +26,7 @@ let package = Package( "NodeJSService.swift", "ControlFrame.swift", "Log.swift", + "SentryConfig.swift", ] ), .testTarget( diff --git a/ios/SentryConfig.swift b/ios/SentryConfig.swift new file mode 100644 index 0000000..95b3de1 --- /dev/null +++ b/ios/SentryConfig.swift @@ -0,0 +1,154 @@ +import Foundation + +/// Phase 2 of the Sentry integration plan +/// (docs/sentry-integration-plan.md §4.1, §4.2). Typed view of the +/// Info.plist keys the Expo plugin (`app.plugin.js`) writes at +/// prebuild time. +/// +/// `loadFromBundle` returns `nil` when the DSN key is absent, which +/// is the consumer's signal that Sentry was not configured (the +/// plugin omits all entries when invoked without a `sentry` +/// argument, or when not registered at all). Treat nil as "Sentry +/// off" — do not init the SDK, do not pass `--sentryDsn` argv flags +/// to the embedded backend (Phase 3). +/// +/// Phase 2 ships the data type and reader; the actual native-side +/// breadcrumb / span / event calls in `NodeJSService` are a Phase +/// 2.5 follow-up because they require linking against `Sentry` +/// (sentry-cocoa), which this PR deliberately doesn't add to the +/// podspec or SwiftPM target. iOS doesn't have the FGS-process +/// init split that Android has — the host app's +/// `@sentry/react-native` already covers the single-process SDK. +struct SentryConfig: Equatable { + let dsn: String + let environment: String + let release: String + let sampleRate: Double? + let tracesSampleRate: Double? + /// Cap on RPC argument bytes captured to Sentry. `nil` (or 0) + /// means RPC arguments are never captured — the default. Only + /// developer debug builds are expected to set this; see plan + /// §7.4.9 for the never-capture list. + let rpcArgsBytes: Int? + /// Per-environment default for the §9 capture-application-data + /// toggle when the user has not yet set it explicitly. `nil` + /// means absent → native treats as `false`. Wired via the + /// plugin so a consumer can opt internal/test builds in by + /// default without changing JS code. + let captureApplicationDataDefault: Bool? + + /// Info.plist keys. Must stay in sync with app.plugin.js's + /// `IOS_KEYS`. Prefixed with `ComapeoCore` to avoid colliding + /// with `@sentry/react-native`'s auto-config keys (`SentryDsn`, + /// …) — those belong to the host's Sentry SDK init. + enum Key { + static let dsn = "ComapeoCoreSentryDsn" + static let environment = "ComapeoCoreSentryEnvironment" + static let release = "ComapeoCoreSentryRelease" + static let sampleRate = "ComapeoCoreSentrySampleRate" + static let tracesSampleRate = "ComapeoCoreSentryTracesSampleRate" + static let rpcArgsBytes = "ComapeoCoreSentryRpcArgsBytes" + static let captureApplicationDataDefault = "ComapeoCoreSentryCaptureApplicationDataDefault" + } + + /// Production entry point. Reads `Bundle.main.infoDictionary` + /// and falls back to `CFBundleShortVersionString + + /// CFBundleVersion` for the release tag when the plugin didn't + /// supply one (§4.1) — successive EAS builds of the same + /// marketing version then get distinct release strings because + /// EAS auto-increments `CFBundleVersion`. + static func loadFromMainBundle() -> SentryConfig? { + let info = Bundle.main.infoDictionary ?? [:] + return load( + from: info, + defaultRelease: { resolveDefaultRelease(info: info) } + ) + } + + /// Pure variant for unit-testing. Takes the plist-equivalent + /// dictionary and a producer for the `release` fallback so + /// tests can run without a real `Bundle.main.infoDictionary`. + /// + /// Returns nil when DSN is absent (sentry-off state). + /// + /// Returns nil with an `NSLog` warning when DSN is present but + /// `environment` is missing. The plugin refuses to prebuild in + /// that state, but a stale prebuild from before the requirement + /// was added would still ship — the original `fatalError` + /// behaviour crashed every cold start with no way to recover. + /// Treat the misconfigured combination as Sentry-off and let + /// the next `expo prebuild` rewrite the plist correctly. + static func load( + from info: [String: Any], + defaultRelease: () -> String + ) -> SentryConfig? { + guard let dsn = info[Key.dsn] as? String, !dsn.isEmpty else { + return nil + } + guard let environment = info[Key.environment] as? String, !environment.isEmpty else { + NSLog( + "[ComapeoCore.SentryConfig] %@ missing from Info.plist while " + + "%@ is set. Re-run `expo prebuild` so the plugin can " + + "rewrite the plist. Sentry disabled until then.", + Key.environment, Key.dsn + ) + return nil + } + let release = (info[Key.release] as? String) ?? defaultRelease() + return SentryConfig( + dsn: dsn, + environment: environment, + release: release, + sampleRate: parseDouble(info[Key.sampleRate]), + tracesSampleRate: parseDouble(info[Key.tracesSampleRate]), + rpcArgsBytes: parseInt(info[Key.rpcArgsBytes]), + captureApplicationDataDefault: parseStrictBool( + info[Key.captureApplicationDataDefault] + ) + ) + } + + /// Default release tag when the plugin didn't supply one + /// (§4.1): `CFBundleShortVersionString + "+" + CFBundleVersion`. + /// Falls back to "unknown+0" if either is absent — never + /// reached on a properly-prebuilt app, but defensive against a + /// misconfigured Info.plist crashing the loader. + private static func resolveDefaultRelease(info: [String: Any]) -> String { + let version = (info["CFBundleShortVersionString"] as? String) ?? "unknown" + let build = (info["CFBundleVersion"] as? String) ?? "0" + return "\(version)+\(build)" + } + + /// Plugin coerces numeric values to strings on the way in to + /// keep parity with the Android side (manifest meta-data is + /// string-typed). Accept either a `String` or a `Double`/`Int` + /// in case a hand-written plist uses native plist types. + private static func parseDouble(_ value: Any?) -> Double? { + if let s = value as? String { return Double(s) } + if let d = value as? Double { return d } + if let i = value as? Int { return Double(i) } + return nil + } + + private static func parseInt(_ value: Any?) -> Int? { + if let s = value as? String { return Int(s) } + if let i = value as? Int { return i } + return nil + } + + /// Strict bool parse: only "true"/"false" (or a real `Bool`) + /// resolve to a value. A stray "1"/"yes" returns nil so the + /// native default isn't silently flipped by a hand-edited + /// plist. + private static func parseStrictBool(_ value: Any?) -> Bool? { + if let b = value as? Bool { return b } + if let s = value as? String { + switch s { + case "true": return true + case "false": return false + default: return nil + } + } + return nil + } +} diff --git a/ios/Tests/SentryConfigTests.swift b/ios/Tests/SentryConfigTests.swift new file mode 100644 index 0000000..5c1cf2a --- /dev/null +++ b/ios/Tests/SentryConfigTests.swift @@ -0,0 +1,185 @@ +import XCTest +@testable import ComapeoCore + +/// Pure unit tests for `SentryConfig.load`. Mirrors the Android +/// `SentryConfigTest` JVM unit tests — same fixture cases, same +/// pure-getter pattern. Runs as part of the swift-test target with +/// no simulator dependency. +/// +/// Coverage rationale: this is the deserialization seam between the +/// Expo plugin's plist writes and the native consumers (SDK init, +/// argv flags). A regression here would silently disable Sentry or +/// ship the wrong environment tag — both of which produce useless +/// Sentry projects rather than visible failures. +final class SentryConfigTests: XCTestCase { + + private let defaultRelease: () -> String = { "1.2.3+42" } + + func testReturnsNilWhenDsnMissing() { + // Mirrors the "plugin not registered, or registered without + // a sentry argument" case: no plist keys were written, so + // loading produces nil — the documented "Sentry off" state. + let config = SentryConfig.load(from: [:], defaultRelease: defaultRelease) + XCTAssertNil(config) + } + + func testReturnsNilWhenDsnEmptyString() { + // Defensive: a plist round-trip can produce an empty-string + // key. Treat that the same as absent. + let config = SentryConfig.load( + from: [SentryConfig.Key.dsn: ""], + defaultRelease: defaultRelease + ) + XCTAssertNil(config) + } + + func testLoadsRequiredFields() { + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://abc@sentry.example/1", + SentryConfig.Key.environment: "production", + ], + defaultRelease: defaultRelease + ) + XCTAssertNotNil(config) + XCTAssertEqual(config?.dsn, "https://abc@sentry.example/1") + XCTAssertEqual(config?.environment, "production") + XCTAssertEqual(config?.release, "1.2.3+42") + XCTAssertNil(config?.sampleRate) + XCTAssertNil(config?.tracesSampleRate) + XCTAssertNil(config?.rpcArgsBytes) + XCTAssertNil(config?.captureApplicationDataDefault) + } + + func testPluginReleaseOverridesDefault() { + // When the consumer passes `release` to the plugin, that + // value wins over CFBundleShortVersionString+CFBundleVersion. + // Used to embed git SHAs from EAS_BUILD_GIT_COMMIT_HASH + // (plan §4.1). + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "qa", + SentryConfig.Key.release: "deadbeef", + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(config?.release, "deadbeef") + } + + func testParsesNumericStringFields() { + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "production", + SentryConfig.Key.sampleRate: "0.5", + SentryConfig.Key.tracesSampleRate: "0.1", + SentryConfig.Key.rpcArgsBytes: "0", + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(config?.sampleRate, 0.5) + XCTAssertEqual(config?.tracesSampleRate, 0.1) + XCTAssertEqual(config?.rpcArgsBytes, 0) + } + + func testParsesNumericNativePlistTypes() { + // The plugin coerces values to strings, but a hand-written + // plist may use native plist types (real /). + // Accept both so a developer hand-editing for a one-off + // build doesn't have to remember to quote. + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "production", + SentryConfig.Key.sampleRate: 0.5, + SentryConfig.Key.rpcArgsBytes: 0, + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(config?.sampleRate, 0.5) + XCTAssertEqual(config?.rpcArgsBytes, 0) + } + + func testUnparseableNumericFieldsAreNil() { + // The plugin coerces values to strings on the way in; if a + // future plugin bug or a hand-edited plist produces an + // unparseable value, we'd rather drop the field than crash. + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "production", + SentryConfig.Key.sampleRate: "not-a-number", + SentryConfig.Key.rpcArgsBytes: "1.5", + ], + defaultRelease: defaultRelease + ) + XCTAssertNil(config?.sampleRate) + XCTAssertNil(config?.rpcArgsBytes) + } + + func testCaptureApplicationDataDefaultParsesString() { + let on = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "qa", + SentryConfig.Key.captureApplicationDataDefault: "true", + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(on?.captureApplicationDataDefault, true) + + let off = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "production", + SentryConfig.Key.captureApplicationDataDefault: "false", + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(off?.captureApplicationDataDefault, false) + } + + func testCaptureApplicationDataDefaultParsesNativeBool() { + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "production", + SentryConfig.Key.captureApplicationDataDefault: true, + ], + defaultRelease: defaultRelease + ) + XCTAssertEqual(config?.captureApplicationDataDefault, true) + } + + func testCaptureApplicationDataDefaultStrictness() { + // Only "true"/"false" (or a real Bool) parse. A stray + // "1"/"yes" returns nil → native treats absence as false. + // Defensive against hand-written plists silently flipping + // the default. + let config = SentryConfig.load( + from: [ + SentryConfig.Key.dsn: "https://x@sentry.io/1", + SentryConfig.Key.environment: "qa", + SentryConfig.Key.captureApplicationDataDefault: "yes", + ], + defaultRelease: defaultRelease + ) + XCTAssertNil(config?.captureApplicationDataDefault) + } + + func testMissingEnvironmentReturnsNilNotFatal() { + // The plugin refuses to prebuild without environment (§4.1), + // but a stale prebuild from before that validation was added + // would still ship. The original `fatalError` behaviour + // crashed every cold start with no way to recover. Now we + // log loud and return nil (Sentry off) so the host app + // keeps running; the misconfiguration becomes visible the + // next time someone re-runs `expo prebuild`. + let config = SentryConfig.load( + from: [SentryConfig.Key.dsn: "https://x@sentry.io/1"], + defaultRelease: defaultRelease + ) + XCTAssertNil(config, "DSN-without-environment should disable Sentry rather than crash") + } +} diff --git a/package.json b/package.json index 821c212..60d69b7 100644 --- a/package.json +++ b/package.json @@ -5,8 +5,23 @@ "main": "build/index.js", "types": "build/index.d.ts", "type": "module", + "exports": { + ".": { + "types": "./build/index.d.ts", + "import": "./build/index.js", + "default": "./build/index.js" + }, + "./sentry": { + "types": "./build/sentry.d.ts", + "import": "./build/sentry.js", + "default": "./build/sentry.js" + }, + "./app.plugin": "./app.plugin.js", + "./package.json": "./package.json" + }, "files": [ "expo-module.config.json", + "app.plugin.js", "android/build.gradle", "android/CMakeLists.txt", "android/src/debug/", @@ -75,7 +90,13 @@ "peerDependencies": { "expo": "*", "react": "*", - "react-native": "*" + "react-native": "*", + "@sentry/react-native": "^6" + }, + "peerDependenciesMeta": { + "@sentry/react-native": { + "optional": true + } }, "dependencies": { "@comapeo/ipc": "^8.0.0" diff --git a/src/sentry-internal.ts b/src/sentry-internal.ts new file mode 100644 index 0000000..5c73a97 --- /dev/null +++ b/src/sentry-internal.ts @@ -0,0 +1,11 @@ +import type { SentryAdapter } from "./sentry"; + +let adapter: SentryAdapter | null = null; + +export function setActiveAdapter(next: SentryAdapter | null): void { + adapter = next; +} + +export function activeAdapter(): SentryAdapter | null { + return adapter; +} diff --git a/src/sentry.ts b/src/sentry.ts new file mode 100644 index 0000000..6330631 --- /dev/null +++ b/src/sentry.ts @@ -0,0 +1,259 @@ +/** + * Phase 1 of the Sentry integration (see docs/sentry-integration-plan.md): + * a small JS-side adapter handoff so this module can capture the errors it + * already surfaces via `state` into the host app's already-initialized + * `@sentry/react-native`. This file is the only public Sentry surface and + * is reachable as the `@comapeo/core-react-native/sentry` sub-export. + * + * Imports of this file should never trigger the Sentry SDK to load. The + * SentryAdapter interface is hand-rolled rather than picked from + * `@sentry/react-native`'s type tree so consumers that don't install the + * (optional) peer dep don't get a typecheck error. + */ +import { activeAdapter, setActiveAdapter } from "./sentry-internal"; +import { state } from "./ComapeoCoreModule"; +import type { ComapeoErrorInfo, ComapeoState } from "./ComapeoCore.types"; + +/** + * Sentry severity levels we use. Subset of `@sentry/types`' + * `SeverityLevel` — listed by hand so this file has no compile-time + * dependency on a Sentry package being installed. + */ +type SentrySeverityLevel = "fatal" | "error" | "warning" | "info" | "debug"; + +/** + * Subset of Sentry's `Breadcrumb` shape we use. Plain object literal at + * the call site, not a class — accepted by both `@sentry/react-native` + * and `@sentry/node`. + */ +interface SentryBreadcrumb { + category?: string; + type?: string; + level?: SentrySeverityLevel; + message?: string; + data?: Record; + timestamp?: number; +} + +/** + * Subset of Sentry's `ScopeContext` we pass as the `captureContext` + * second argument to `captureException` / `captureMessage`. Sentry v8 + * accepts this shorthand in place of an `EventHint`. + */ +interface SentryCaptureContext { + level?: SentrySeverityLevel; + tags?: Record; + extra?: Record; + fingerprint?: string[]; +} + +/** + * Opaque span/transaction handle returned by `startSpan`. Treated as + * unknown by this module — Phase 3 (RPC client tracing) will use the + * span via `getTraceData({ span })` from `@sentry/core`, not via this + * type. + */ +interface SentrySpan { + setStatus?(status: { code: number; message?: string }): void; +} + +/** + * The methods this module calls on Sentry. Hand-rolled rather than + * `Pick` so: + * + * 1. Consumers who don't install the optional `@sentry/react-native` + * peer dep don't get a typecheck error from importing this module. + * 2. The contract is explicit: changes to Sentry's type tree don't + * silently widen our coupling. + * + * The signatures match `@sentry/react-native@^6` (which re-exports + * `@sentry/core@^8`) — passing `Sentry` directly satisfies this type + * via structural compatibility. + */ +export interface SentryAdapter { + captureException( + exception: unknown, + captureContext?: SentryCaptureContext, + ): string; + captureMessage( + message: string, + captureContext?: SentryCaptureContext | SentrySeverityLevel, + ): string; + addBreadcrumb(breadcrumb: SentryBreadcrumb): void; + /** + * Phase 3 (RPC client tracing) will read this. Phase 1 doesn't call + * it but the type is part of the contract so consumers see the full + * set up-front. + */ + startSpan( + options: { name: string; op?: string; forceTransaction?: boolean }, + callback: (span: SentrySpan) => T, + ): T; + getActiveSpan(): SentrySpan | undefined; + /** + * Phase 3: continues a trace from incoming `sentry-trace` / `baggage` + * headers. Listed for forward-compat; not called in Phase 1. + */ + continueTrace( + headers: { sentryTrace?: string; baggage?: string }, + callback: () => T, + ): T; +} + +export interface ComapeoSentryConfig { + /** + * The host app's already-initialized `@sentry/react-native` (or any + * object satisfying {@link SentryAdapter}). The module never calls + * `Sentry.init()`; the host app does, and the native SDK is initialized + * from manifest/plist values written by the config plugin (Phase 2). + */ + sentry: SentryAdapter; +} + +/** + * Disposer returned by {@link configureSentry}. Detaches the listeners + * and clears the stored adapter. Idempotent. Mostly useful for tests; + * production code wires `configureSentry` once at app start and never + * tears it down. + */ +export type ComapeoSentryDisposer = () => void; + +/** + * Hand the host app's Sentry adapter to this module. Idempotent — + * calling twice replaces the previous adapter and detaches the previous + * listeners. Returns a disposer for tests. + * + * Must be called once at app start (after the host has called + * `Sentry.init()`). State observers attach immediately; the §6.2 RPC + * client tracing path (Phase 3) reads {@link activeAdapter} at call + * time, so the order of `configureSentry` vs. first `comapeo.*` call + * does not matter for that path. + * + * This does NOT configure DSN / environment / release. Those are baked + * into native config at build time by the Expo plugin (`app.plugin.js`, + * §4.1) and read by both `@sentry/react-native` (in the main process) + * and the embedded backend (Phase 3). + */ +export function configureSentry( + config: ComapeoSentryConfig, +): ComapeoSentryDisposer { + const adapter = config.sentry; + + // Detach any previous listeners — calling configureSentry twice is + // valid (e.g. a host that re-inits Sentry with a fresh DSN), and the + // previous closures captured the previous adapter, so we have to + // unregister them or duplicate breadcrumbs would land on every event. + detachListeners(); + + setActiveAdapter(adapter); + attachListeners(adapter); + + return () => { + detachListeners(); + if (activeAdapter() === adapter) setActiveAdapter(null); + }; +} + +// ── State listeners ───────────────────────────────────────────── + +let stateChangeListener: + | ((s: ComapeoState, info: ComapeoErrorInfo | null) => void) + | null = null; +let messageErrorListener: ((err: Error) => void) | null = null; + +function attachListeners(adapter: SentryAdapter): void { + // Per §7.4.1 every transition becomes a breadcrumb. Phase 1 emits + // these from the JS adapter (main process only); Phase 2b adds + // FGS-side emissions from `NodeJSService` so the FGS-process scope + // gets logcat / foreground-state context. The two sources land on + // different scopes and Sentry de-dupes via fingerprinting on the + // eventual `captureException`. + stateChangeListener = (s, info) => { + adapter.addBreadcrumb({ + category: "comapeo.state", + type: "state", + level: s === "ERROR" ? "error" : "info", + message: `comapeo state → ${s}`, + data: info + ? { + state: s, + errorPhase: info.errorPhase, + errorMessage: info.errorMessage, + } + : { state: s }, + }); + + // Per §6.3 ERROR transitions also fire a captureException. The + // synthesized Error encodes the phase in the name so Sentry's + // grouping treats e.g. rootkey vs. starting-timeout as distinct + // issues without us having to maintain a fingerprint table. The + // `proc:main` tag pairs with Phase 2b's `proc:fgs` capture from + // the FGS process, so a single error fanned out to multiple + // scopes is still filterable per-process in the dashboard. + if (s === "ERROR" && info) { + const e = new Error(info.errorMessage); + e.name = `ComapeoError:${info.errorPhase}`; + adapter.captureException(e, { + tags: { + layer: "rn", + proc: "main", + "comapeo.phase": info.errorPhase, + "comapeo.state": s, + }, + }); + } + }; + + // Per §6.3 messageerror is a control-socket parse failure; it never + // changes the lifecycle state, so we capture as a warning rather + // than escalating to error. `state.messageerror` already wraps the + // native payload in an Error for ergonomics — but the wrapped + // message can include arbitrary bytes from a corrupted control + // frame (the parser surfaces the offending input verbatim for + // debugging). Truncate before forwarding to Sentry so a runaway + // payload can't blow event size limits or smuggle binary data + // into the dashboard. + messageErrorListener = (err) => { + const truncated = truncateForSentry(err.message); + const wrapped = new Error(truncated); + wrapped.name = err.name; + adapter.captureException(wrapped, { + tags: { + layer: "rn", + proc: "main", + source: "control-socket", + }, + level: "warning", + }); + }; + + state.addListener("stateChange", stateChangeListener); + state.addListener("messageerror", messageErrorListener); +} + +/** + * Cap on payload bytes we forward into a Sentry event. The control + * socket framing (`backend/lib/message-port.js`) accepts up to + * ~16 MB per frame; a corrupted frame the parser couldn't decode + * could include arbitrary binary bytes. Sentry's per-event size + * limit is much smaller, and binary noise is useless on the + * dashboard. 256 chars retains the human-debuggable prefix while + * keeping events cheap. + */ +const MESSAGE_ERROR_MAX_LEN = 256; + +function truncateForSentry(input: string): string { + if (input.length <= MESSAGE_ERROR_MAX_LEN) return input; + return `${input.slice(0, MESSAGE_ERROR_MAX_LEN)}… [truncated ${input.length - MESSAGE_ERROR_MAX_LEN} chars]`; +} + +function detachListeners(): void { + if (stateChangeListener) { + state.removeListener("stateChange", stateChangeListener); + stateChangeListener = null; + } + if (messageErrorListener) { + state.removeListener("messageerror", messageErrorListener); + messageErrorListener = null; + } +}