diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py
index f4a674cb5d..00cc2d77d0 100644
--- a/dimos/core/native_module.py
+++ b/dimos/core/native_module.py
@@ -183,11 +183,17 @@ def stop(self) -> None:
                 )
                 self._process.kill()
                 self._process.wait(timeout=5)
-        if self._watchdog is not None and self._watchdog is not threading.current_thread():
-            self._watchdog.join(timeout=2)
-        self._watchdog = None
         self._process = None
         super().stop()
+        # Join the watchdog AFTER super().stop() so all module threads are
+        # cleaned up first.  When the watchdog itself is the caller (crash
+        # path), it skips joining itself — but the thread exits naturally
+        # right after this returns.  A second stop() from external code
+        # (e.g. test teardown) will reach here and join the now-finished
+        # watchdog thread, preventing monitor_threads from seeing a leak.
+        if self._watchdog is not None and self._watchdog is not threading.current_thread():
+            self._watchdog.join(timeout=2)
+            self._watchdog = None
 
     def _watch_process(self) -> None:
         """Block until the native process exits; trigger stop() if it crashed."""
diff --git a/dimos/core/test_native_module.py b/dimos/core/test_native_module.py
index e77b8f9a53..a7e6bd2b9a 100644
--- a/dimos/core/test_native_module.py
+++ b/dimos/core/test_native_module.py
@@ -107,6 +107,10 @@ def test_process_crash_triggers_stop() -> None:
 
     assert mod._process is None, f"Watchdog did not clean up after process {pid} died"
 
+    # Join the watchdog thread. stop() is idempotent but will now join the
+    # watchdog on the second call since the reference is preserved.
+    mod.stop()
+
 
 @pytest.mark.slow
 def test_manual(dimos_cluster: ModuleCoordinator, args_file: str) -> None: