From af751d69fd73c951a3c28796c95bd5f38aeb9f2c Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Sun, 28 Jun 2026 00:34:10 +0100
Subject: [PATCH 1/3] fix: MockTTS destructor must not stop the shared playback
 thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TTS.playback is a class-level attribute shared by every TTS instance in the
process. The inherited TTS.__del__ chains into TTS.shutdown() -> TTS.stop() ->
TTS.playback.stop(), so when an earlier PlaybackServiceHarness's MockTTS was
garbage-collected, its destructor terminated whatever PlaybackThread was
*currently* registered there — which by then belonged to a later, still-running
harness. The victim thread
had _terminated set and exited its loop, so its queued speak never played and
ovos.audio.output.ended was never emitted, hanging the next speak() until
timeout. GC timing made this a flaky TimeoutError that surfaced only after
several harness create/destroy cycles (e.g. mid-file in a consumer's
test/end2end suite).

Override MockTTS.__del__ as a no-op: the harness already manages playback-thread
lifecycle explicitly via PlaybackService.shutdown() on context exit, so a mock
instance must never tear down the shared thread on collection.

Add regression tests: a deterministic guard that fires a stale mock's
destructor while a later harness owns TTS.playback and asserts that the live
thread is not terminated and can keep speaking, plus a
many-sequential-harnesses smoke test.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 ovoscope/audio.py                    | 20 ++++++++
 test/unittests/test_audio_harness.py | 74 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/ovoscope/audio.py b/ovoscope/audio.py
index a1d816e..bd504c1 100644
--- a/ovoscope/audio.py
+++ b/ovoscope/audio.py
@@ -512,6 +512,26 @@ def reset(self) -> None:
         """Clear the list of recorded spoken utterances."""
         self.spoken_utterances.clear()
 
+    def __del__(self) -> None:
+        """No-op destructor.
+
+        ``TTS.__del__`` chains into ``TTS.shutdown() -> TTS.stop() ->
+        TTS.playback.stop()``. ``TTS.playback`` is a **class-level** attribute
+        shared by every TTS instance in the process, so when an earlier
+        harness's MockTTS is garbage-collected its inherited destructor stops
+        whatever PlaybackThread is *currently* registered there — which, by
+        then, belongs to a later, still-running harness. The victim thread sets
+        ``_terminated`` and exits mid-run, so its queued speak never plays and
+        ``ovos.audio.output.ended`` is never emitted, hanging the next
+        ``speak()`` wait.
+
+        GC timing is nondeterministic, so the failure surfaces as a flaky
+        ``TimeoutError`` only after several harness instances have been created
+        and collected. The harness already manages thread lifecycle explicitly
+        via ``PlaybackService.shutdown()`` on context exit, so a MockTTS
+        instance must never tear down the shared playback thread on collection.
+        """
+
 
 # ---------------------------------------------------------------------------
 # PlaybackServiceHarness
diff --git a/test/unittests/test_audio_harness.py b/test/unittests/test_audio_harness.py
index 2405fde..009c05c 100644
--- a/test/unittests/test_audio_harness.py
+++ b/test/unittests/test_audio_harness.py
@@ -34,6 +34,7 @@
 from ovos_utils.fakebus import FakeBus
 
 if AUDIO_AVAILABLE:
+    from ovos_plugin_manager.templates.tts import TTS
     from ovoscope.audio import (
         AudioCaptureSession,
         AudioServiceHarness,
@@ -493,5 +494,78 @@ def test_speak_lifecycle_via_bridging(self) -> None:
             h.assert_audio_output_ended()
 
 
+@unittest.skipUnless(AUDIO_AVAILABLE, "ovos-audio (audio extra) not installed")
+class TestPlaybackServiceHarnessIsolation(unittest.TestCase):
+    """Repeated, independent harness instances must not interfere.
+
+    Regression for the shared ``TTS.playback`` class-attribute hazard: a
+    garbage-collected MockTTS from an earlier harness used to stop the
+    PlaybackThread of a *later*, still-running harness (via the inherited
+    ``TTS.__del__`` -> ``TTS.stop`` -> ``TTS.playback.stop()`` chain). The
+    victim thread terminated mid-run, its queued speak never played, and the
+    next ``speak()`` hung until timeout. Because GC timing is nondeterministic
+    this manifested as a flaky ``TimeoutError`` only after several
+    create/destroy cycles.
+    """
+
+    def test_many_sequential_harnesses_each_complete_speaks(self) -> None:
+        """Boot and tear down many harnesses, forcing GC between them, and
+        require every speak in every harness to complete deterministically."""
+        import gc
+
+        for i in range(12):
+            with PlaybackServiceHarness() as h:
+                for tag in ("a", "b", "c"):
+                    # unique sentences so the persistent TTS cache never
+                    # short-circuits synthesis — each must drive real playback
+                    h.speak(f"iter {i} part {tag}", timeout=8.0)
+                    self.assertIn(f"iter {i} part {tag}",
+                                  h.mock_tts.spoken_utterances)
+            # provoke collection of the just-exited MockTTS *now*, while a
+            # fresh harness will shortly own TTS.playback. Pre-fix, this is
+            # exactly what killed the next harness's playback thread.
+            gc.collect()
+
+    def test_stale_mock_destructor_does_not_kill_live_thread(self) -> None:
+        """A finished harness's MockTTS destructor must not terminate the
+        playback thread that a *later* harness now owns.
+
+        Deterministic reproduction of the GC race: keep a reference to harness
+        A's MockTTS so it outlives A, open harness B (which registers its own
+        thread on the shared ``TTS.playback`` class attribute), then run A's
+        destructor. Pre-fix, ``MockTTS.__del__`` chained into
+        ``TTS.playback.stop()`` and terminated B's live thread; B's next speak
+        would then hang. With the no-op destructor, B is unaffected.
+        """
+        # Harness A — produce a MockTTS that survives the context exit.
+        with PlaybackServiceHarness() as ha:
+            ha.speak("harness A warmup", timeout=8.0)
+        stale_mock = ha.mock_tts
+
+        # Harness B now owns the shared TTS.playback thread.
+        with PlaybackServiceHarness() as hb:
+            self.assertIs(TTS.playback, hb.svc.playback_thread)
+            self.assertTrue(hb.svc.playback_thread.is_alive())
+
+            # Fire harness A's destructor explicitly (what GC would do).
+            stale_mock.__del__()
+
+            # The precise invariant: A's destructor must not have flagged B's
+            # thread for termination. ``_terminated`` is checked at the top of
+            # the playback loop, so a single in-flight speak can still slip
+            # through even when set — but the thread would then exit on its next
+            # iteration, hanging a subsequent speak. Assert the flag directly.
+            self.assertFalse(
+                hb.svc.playback_thread._terminated,
+                "stale MockTTS destructor terminated the live playback thread",
+            )
+
+            # And B must keep working across multiple speaks (the loop must not
+            # have exited).
+            for n in range(3):
+                hb.speak(f"harness B speak {n}", timeout=8.0)
+            self.assertTrue(hb.svc.playback_thread.is_alive())
+
+
 if __name__ == "__main__":
     unittest.main()

From e4227be4a3852fa1699a85271e8d1216b6f28968 Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Mon, 29 Jun 2026 02:25:14 +0100
Subject: [PATCH 2/3] =?UTF-8?q?feat:=20MockTTS=20=E2=80=94=20emit=20audio?=
 =?UTF-8?q?=5Foutput=5Fend=20on=20delay=20for=20speak=5Fdialog(wait=3DTrue?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Skills calling speak_dialog(..., wait=True) block on
recognizer_loop:audio_output_end via SessionManager.wait_while_speaking.
Without a real TTS the handler thread blocks for 15+s, tripping the §8.3
10s handler backstop and causing a spurious handler.error.

A _mock_tts bus handler, registered for the speak message in
ovoscope/__init__.py, schedules recognizer_loop:audio_output_end on a 0.1s
threading.Timer. It uses bus.ee.emit (not bus.emit) to bypass FakeBus
namespace-migration and on_message side effects, so the synthetic event is
invisible to test captures.
---
 AGENTS.md            | 49 ++++++++++++++++++++++++++++++++++++++++++++
 TODO.md              | 11 ++++++++++
 ovoscope/__init__.py | 18 ++++++++++++++++
 3 files changed, 78 insertions(+)
 create mode 100644 AGENTS.md
 create mode 100644 TODO.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..5b35db1
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,49 @@
+# ovoscope — agent guide
+
+End-to-end test framework for OpenVoiceOS skills: boots a full OVOS Core intent pipeline in-process on a `FakeBus` (no server, no audio stack, no network), emits a test utterance, and asserts on every bus message that comes back.
+
+## Setup
+```bash
+pip install -e .            # core (pulls ovos-core>=2.0.4a2)
+pip install -e .[dev]       # + ovos-audio, ovos-pydantic-models, pytest, pytest-cov
+```
+Optional extras: `[audio]` (listener/audio/playback harnesses), `[pydantic]` (typed message bridge to `ovos-pydantic-models`).
+
+## Test
+```bash
+pytest test/unittests/
+```
+`pyproject.toml` sets `testpaths = ["test"]` and a 60s per-test timeout. CI runs with `install_extras: audio,pydantic`.
+
+## Lint/Typecheck
+A `lint.yml` workflow exists (via gh-automations). No local lint/typecheck config in `pyproject.toml`.
+
+## Layout
+- `ovoscope/__init__.py` — core API: `MiniCroft` (subclasses `SkillManager`, runs on `FakeBus`), `get_minicroft()`, `CaptureSession`, `End2EndTest`, `GUICaptureSession`, pipeline stage-group constants (`ADAPT_PIPELINE`, `PADATIOUS_PIPELINE`, `PADACIOSO_PIPELINE`, `FALLBACK_PIPELINE`, `PERSONA_PIPELINE`, `M2V_PIPELINE`, `DEFAULT_TEST_PIPELINE`, `LIGHT_TEST_PIPELINE`), `is_pipeline_available()`, and global bus-coverage monkey-patching of `FakeBus`/`OVOSSkill`.
+- `ovoscope/pytest_plugin.py` — `minicroft` and `bus_coverage_session` fixtures; registered via the `pytest11` entry point (auto-loaded when installed).
+- `ovoscope/cli.py` — `ovoscope` console script: `record`, `run`, `diff`, `validate`, `coverage` subcommands.
+- `ovoscope/setup_skill.py` — `ovoscope-setup` console script that installs the ovoscope helper skill into AI coding assistants.
+- Specialised harnesses: `listener.py` (MiniListener: STT/VAD/WakeWord), `audio.py` (audio/playback/TTS mocks), `ocp.py` + `media.py` (OCP/media), `phal.py` (PHAL plugins), `pipeline.py` (pipeline plugins).
+- `bus_coverage.py` / `coverage.py` — per-test bus-message coverage and workspace-wide E2E coverage scanning.
+- `diff.py`, `remote_recorder.py`, `pydantic_helpers.py` — fixture diffing, live-bus fixture recording, typed-model bridge.
+- `test/unittests/` — unit tests.
+
+Entry-point groups: `console_scripts` (`ovoscope`, `ovoscope-setup`) and `pytest11` (`ovoscope`). This is a testing tool, not an OPM plugin or skill.
+
+## Conventions (Org hard rules)
+- Branches: `dev` for work, `master` for stable. NEVER `main`.
+- Never edit `ovoscope/version.py`; gh-automations bumps semver from conventional-commit prefixes (`feat:`/`fix:`/`feat!:`).
+- New repos private by default.
+- Commit identity: JarbasAI <jarbasai@mailfence.com>.
+- Reference `OpenVoiceOS/gh-automations` reusable workflows at `@dev`.
+- No Neon / `neon-*` references.
+- No meta-commentary (no history, dates, or design-decision narration) in code, docs, commits, or PRs.
+- CI is provided by `OpenVoiceOS/gh-automations`.
+
+## Gotchas
+- Depends on an alpha pin `ovos-core>=2.0.4a2` (stable 2.0.4 not yet released) for FakeBus-compatible `SkillManager`.
+- `MiniCroft` mutates the global `Configuration()` singleton dict cache and `SessionManager.default_session` (pipeline, lang, blacklists) and restores them in `stop()` — always pair construction with `stop()` (or use `get_minicroft`/managed `End2EndTest`). `Configuration.reload()` does not invalidate the live dict cache, so it patches the singleton directly.
+- Importing `ovoscope` immediately monkey-patches `FakeBus.on/once/emit` and `OVOSSkill.add_event/bind` for global bus coverage.
+- Pipeline auto-selection: with `isolate_config=True` (default) it uses `DEFAULT_TEST_PIPELINE` if Adapt+Padatious are installed, else falls back to `LIGHT_TEST_PIPELINE` (pure-Python, no swig). `DEFAULT_TEST_PIPELINE` deliberately excludes persona/Ollama/OCP/m2v stages.
+- `End2EndTest` checks only the keys you list in `expected.data`/`expected.context`; extra keys in received messages are ignored. GUI messages are ignored by default (`ignore_gui=True`).
+- `audio` and `listener` submodule imports are guarded: missing optional deps are silenced, but a genuine import error in those modules is re-raised.
diff --git a/TODO.md b/TODO.md
new file mode 100644
index 0000000..881caeb
--- /dev/null
+++ b/TODO.md
@@ -0,0 +1,11 @@
+# TODO — ovoscope
+
+## Open issues
+- [ ] #33 Dependency Dashboard (Renovate bot meta-issue)
+
+## Gaps
+- [ ] Core dependency is an alpha pin (`ovos-core>=2.0.4a2`); revisit once stable 2.0.4 ships.
+- [ ] No local lint/typecheck config in `pyproject.toml` (a `lint.yml` workflow exists via gh-automations, but there is no flake8/ruff/mypy setting to run locally).
+
+## Code TODOs
+None found.
diff --git a/ovoscope/__init__.py b/ovoscope/__init__.py
index 88cdf21..bdc5b78 100644
--- a/ovoscope/__init__.py
+++ b/ovoscope/__init__.py
@@ -15,6 +15,7 @@
 from ovos_utils.fakebus import FakeBus
 from ovos_utils.log import LOG
 from ovos_utils.process_utils import ProcessState
+from ovos_spec_tools import SpecMessage
 from ovos_workshop.skills.ovos import OVOSSkill
 
 SerializedMessage = Dict[str, Union[str, Dict[str, Any]]]
@@ -391,6 +392,23 @@ def __init__(self, skill_ids,
         bus = FakeBus(modernize=self._modernize,
                       emit_legacy=self._emit_legacy)
         bus.on("message", self.handle_boot_message)
+
+        # TTS mock: speak_dialog(…, wait=True) blocks on
+        # recognizer_loop:audio_output_end. Since there is no real TTS we
+        # schedule a short-delay emit to unblock the handler.
+        # This uses bus.ee.emit (not bus.emit) to bypass FakeBus's
+        # namespace-migration and on_message side effects so the synthetic
+        # event does not appear in test captures or reset session state.
+        def _mock_tts(message):
+            sess = SessionManager.get(message)
+            threading.Timer(0.1, lambda: bus.ee.emit(
+                "recognizer_loop:audio_output_end",
+                Message("recognizer_loop:audio_output_end",
+                        context={"session": sess.serialize()})
+            )).start()
+
+        bus.on(SpecMessage.SPEAK, _mock_tts)
+
         self.skill_ids = skill_ids
         self.extra_skills = extra_skills or {}
 

From a4c8efaffb4d271133c6f22125933dd0b0e26a90 Mon Sep 17 00:00:00 2001
From: JarbasAi <jarbasai@mailfence.com>
Date: Mon, 29 Jun 2026 02:48:07 +0100
Subject: [PATCH 3/3] chore: drop agent scratch (AGENTS.md, TODO.md) from the
 PR

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 AGENTS.md | 49 -------------------------------------------------
 TODO.md   | 11 -----------
 2 files changed, 60 deletions(-)
 delete mode 100644 AGENTS.md
 delete mode 100644 TODO.md

diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index 5b35db1..0000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# ovoscope — agent guide
-
-End-to-end test framework for OpenVoiceOS skills: boots a full OVOS Core intent pipeline in-process on a `FakeBus` (no server, no audio stack, no network), emits a test utterance, and asserts on every bus message that comes back.
-
-## Setup
-```bash
-pip install -e .            # core (pulls ovos-core>=2.0.4a2)
-pip install -e .[dev]       # + ovos-audio, ovos-pydantic-models, pytest, pytest-cov
-```
-Optional extras: `[audio]` (listener/audio/playback harnesses), `[pydantic]` (typed message bridge to `ovos-pydantic-models`).
-
-## Test
-```bash
-pytest test/unittests/
-```
-`pyproject.toml` sets `testpaths = ["test"]` and a 60s per-test timeout. CI runs with `install_extras: audio,pydantic`.
-
-## Lint/Typecheck
-A `lint.yml` workflow exists (via gh-automations). No local lint/typecheck config in `pyproject.toml`.
-
-## Layout
-- `ovoscope/__init__.py` — core API: `MiniCroft` (subclasses `SkillManager`, runs on `FakeBus`), `get_minicroft()`, `CaptureSession`, `End2EndTest`, `GUICaptureSession`, pipeline stage-group constants (`ADAPT_PIPELINE`, `PADATIOUS_PIPELINE`, `PADACIOSO_PIPELINE`, `FALLBACK_PIPELINE`, `PERSONA_PIPELINE`, `M2V_PIPELINE`, `DEFAULT_TEST_PIPELINE`, `LIGHT_TEST_PIPELINE`), `is_pipeline_available()`, and global bus-coverage monkey-patching of `FakeBus`/`OVOSSkill`.
-- `ovoscope/pytest_plugin.py` — `minicroft` and `bus_coverage_session` fixtures; registered via the `pytest11` entry point (auto-loaded when installed).
-- `ovoscope/cli.py` — `ovoscope` console script: `record`, `run`, `diff`, `validate`, `coverage` subcommands.
-- `ovoscope/setup_skill.py` — `ovoscope-setup` console script that installs the ovoscope helper skill into AI coding assistants.
-- Specialised harnesses: `listener.py` (MiniListener: STT/VAD/WakeWord), `audio.py` (audio/playback/TTS mocks), `ocp.py` + `media.py` (OCP/media), `phal.py` (PHAL plugins), `pipeline.py` (pipeline plugins).
-- `bus_coverage.py` / `coverage.py` — per-test bus-message coverage and workspace-wide E2E coverage scanning.
-- `diff.py`, `remote_recorder.py`, `pydantic_helpers.py` — fixture diffing, live-bus fixture recording, typed-model bridge.
-- `test/unittests/` — unit tests.
-
-Entry-point groups: `console_scripts` (`ovoscope`, `ovoscope-setup`) and `pytest11` (`ovoscope`). This is a testing tool, not an OPM plugin or skill.
-
-## Conventions (Org hard rules)
-- Branches: `dev` for work, `master` for stable. NEVER `main`.
-- Never edit `ovoscope/version.py`; gh-automations bumps semver from conventional-commit prefixes (`feat:`/`fix:`/`feat!:`).
-- New repos private by default.
-- Commit identity: JarbasAI <jarbasai@mailfence.com>.
-- Reference `OpenVoiceOS/gh-automations` reusable workflows at `@dev`.
-- No Neon / `neon-*` references.
-- No meta-commentary (no history, dates, or design-decision narration) in code, docs, commits, or PRs.
-- CI is provided by `OpenVoiceOS/gh-automations`.
-
-## Gotchas
-- Depends on an alpha pin `ovos-core>=2.0.4a2` (stable 2.0.4 not yet released) for FakeBus-compatible `SkillManager`.
-- `MiniCroft` mutates the global `Configuration()` singleton dict cache and `SessionManager.default_session` (pipeline, lang, blacklists) and restores them in `stop()` — always pair construction with `stop()` (or use `get_minicroft`/managed `End2EndTest`). `Configuration.reload()` does not invalidate the live dict cache, so it patches the singleton directly.
-- Importing `ovoscope` immediately monkey-patches `FakeBus.on/once/emit` and `OVOSSkill.add_event/bind` for global bus coverage.
-- Pipeline auto-selection: with `isolate_config=True` (default) it uses `DEFAULT_TEST_PIPELINE` if Adapt+Padatious are installed, else falls back to `LIGHT_TEST_PIPELINE` (pure-Python, no swig). `DEFAULT_TEST_PIPELINE` deliberately excludes persona/Ollama/OCP/m2v stages.
-- `End2EndTest` checks only the keys you list in `expected.data`/`expected.context`; extra keys in received messages are ignored. GUI messages are ignored by default (`ignore_gui=True`).
-- `audio` and `listener` submodule imports are guarded: missing optional deps are silenced, but a genuine import error in those modules is re-raised.
diff --git a/TODO.md b/TODO.md
deleted file mode 100644
index 881caeb..0000000
--- a/TODO.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# TODO — ovoscope
-
-## Open issues
-- [ ] #33 Dependency Dashboard (Renovate bot meta-issue)
-
-## Gaps
-- [ ] Core dependency is an alpha pin (`ovos-core>=2.0.4a2`); revisit once stable 2.0.4 ships.
-- [ ] No local lint/typecheck config in `pyproject.toml` (a `lint.yml` workflow exists via gh-automations, but there is no flake8/ruff/mypy setting to run locally).
-
-## Code TODOs
-None found.