From af751d69fd73c951a3c28796c95bd5f38aeb9f2c Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Sun, 28 Jun 2026 00:34:10 +0100 Subject: [PATCH 1/3] fix: MockTTS destructor must not stop the shared playback thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TTS.playback is a class-level attribute shared by every TTS instance in the process. The inherited TTS.__del__ chains into TTS.stop() -> TTS.playback.stop(), so when an earlier PlaybackServiceHarness's MockTTS was garbage-collected, its destructor terminated whatever PlaybackThread was *currently* registered there — which by then belonged to a later, still-running harness. The victim thread had _terminated set and exited its loop, so its queued speak never played and ovos.audio.output.ended was never emitted, hanging the next speak() until timeout. GC timing made this a flaky TimeoutError that surfaced only after several harness create/destroy cycles (e.g. mid-file in a consumer's test/end2end suite). Override MockTTS.__del__ as a no-op: the harness already manages playback-thread lifecycle explicitly via PlaybackService.shutdown() on context exit, so a mock instance must never tear down the shared thread on collection. Add regression tests: a deterministic guard that fires a stale mock's destructor while a later harness owns TTS.playback and asserts the live thread is not terminated and can keep speaking, plus a many-sequential-harnesses smoke test. Co-Authored-By: Claude Opus 4.8 --- ovoscope/audio.py | 20 ++++++++ test/unittests/test_audio_harness.py | 74 ++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/ovoscope/audio.py b/ovoscope/audio.py index a1d816e..bd504c1 100644 --- a/ovoscope/audio.py +++ b/ovoscope/audio.py @@ -512,6 +512,26 @@ def reset(self) -> None: """Clear the list of recorded spoken utterances.""" self.spoken_utterances.clear() + def __del__(self) -> None: + """No-op destructor.
+ + ``TTS.__del__`` chains into ``TTS.shutdown() -> TTS.stop() -> + TTS.playback.stop()``. ``TTS.playback`` is a **class-level** attribute + shared by every TTS instance in the process, so when an earlier + harness's MockTTS is garbage-collected its inherited destructor stops + whatever PlaybackThread is *currently* registered there — which, by + then, belongs to a later, still-running harness. The victim thread sets + ``_terminated`` and exits mid-run, so its queued speak never plays and + ``ovos.audio.output.ended`` is never emitted, hanging the next + ``speak()`` wait. + + GC timing is nondeterministic, so the failure surfaces as a flaky + ``TimeoutError`` only after several harness instances have been created + and collected. The harness already manages thread lifecycle explicitly + via ``PlaybackService.shutdown()`` on context exit, so a MockTTS + instance must never tear down the shared playback thread on collection. + """ + # --------------------------------------------------------------------------- # PlaybackServiceHarness diff --git a/test/unittests/test_audio_harness.py b/test/unittests/test_audio_harness.py index 2405fde..009c05c 100644 --- a/test/unittests/test_audio_harness.py +++ b/test/unittests/test_audio_harness.py @@ -34,6 +34,7 @@ from ovos_utils.fakebus import FakeBus if AUDIO_AVAILABLE: + from ovos_plugin_manager.templates.tts import TTS from ovoscope.audio import ( AudioCaptureSession, AudioServiceHarness, @@ -493,5 +494,78 @@ def test_speak_lifecycle_via_bridging(self) -> None: h.assert_audio_output_ended() +@unittest.skipUnless(AUDIO_AVAILABLE, "ovos-audio (audio extra) not installed") +class TestPlaybackServiceHarnessIsolation(unittest.TestCase): + """Repeated, independent harness instances must not interfere. 
+ + Regression for the shared ``TTS.playback`` class-attribute hazard: a + garbage-collected MockTTS from an earlier harness used to stop the + PlaybackThread of a *later*, still-running harness (via the inherited + ``TTS.__del__`` -> ``TTS.stop`` -> ``TTS.playback.stop()`` chain). The + victim thread terminated mid-run, its queued speak never played, and the + next ``speak()`` hung until timeout. Because GC timing is nondeterministic + this manifested as a flaky ``TimeoutError`` only after several + create/destroy cycles. + """ + + def test_many_sequential_harnesses_each_complete_speaks(self) -> None: + """Boot and tear down many harnesses, forcing GC between them, and + require every speak in every harness to complete deterministically.""" + import gc + + for i in range(12): + with PlaybackServiceHarness() as h: + for tag in ("a", "b", "c"): + # unique sentences so the persistent TTS cache never + # short-circuits synthesis — each must drive real playback + h.speak(f"iter {i} part {tag}", timeout=8.0) + self.assertIn(f"iter {i} part {tag}", + h.mock_tts.spoken_utterances) + # provoke collection of the just-exited MockTTS *now*, while a + # fresh harness will shortly own TTS.playback. Pre-fix, this is + # exactly what killed the next harness's playback thread. + gc.collect() + + def test_stale_mock_destructor_does_not_kill_live_thread(self) -> None: + """A finished harness's MockTTS destructor must not terminate the + playback thread that a *later* harness now owns. + + Deterministic reproduction of the GC race: keep a reference to harness + A's MockTTS so it outlives A, open harness B (which registers its own + thread on the shared ``TTS.playback`` class attribute), then run A's + destructor. Pre-fix, ``MockTTS.__del__`` chained into + ``TTS.playback.stop()`` and terminated B's live thread; B's next speak + would then hang. With the no-op destructor, B is unaffected. + """ + # Harness A — produce a MockTTS that survives the context exit. 
+ with PlaybackServiceHarness() as ha: + ha.speak("harness A warmup", timeout=8.0) + stale_mock = ha.mock_tts + + # Harness B now owns the shared TTS.playback thread. + with PlaybackServiceHarness() as hb: + self.assertIs(TTS.playback, hb.svc.playback_thread) + self.assertTrue(hb.svc.playback_thread.is_alive()) + + # Fire harness A's destructor explicitly (what GC would do). + stale_mock.__del__() + + # The precise invariant: A's destructor must not have flagged B's + # thread for termination. ``_terminated`` is checked at the top of + # the playback loop, so a single in-flight speak can still slip + # through even when set — but the thread would then exit on its next + # iteration, hanging a subsequent speak. Assert the flag directly. + self.assertFalse( + hb.svc.playback_thread._terminated, + "stale MockTTS destructor terminated the live playback thread", + ) + + # And B must keep working across multiple speaks (the loop must not + # have exited). + for n in range(3): + hb.speak(f"harness B speak {n}", timeout=8.0) + self.assertTrue(hb.svc.playback_thread.is_alive()) + + if __name__ == "__main__": unittest.main() From e4227be4a3852fa1699a85271e8d1216b6f28968 Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Mon, 29 Jun 2026 02:25:14 +0100 Subject: [PATCH 2/3] =?UTF-8?q?feat:=20MockTTS=20=E2=80=94=20emit=20audio?= =?UTF-8?q?=5Foutput=5Fend=20on=20delay=20for=20speak=5Fdialog(wait=3DTrue?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skills calling speak_dialog(..., wait=True) block on recognizer_loop:audio_output_end via SessionManager.wait_while_speaking. Without a real TTS the handler thread blocks for 15+s, tripping the §8.3 10s handler backstop and producing a spurious handler.error. A speak handler registered on the FakeBus in ovoscope/__init__.py schedules recognizer_loop:audio_output_end on a 0.1s Timer. It uses bus.ee.emit (not bus.emit) to bypass FakeBus namespace-migration and on_message side effects so the synthetic event is invisible to test captures.
--- AGENTS.md | 49 ++++++++++++++++++++++++++++++++++++++++++++ TODO.md | 11 ++++++++++ ovoscope/__init__.py | 18 ++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 AGENTS.md create mode 100644 TODO.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..5b35db1 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,49 @@ +# ovoscope — agent guide + +End-to-end test framework for OpenVoiceOS skills: boots a full OVOS Core intent pipeline in-process on a `FakeBus` (no server, no audio stack, no network), emits a test utterance, and asserts on every bus message that comes back. + +## Setup +```bash +pip install -e . # core (pulls ovos-core>=2.0.4a2) +pip install -e .[dev] # + ovos-audio, ovos-pydantic-models, pytest, pytest-cov +``` +Optional extras: `[audio]` (listener/audio/playback harnesses), `[pydantic]` (typed message bridge to `ovos-pydantic-models`). + +## Test +```bash +pytest test/unittests/ +``` +`pyproject.toml` sets `testpaths = ["test"]` and a 60s per-test timeout. CI runs with `install_extras: audio,pydantic`. + +## Lint/Typecheck +A `lint.yml` workflow exists (via gh-automations). No local lint/typecheck config in `pyproject.toml`. + +## Layout +- `ovoscope/__init__.py` — core API: `MiniCroft` (subclasses `SkillManager`, runs on `FakeBus`), `get_minicroft()`, `CaptureSession`, `End2EndTest`, `GUICaptureSession`, pipeline stage-group constants (`ADAPT_PIPELINE`, `PADATIOUS_PIPELINE`, `PADACIOSO_PIPELINE`, `FALLBACK_PIPELINE`, `PERSONA_PIPELINE`, `M2V_PIPELINE`, `DEFAULT_TEST_PIPELINE`, `LIGHT_TEST_PIPELINE`), `is_pipeline_available()`, and global bus-coverage monkey-patching of `FakeBus`/`OVOSSkill`. +- `ovoscope/pytest_plugin.py` — `minicroft` and `bus_coverage_session` fixtures; registered via the `pytest11` entry point (auto-loaded when installed). +- `ovoscope/cli.py` — `ovoscope` console script: `record`, `run`, `diff`, `validate`, `coverage` subcommands. 
+- `ovoscope/setup_skill.py` — `ovoscope-setup` console script that installs the ovoscope helper skill into AI coding assistants. +- Specialised harnesses: `listener.py` (MiniListener: STT/VAD/WakeWord), `audio.py` (audio/playback/TTS mocks), `ocp.py` + `media.py` (OCP/media), `phal.py` (PHAL plugins), `pipeline.py` (pipeline plugins). +- `bus_coverage.py` / `coverage.py` — per-test bus-message coverage and workspace-wide E2E coverage scanning. +- `diff.py`, `remote_recorder.py`, `pydantic_helpers.py` — fixture diffing, live-bus fixture recording, typed-model bridge. +- `test/unittests/` — unit tests. + +Entry-point groups: `console_scripts` (`ovoscope`, `ovoscope-setup`) and `pytest11` (`ovoscope`). This is a testing tool, not an OPM plugin or skill. + +## Conventions (Org hard rules) +- Branches: `dev` for work, `master` for stable. NEVER `main`. +- Never edit `ovoscope/version.py`; gh-automations bumps semver from conventional-commit prefixes (`feat:`/`fix:`/`feat!:`). +- New repos private by default. +- Commit identity: JarbasAI . +- Reference `OpenVoiceOS/gh-automations` reusable workflows at `@dev`. +- No Neon / `neon-*` references. +- No meta-commentary (no history, dates, or design-decision narration) in code, docs, commits, or PRs. +- CI is provided by `OpenVoiceOS/gh-automations`. + +## Gotchas +- Depends on an alpha pin `ovos-core>=2.0.4a2` (stable 2.0.4 not yet released) for FakeBus-compatible `SkillManager`. +- `MiniCroft` mutates the global `Configuration()` singleton dict cache and `SessionManager.default_session` (pipeline, lang, blacklists) and restores them in `stop()` — always pair construction with `stop()` (or use `get_minicroft`/managed `End2EndTest`). `Configuration.reload()` does not invalidate the live dict cache, so it patches the singleton directly. +- Importing `ovoscope` immediately monkey-patches `FakeBus.on/once/emit` and `OVOSSkill.add_event/bind` for global bus coverage. 
+- Pipeline auto-selection: with `isolate_config=True` (default) it uses `DEFAULT_TEST_PIPELINE` if Adapt+Padatious are installed, else falls back to `LIGHT_TEST_PIPELINE` (pure-Python, no swig). `DEFAULT_TEST_PIPELINE` deliberately excludes persona/Ollama/OCP/m2v stages. +- `End2EndTest` checks only the keys you list in `expected.data`/`expected.context`; extra keys in received messages are ignored. GUI messages are ignored by default (`ignore_gui=True`). +- `audio` and `listener` submodule imports are guarded: missing optional deps are silenced, but a genuine import error in those modules is re-raised. diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..881caeb --- /dev/null +++ b/TODO.md @@ -0,0 +1,11 @@ +# TODO — ovoscope + +## Open issues +- [ ] #33 Dependency Dashboard (Renovate bot meta-issue) + +## Gaps +- [ ] Core dependency is an alpha pin (`ovos-core>=2.0.4a2`); revisit once stable 2.0.4 ships. +- [ ] No local lint/typecheck config in `pyproject.toml` (a `lint.yml` workflow exists via gh-automations, but there is no flake8/ruff/mypy setting to run locally). + +## Code TODOs +None found. diff --git a/ovoscope/__init__.py b/ovoscope/__init__.py index 88cdf21..bdc5b78 100644 --- a/ovoscope/__init__.py +++ b/ovoscope/__init__.py @@ -15,6 +15,7 @@ from ovos_utils.fakebus import FakeBus from ovos_utils.log import LOG from ovos_utils.process_utils import ProcessState +from ovos_spec_tools import SpecMessage from ovos_workshop.skills.ovos import OVOSSkill SerializedMessage = Dict[str, Union[str, Dict[str, Any]]] @@ -391,6 +392,23 @@ def __init__(self, skill_ids, bus = FakeBus(modernize=self._modernize, emit_legacy=self._emit_legacy) bus.on("message", self.handle_boot_message) + + # TTS mock: speak_dialog(…, wait=True) blocks on + # recognizer_loop:audio_output_end. Since there is no real TTS we + # schedule a short-delay emit to unblock the handler. 
+ # This uses bus.ee.emit (not bus.emit) to bypass FakeBus's + # namespace-migration and on_message side effects so the synthetic + # event does not appear in test captures or reset session state. + def _mock_tts(message): + sess = SessionManager.get(message) + threading.Timer(0.1, lambda: bus.ee.emit( + "recognizer_loop:audio_output_end", + Message("recognizer_loop:audio_output_end", + context={"session": sess.serialize()}) + )).start() + + bus.on(SpecMessage.SPEAK, _mock_tts) + self.skill_ids = skill_ids self.extra_skills = extra_skills or {} From a4c8efaffb4d271133c6f22125933dd0b0e26a90 Mon Sep 17 00:00:00 2001 From: JarbasAi Date: Mon, 29 Jun 2026 02:48:07 +0100 Subject: [PATCH 3/3] chore: drop agent scratch (AGENTS.md, TODO.md) from the PR Co-Authored-By: Claude Opus 4.8 --- AGENTS.md | 49 ------------------------------------------------- TODO.md | 11 ----------- 2 files changed, 60 deletions(-) delete mode 100644 AGENTS.md delete mode 100644 TODO.md diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index 5b35db1..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,49 +0,0 @@ -# ovoscope — agent guide - -End-to-end test framework for OpenVoiceOS skills: boots a full OVOS Core intent pipeline in-process on a `FakeBus` (no server, no audio stack, no network), emits a test utterance, and asserts on every bus message that comes back. - -## Setup -```bash -pip install -e . # core (pulls ovos-core>=2.0.4a2) -pip install -e .[dev] # + ovos-audio, ovos-pydantic-models, pytest, pytest-cov -``` -Optional extras: `[audio]` (listener/audio/playback harnesses), `[pydantic]` (typed message bridge to `ovos-pydantic-models`). - -## Test -```bash -pytest test/unittests/ -``` -`pyproject.toml` sets `testpaths = ["test"]` and a 60s per-test timeout. CI runs with `install_extras: audio,pydantic`. - -## Lint/Typecheck -A `lint.yml` workflow exists (via gh-automations). No local lint/typecheck config in `pyproject.toml`. 
- -## Layout -- `ovoscope/__init__.py` — core API: `MiniCroft` (subclasses `SkillManager`, runs on `FakeBus`), `get_minicroft()`, `CaptureSession`, `End2EndTest`, `GUICaptureSession`, pipeline stage-group constants (`ADAPT_PIPELINE`, `PADATIOUS_PIPELINE`, `PADACIOSO_PIPELINE`, `FALLBACK_PIPELINE`, `PERSONA_PIPELINE`, `M2V_PIPELINE`, `DEFAULT_TEST_PIPELINE`, `LIGHT_TEST_PIPELINE`), `is_pipeline_available()`, and global bus-coverage monkey-patching of `FakeBus`/`OVOSSkill`. -- `ovoscope/pytest_plugin.py` — `minicroft` and `bus_coverage_session` fixtures; registered via the `pytest11` entry point (auto-loaded when installed). -- `ovoscope/cli.py` — `ovoscope` console script: `record`, `run`, `diff`, `validate`, `coverage` subcommands. -- `ovoscope/setup_skill.py` — `ovoscope-setup` console script that installs the ovoscope helper skill into AI coding assistants. -- Specialised harnesses: `listener.py` (MiniListener: STT/VAD/WakeWord), `audio.py` (audio/playback/TTS mocks), `ocp.py` + `media.py` (OCP/media), `phal.py` (PHAL plugins), `pipeline.py` (pipeline plugins). -- `bus_coverage.py` / `coverage.py` — per-test bus-message coverage and workspace-wide E2E coverage scanning. -- `diff.py`, `remote_recorder.py`, `pydantic_helpers.py` — fixture diffing, live-bus fixture recording, typed-model bridge. -- `test/unittests/` — unit tests. - -Entry-point groups: `console_scripts` (`ovoscope`, `ovoscope-setup`) and `pytest11` (`ovoscope`). This is a testing tool, not an OPM plugin or skill. - -## Conventions (Org hard rules) -- Branches: `dev` for work, `master` for stable. NEVER `main`. -- Never edit `ovoscope/version.py`; gh-automations bumps semver from conventional-commit prefixes (`feat:`/`fix:`/`feat!:`). -- New repos private by default. -- Commit identity: JarbasAI . -- Reference `OpenVoiceOS/gh-automations` reusable workflows at `@dev`. -- No Neon / `neon-*` references. 
-- No meta-commentary (no history, dates, or design-decision narration) in code, docs, commits, or PRs. -- CI is provided by `OpenVoiceOS/gh-automations`. - -## Gotchas -- Depends on an alpha pin `ovos-core>=2.0.4a2` (stable 2.0.4 not yet released) for FakeBus-compatible `SkillManager`. -- `MiniCroft` mutates the global `Configuration()` singleton dict cache and `SessionManager.default_session` (pipeline, lang, blacklists) and restores them in `stop()` — always pair construction with `stop()` (or use `get_minicroft`/managed `End2EndTest`). `Configuration.reload()` does not invalidate the live dict cache, so it patches the singleton directly. -- Importing `ovoscope` immediately monkey-patches `FakeBus.on/once/emit` and `OVOSSkill.add_event/bind` for global bus coverage. -- Pipeline auto-selection: with `isolate_config=True` (default) it uses `DEFAULT_TEST_PIPELINE` if Adapt+Padatious are installed, else falls back to `LIGHT_TEST_PIPELINE` (pure-Python, no swig). `DEFAULT_TEST_PIPELINE` deliberately excludes persona/Ollama/OCP/m2v stages. -- `End2EndTest` checks only the keys you list in `expected.data`/`expected.context`; extra keys in received messages are ignored. GUI messages are ignored by default (`ignore_gui=True`). -- `audio` and `listener` submodule imports are guarded: missing optional deps are silenced, but a genuine import error in those modules is re-raised. diff --git a/TODO.md b/TODO.md deleted file mode 100644 index 881caeb..0000000 --- a/TODO.md +++ /dev/null @@ -1,11 +0,0 @@ -# TODO — ovoscope - -## Open issues -- [ ] #33 Dependency Dashboard (Renovate bot meta-issue) - -## Gaps -- [ ] Core dependency is an alpha pin (`ovos-core>=2.0.4a2`); revisit once stable 2.0.4 ships. -- [ ] No local lint/typecheck config in `pyproject.toml` (a `lint.yml` workflow exists via gh-automations, but there is no flake8/ruff/mypy setting to run locally). - -## Code TODOs -None found.