diff --git a/CLAUDE.md b/CLAUDE.md index 0917d335..badd950d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -104,6 +104,8 @@ Output verbosity: `VERBOSE_LEVEL` (default 1, range 0-2). Controls how much of C Voice transcription: `ENABLE_VOICE_MESSAGES` (default true), `VOICE_PROVIDER` (`mistral`|`openai`, default `mistral`), `MISTRAL_API_KEY`, `OPENAI_API_KEY`, `VOICE_TRANSCRIPTION_MODEL`. Provider implementation is in `src/bot/features/voice_handler.py`. +Voice responses (TTS): `ENABLE_VOICE_RESPONSES` (default false), `VOICE_RESPONSE_MODEL` (default `voxtral-4b-tts-2603`), `VOICE_RESPONSE_VOICE` (Mistral TTS voice preset name, default `jessica`), `VOICE_RESPONSE_FORMAT` (default `opus`), `VOICE_RESPONSE_MAX_LENGTH` (default 2000). Users toggle via `/voice on|off`. Long responses are summarized before TTS; on failure, falls back to text with note. + Feature flags in `src/config/features.py` control: MCP, git integration, file uploads, quick actions, session export, image uploads, voice messages, conversation mode, agentic mode, API server, scheduler. ### DateTime Convention @@ -122,7 +124,7 @@ All datetimes use timezone-aware UTC: `datetime.now(UTC)` (not `datetime.utcnow( ### Agentic mode -Agentic mode commands: `/start`, `/new`, `/status`, `/verbose`, `/repo`. If `ENABLE_PROJECT_THREADS=true`: `/sync_threads`. To add a new command: +Agentic mode commands: `/start`, `/new`, `/status`, `/verbose`, `/voice`, `/repo`. If `ENABLE_PROJECT_THREADS=true`: `/sync_threads`. To add a new command: 1. Add handler function in `src/bot/orchestrator.py` 2. 
Register in `MessageOrchestrator._register_agentic_handlers()` diff --git a/docs/superpowers/plans/2026-03-28-audio-response-messages.md b/docs/superpowers/plans/2026-03-28-audio-response-messages.md new file mode 100644 index 00000000..58caa3d9 --- /dev/null +++ b/docs/superpowers/plans/2026-03-28-audio-response-messages.md @@ -0,0 +1,1146 @@ +# Audio Response Messages Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add text-to-speech capability so the bot can send Claude's responses as Telegram voice messages using Mistral's Voxtral TTS API, with per-user toggle and graceful fallback. + +**Architecture:** Extend the existing `VoiceHandler` class with a `synthesize_speech()` method that calls Mistral's `client.audio.speech.complete_async()`. Add a `/voice on|off` command persisted per-user in SQLite. The orchestrator's `agentic_text()` method gains a `_maybe_send_voice_response()` helper that intercepts responses before the text-sending block, synthesizes audio, and sends via `reply_voice()`. Long responses trigger a summarization step before TTS. 
+ +**Tech Stack:** Python 3.10+, mistralai SDK (^1.0.0), python-telegram-bot, aiosqlite, pytest-asyncio + +--- + +## File Map + +| Action | File | Responsibility | +|--------|------|----------------| +| Modify | `src/config/settings.py` | Add TTS settings (model, voice, format, max length, enable flag) | +| Modify | `src/config/features.py` | Add `voice_responses_enabled` feature flag | +| Modify | `src/storage/database.py` | Migration 5: add `voice_responses_enabled` column to users table | +| Modify | `src/storage/models.py` | Add `voice_responses_enabled` field to `UserModel` | +| Modify | `src/storage/repositories.py` | Add get/set methods for voice response preference | +| Modify | `src/bot/features/voice_handler.py` | Add `synthesize_speech()` TTS method | +| Modify | `src/bot/orchestrator.py` | Add `/voice` command handler + `_maybe_send_voice_response()` | +| Create | `tests/unit/test_voice_tts.py` | Tests for TTS synthesis | +| Create | `tests/unit/test_voice_command.py` | Tests for `/voice` toggle command | +| Create | `tests/unit/test_voice_response_flow.py` | Tests for orchestrator voice response flow | + +--- + +### Task 1: Configuration — Add TTS Settings + +**Files:** +- Modify: `src/config/settings.py` (add fields after line ~197, add computed property after line ~525) +- Test: `tests/unit/test_config.py` (existing file, add test) + +- [ ] **Step 1: Write the failing test** + +In `tests/unit/test_config.py`, add: + +```python +def test_voice_response_settings_defaults(): + """Voice response settings have correct defaults.""" + from src.config.settings import Settings + + config = Settings( + telegram_bot_token="test:token", + telegram_bot_username="testbot", + approved_directory="/tmp/test", + ) + assert config.enable_voice_responses is False + assert config.voice_response_model == "voxtral-4b-tts-2603" + assert config.voice_response_voice == "jessica" + assert config.voice_response_format == "opus" + assert config.voice_response_max_length == 2000 
+``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `python -m pytest tests/unit/test_config.py::test_voice_response_settings_defaults -v` +Expected: FAIL with `AttributeError: 'Settings' object has no attribute 'enable_voice_responses'` + +- [ ] **Step 3: Add TTS settings to Settings class** + +In `src/config/settings.py`, after the `voice_max_file_size_mb` field (around line 197), add: + +```python + # Voice response (TTS) settings + enable_voice_responses: bool = Field( + False, description="Enable text-to-speech voice responses" + ) + voice_response_model: str = Field( + "voxtral-4b-tts-2603", + description="Mistral TTS model for voice responses", + ) + voice_response_voice: str = Field( + "jessica", + description="Mistral TTS voice preset name", + ) + voice_response_format: str = Field( + "opus", + description="TTS output audio format (opus for Telegram voice compatibility)", + ) + voice_response_max_length: int = Field( + 2000, + description="Character threshold above which responses are summarized before TTS", + ge=100, + le=10000, + ) +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `python -m pytest tests/unit/test_config.py::test_voice_response_settings_defaults -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/config/settings.py tests/unit/test_config.py +git commit -m "feat: add TTS voice response settings" +``` + +--- + +### Task 2: Feature Flag — Add voice_responses_enabled + +**Files:** +- Modify: `src/config/features.py` (add property, update maps) +- Test: `tests/unit/test_config.py` (add feature flag test) + +- [ ] **Step 1: Write the failing test** + +In `tests/unit/test_config.py`, add: + +```python +def test_voice_responses_feature_flag_enabled(): + """voice_responses_enabled is True when enable_voice_responses and mistral_api_key set.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + settings.enable_voice_responses = True + 
settings.mistral_api_key = MagicMock() # not None = key is set + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is True + + +def test_voice_responses_feature_flag_disabled_no_key(): + """voice_responses_enabled is False when mistral_api_key is None.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + settings.enable_voice_responses = True + settings.mistral_api_key = None + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is False + + +def test_voice_responses_feature_flag_disabled_not_enabled(): + """voice_responses_enabled is False when enable_voice_responses is False.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + settings.enable_voice_responses = False + settings.mistral_api_key = MagicMock() + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is False +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `python -m pytest tests/unit/test_config.py -k "voice_responses_feature_flag" -v` +Expected: FAIL with `AttributeError: 'FeatureFlags' object has no attribute 'voice_responses_enabled'` + +- [ ] **Step 3: Add voice_responses_enabled property to FeatureFlags** + +In `src/config/features.py`, after the `voice_messages_enabled` property (after line 81), add: + +```python + @property + def voice_responses_enabled(self) -> bool: + """Check if text-to-speech voice responses are enabled.""" + if not self.settings.enable_voice_responses: + return False + return self.settings.mistral_api_key is not None +``` + +Update the `is_feature_enabled` map (inside the method around line 100) — add this entry: + +```python + "voice_responses": self.voice_responses_enabled, +``` + +Update `get_enabled_features()` (around line 131) — add before the `return`: + +```python + if self.voice_responses_enabled: + features.append("voice_responses") +``` + +- [ ] **Step 4: Run 
tests to verify they pass** + +Run: `python -m pytest tests/unit/test_config.py -k "voice_responses_feature_flag" -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/config/features.py tests/unit/test_config.py +git commit -m "feat: add voice_responses_enabled feature flag" +``` + +--- + +### Task 3: Storage — Migration and Repository Methods + +**Files:** +- Modify: `src/storage/database.py` (add migration 5) +- Modify: `src/storage/models.py` (add field to UserModel) +- Modify: `src/storage/repositories.py` (add get/set methods to UserRepository) +- Test: `tests/unit/test_storage.py` (or create `tests/unit/test_voice_preference.py`) + +- [ ] **Step 1: Write the failing test** + +Create `tests/unit/test_voice_preference.py`: + +```python +"""Tests for voice response preference storage.""" + +import pytest + +from src.storage.database import DatabaseManager +from src.storage.repositories import UserRepository +from src.storage.models import UserModel + + +@pytest.fixture +async def db_manager(tmp_path): + """Create an in-memory database manager.""" + db_path = str(tmp_path / "test.db") + manager = DatabaseManager(db_path) + await manager.initialize() + yield manager + await manager.close() + + +@pytest.fixture +async def user_repo(db_manager): + """Create a UserRepository with initialized DB.""" + return UserRepository(db_manager) + + +async def test_get_voice_responses_default_false(user_repo): + """New users have voice_responses_enabled = False by default.""" + user = UserModel(user_id=123, telegram_username="testuser") + await user_repo.create_user(user) + result = await user_repo.get_voice_responses_enabled(123) + assert result is False + + +async def test_set_voice_responses_enabled(user_repo): + """Setting voice_responses_enabled to True persists.""" + user = UserModel(user_id=456, telegram_username="testuser2") + await user_repo.create_user(user) + await user_repo.set_voice_responses_enabled(456, True) + result = await 
user_repo.get_voice_responses_enabled(456) + assert result is True + + +async def test_set_voice_responses_disabled(user_repo): + """Setting voice_responses_enabled back to False persists.""" + user = UserModel(user_id=789, telegram_username="testuser3") + await user_repo.create_user(user) + await user_repo.set_voice_responses_enabled(789, True) + await user_repo.set_voice_responses_enabled(789, False) + result = await user_repo.get_voice_responses_enabled(789) + assert result is False + + +async def test_get_voice_responses_nonexistent_user(user_repo): + """Nonexistent user returns False.""" + result = await user_repo.get_voice_responses_enabled(999) + assert result is False +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `python -m pytest tests/unit/test_voice_preference.py -v` +Expected: FAIL with `AttributeError: 'UserRepository' object has no attribute 'get_voice_responses_enabled'` + +- [ ] **Step 3: Add migration 5 to database.py** + +In `src/storage/database.py`, in the `_get_migrations()` method, after migration 4 (around line 312), add: + +```python + ( + 5, + """ + -- Add voice response preference to users + ALTER TABLE users ADD COLUMN voice_responses_enabled BOOLEAN DEFAULT FALSE; + """, + ), +``` + +- [ ] **Step 4: Add field to UserModel** + +In `src/storage/models.py`, add to the `UserModel` dataclass (after `session_count`): + +```python + voice_responses_enabled: bool = False +``` + +- [ ] **Step 5: Add repository methods to UserRepository** + +In `src/storage/repositories.py`, add to the `UserRepository` class (after `get_all_users`, around line 115): + +```python + async def get_voice_responses_enabled(self, user_id: int) -> bool: + """Get voice response preference for a user.""" + async with self.db.get_connection() as conn: + cursor = await conn.execute( + "SELECT voice_responses_enabled FROM users WHERE user_id = ?", + (user_id,), + ) + row = await cursor.fetchone() + return bool(row[0]) if row else False + + async def 
set_voice_responses_enabled(self, user_id: int, enabled: bool) -> None: + """Set voice response preference for a user.""" + async with self.db.get_connection() as conn: + await conn.execute( + "UPDATE users SET voice_responses_enabled = ? WHERE user_id = ?", + (enabled, user_id), + ) + await conn.commit() + logger.info( + "Updated voice response preference", + user_id=user_id, + enabled=enabled, + ) +``` + +- [ ] **Step 6: Run tests to verify they pass** + +Run: `python -m pytest tests/unit/test_voice_preference.py -v` +Expected: PASS + +- [ ] **Step 7: Commit** + +```bash +git add src/storage/database.py src/storage/models.py src/storage/repositories.py tests/unit/test_voice_preference.py +git commit -m "feat: add voice_responses_enabled column and repository methods" +``` + +--- + +### Task 4: TTS Synthesis — Add synthesize_speech() to VoiceHandler + +**Files:** +- Modify: `src/bot/features/voice_handler.py` (add method + dataclass) +- Create: `tests/unit/test_voice_tts.py` + +- [ ] **Step 1: Write the failing test** + +Create `tests/unit/test_voice_tts.py`: + +```python +"""Tests for VoiceHandler TTS synthesis.""" + +import sys +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from src.bot.features.voice_handler import VoiceHandler + + +@pytest.fixture +def tts_config(): + """Create a mock config with TTS settings.""" + cfg = MagicMock() + cfg.voice_provider = "mistral" + cfg.mistral_api_key_str = "test-api-key" + cfg.voice_response_model = "voxtral-4b-tts-2603" + cfg.voice_response_voice = "jessica" + cfg.voice_response_format = "opus" + cfg.resolved_voice_model = "voxtral-mini-latest" + cfg.voice_max_file_size_mb = 20 + cfg.voice_max_file_size_bytes = 20 * 1024 * 1024 + return cfg + + +@pytest.fixture +def voice_handler(tts_config): + return VoiceHandler(config=tts_config) + + +async def test_synthesize_speech_calls_mistral(voice_handler): + """synthesize_speech calls Mistral TTS API with correct params.""" 
+ fake_audio = b"fake-audio-bytes" + + mock_speech = MagicMock() + mock_speech.complete_async = AsyncMock(return_value=fake_audio) + + mock_audio = MagicMock() + mock_audio.speech = mock_speech + + mock_client = MagicMock() + mock_client.audio = mock_audio + mistral_ctor = MagicMock(return_value=mock_client) + + with pytest.MonkeyPatch.context() as mp: + mp.setitem(sys.modules, "mistralai", SimpleNamespace(Mistral=mistral_ctor)) + result = await voice_handler.synthesize_speech("Hello world") + + assert result == fake_audio + mock_speech.complete_async.assert_called_once() + call_kwargs = mock_speech.complete_async.call_args.kwargs + assert call_kwargs["model"] == "voxtral-4b-tts-2603" + assert call_kwargs["voice"] == "jessica" + assert call_kwargs["input"] == "Hello world" + assert call_kwargs["response_format"] == "opus" + + +async def test_synthesize_speech_api_failure(voice_handler): + """synthesize_speech raises RuntimeError on API failure.""" + mock_speech = MagicMock() + mock_speech.complete_async = AsyncMock(side_effect=Exception("API down")) + + mock_audio = MagicMock() + mock_audio.speech = mock_speech + + mock_client = MagicMock() + mock_client.audio = mock_audio + mistral_ctor = MagicMock(return_value=mock_client) + + with pytest.MonkeyPatch.context() as mp: + mp.setitem(sys.modules, "mistralai", SimpleNamespace(Mistral=mistral_ctor)) + with pytest.raises(RuntimeError, match="Mistral TTS request failed"): + await voice_handler.synthesize_speech("Hello world") +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `python -m pytest tests/unit/test_voice_tts.py -v` +Expected: FAIL with `AttributeError: 'VoiceHandler' object has no attribute 'synthesize_speech'` + +- [ ] **Step 3: Add synthesize_speech() to VoiceHandler** + +In `src/bot/features/voice_handler.py`, add this method to the `VoiceHandler` class (after `_transcribe_mistral`, around line 128): + +```python + async def synthesize_speech(self, text: str) -> bytes: + """Synthesize text to 
audio using the Mistral TTS API. + + Returns raw audio bytes in the configured format. + """ + client = self._get_mistral_client() + try: + response = await client.audio.speech.complete_async( + model=self.config.voice_response_model, + voice=self.config.voice_response_voice, + input=text, + response_format=self.config.voice_response_format, + ) + except Exception as exc: + logger.warning( + "Mistral TTS request failed", + error_type=type(exc).__name__, + ) + raise RuntimeError("Mistral TTS request failed.") from exc + + return response +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `python -m pytest tests/unit/test_voice_tts.py -v` +Expected: PASS + +- [ ] **Step 5: Run existing voice handler tests to confirm no regression** + +Run: `python -m pytest tests/unit/test_bot/test_voice_handler.py -v` +Expected: All existing tests PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/bot/features/voice_handler.py tests/unit/test_voice_tts.py +git commit -m "feat: add synthesize_speech() TTS method to VoiceHandler" +``` + +--- + +### Task 5: /voice Command Handler + +**Files:** +- Modify: `src/bot/orchestrator.py` (add handler + register) +- Create: `tests/unit/test_voice_command.py` + +- [ ] **Step 1: Write the failing test** + +Create `tests/unit/test_voice_command.py`: + +```python +"""Tests for /voice toggle command.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _make_update_context(user_id=123, text="/voice"): + """Create mock Update and Context for command testing.""" + update = MagicMock() + update.effective_user.id = user_id + update.message.text = text + update.message.reply_text = AsyncMock() + update.message.message_id = 1 + + context = MagicMock() + context.bot_data = {} + context.user_data = {} + + storage = MagicMock() + storage.users = MagicMock() + context.bot_data["storage"] = storage + context.bot_data["features"] = MagicMock() + + settings = MagicMock() + settings.enable_voice_responses = True 
+ settings.mistral_api_key = MagicMock() # not None + + return update, context, storage, settings + + +async def test_voice_on_enables(monkeypatch): + """'/voice on' enables voice responses for the user.""" + update, context, storage, settings = _make_update_context(text="/voice on") + storage.users.set_voice_responses_enabled = AsyncMock() + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.set_voice_responses_enabled.assert_called_once_with(123, True) + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "on" in reply_text.lower() or "enabled" in reply_text.lower() + + +async def test_voice_off_disables(monkeypatch): + """'/voice off' disables voice responses for the user.""" + update, context, storage, settings = _make_update_context(text="/voice off") + storage.users.set_voice_responses_enabled = AsyncMock() + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.set_voice_responses_enabled.assert_called_once_with(123, False) + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "off" in reply_text.lower() or "disabled" in reply_text.lower() + + +async def test_voice_no_args_shows_status(): + """'/voice' with no args shows current status.""" + update, context, storage, settings = _make_update_context(text="/voice") + storage.users.get_voice_responses_enabled = AsyncMock(return_value=False) + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.get_voice_responses_enabled.assert_called_once_with(123) + 
update.message.reply_text.assert_called_once() + + +async def test_voice_disabled_at_admin_level(): + """'/voice' when feature disabled at admin level shows unavailable message.""" + update, context, storage, settings = _make_update_context(text="/voice on") + settings.enable_voice_responses = False + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "not enabled" in reply_text.lower() +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `python -m pytest tests/unit/test_voice_command.py -v` +Expected: FAIL with `AttributeError: 'MessageOrchestrator' object has no attribute 'agentic_voice_toggle'` + +- [ ] **Step 3: Add agentic_voice_toggle handler to orchestrator** + +In `src/bot/orchestrator.py`, add the handler method (near the `agentic_verbose` method, around line 616): + +```python + async def agentic_voice_toggle( + self, update: Update, context: ContextTypes.DEFAULT_TYPE + ) -> None: + """Toggle voice responses: /voice [on|off].""" + if not self.settings.enable_voice_responses: + await update.message.reply_text( + "Voice responses are not enabled on this instance.", + parse_mode="HTML", + ) + return + + user_id = update.effective_user.id + storage = context.bot_data.get("storage") + args = update.message.text.split()[1:] if update.message.text else [] + + if not args: + enabled = await storage.users.get_voice_responses_enabled(user_id) + status = "on" if enabled else "off" + await update.message.reply_text( + f"Voice responses: {status}\n\n" + "Usage: /voice on or /voice off", + parse_mode="HTML", + ) + return + + arg = args[0].lower() + if arg not in ("on", "off"): + await update.message.reply_text( + "Please use: /voice on or /voice off", + parse_mode="HTML", + ) + return + + enabled = arg == "on" + 
await storage.users.set_voice_responses_enabled(user_id, enabled) + status = "enabled" if enabled else "disabled" + await update.message.reply_text( + f"Voice responses {status}", + parse_mode="HTML", + ) +``` + +- [ ] **Step 4: Register the command** + +In `_register_agentic_handlers()` (around line 320), add to the `handlers` list: + +```python + ("voice", self.agentic_voice_toggle), +``` + +In `get_bot_commands()` (around line 460), add to the agentic commands list: + +```python + BotCommand("voice", "Toggle voice responses (on/off)"), +``` + +- [ ] **Step 5: Run tests to verify they pass** + +Run: `python -m pytest tests/unit/test_voice_command.py -v` +Expected: PASS + +- [ ] **Step 6: Commit** + +```bash +git add src/bot/orchestrator.py tests/unit/test_voice_command.py +git commit -m "feat: add /voice on|off toggle command" +``` + +--- + +### Task 6: Voice Response Flow in Orchestrator + +**Files:** +- Modify: `src/bot/orchestrator.py` (add `_maybe_send_voice_response()`, modify `agentic_text()`) +- Create: `tests/unit/test_voice_response_flow.py` + +- [ ] **Step 1: Write the failing test for short response path** + +Create `tests/unit/test_voice_response_flow.py`: + +```python +"""Tests for voice response flow in orchestrator.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _make_orchestrator_with_voice(): + """Create orchestrator with voice responses enabled.""" + settings = MagicMock() + settings.enable_voice_responses = True + settings.mistral_api_key = MagicMock() + settings.voice_response_max_length = 2000 + settings.agentic_mode = True + + storage = MagicMock() + storage.users = MagicMock() + + deps = {"storage": storage} + + from src.bot.orchestrator import MessageOrchestrator + + orch = MessageOrchestrator(settings, deps) + return orch, storage + + +def _make_update_context(storage): + """Create mock Update and Context.""" + update = MagicMock() + update.effective_user.id = 123 + update.message.reply_voice = 
AsyncMock() + update.message.reply_text = AsyncMock() + update.message.message_id = 1 + + context = MagicMock() + context.bot_data = {"storage": storage} + return update, context + + +async def test_short_response_sends_voice(): + """Short response synthesizes and sends voice message.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock(return_value=b"audio-data") + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Hello, this is a short response.", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is True + voice_handler.synthesize_speech.assert_called_once_with( + "Hello, this is a short response." + ) + update.message.reply_voice.assert_called_once() + # Should send a short label text too + update.message.reply_text.assert_called_once() + + +async def test_voice_disabled_skips(): + """When user has voice off, returns False.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=False) + voice_handler = MagicMock() + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Some response", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is False + voice_handler.synthesize_speech.assert_not_called() + + +async def test_tts_failure_falls_back_to_text(): + """TTS failure returns False so text path runs, and sends note.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock( + side_effect=RuntimeError("TTS failed") + 
) + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Some response", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is False +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `python -m pytest tests/unit/test_voice_response_flow.py -v` +Expected: FAIL with `AttributeError: 'MessageOrchestrator' object has no attribute '_maybe_send_voice_response'` + +- [ ] **Step 3: Implement _maybe_send_voice_response()** + +In `src/bot/orchestrator.py`, add this method to `MessageOrchestrator` (before `agentic_text`): + +```python + async def _maybe_send_voice_response( + self, + update: Update, + context: ContextTypes.DEFAULT_TYPE, + response_text: str, + user_id: int, + voice_handler: Any, + ) -> bool: + """Try to send response as voice message. + + Returns True if voice was sent (caller should adjust text sending). + Returns False if voice was not sent (caller sends text as normal). + """ + if not self.settings.enable_voice_responses: + return False + + storage = context.bot_data.get("storage") + if not storage: + return False + + try: + enabled = await storage.users.get_voice_responses_enabled(user_id) + except Exception: + return False + + if not enabled: + return False + + if not voice_handler: + return False + + text_to_speak = response_text + is_long = len(response_text) > self.settings.voice_response_max_length + send_full_text = False + + if is_long: + # Summarize for spoken delivery + try: + claude_integration = context.bot_data.get("claude_integration") + if claude_integration: + from pathlib import Path + + summary_prompt = ( + "Summarize the following response in 2-3 sentences " + "suitable for being read aloud as a voice message. 
" + "Output ONLY the summary, nothing else.\n\n" + f"{response_text}" + ) + summary_response = await claude_integration.run_command( + prompt=summary_prompt, + working_directory=Path(self.settings.approved_directory), + user_id=user_id, + force_new=True, + ) + text_to_speak = summary_response.content or response_text + send_full_text = True + else: + # No Claude integration, truncate instead + text_to_speak = response_text[ + : self.settings.voice_response_max_length + ] + send_full_text = True + except Exception as exc: + logger.warning( + "Voice summary generation failed, falling back to text", + error=str(exc), + ) + return False + + try: + audio_bytes = await voice_handler.synthesize_speech(text_to_speak) + await update.message.reply_voice( + voice=audio_bytes, + reply_to_message_id=update.message.message_id, + ) + + if send_full_text: + # Long response: send full text alongside + from .utils.formatting import ResponseFormatter + + formatter = ResponseFormatter(self.settings) + formatted_messages = formatter.format_claude_response(response_text) + for message in formatted_messages: + if message.text and message.text.strip(): + try: + await update.message.reply_text( + message.text, + parse_mode=message.parse_mode, + reply_markup=None, + ) + except Exception: + await update.message.reply_text( + message.text, reply_markup=None + ) + else: + # Short response: just a label + await update.message.reply_text( + "Voice response", + reply_markup=None, + ) + + return True + + except Exception as exc: + logger.warning( + "TTS failed, falling back to text", + error_type=type(exc).__name__, + error=str(exc), + ) + return False +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `python -m pytest tests/unit/test_voice_response_flow.py -v` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/bot/orchestrator.py tests/unit/test_voice_response_flow.py +git commit -m "feat: add _maybe_send_voice_response() to orchestrator" +``` + +--- + +### Task 7: 
Wire Voice Response Into agentic_text() + +**Files:** +- Modify: `src/bot/orchestrator.py` (modify `agentic_text()` response-sending section) + +- [ ] **Step 1: Modify agentic_text() to call _maybe_send_voice_response()** + +In `src/bot/orchestrator.py`, in the `agentic_text()` method, find the block that starts sending text (around line 1094, the `# Send text messages` comment). Insert the voice response attempt **before** the text-sending block. + +Replace this section (lines ~1094-1132): + +```python + # Send text messages (skip if caption was already embedded in photos) + if not caption_sent: + for i, message in enumerate(formatted_messages): +``` + +With: + +```python + # Try voice response first (if enabled and user toggled on) + voice_sent = False + if not caption_sent and response_content: + features = context.bot_data.get("features") + voice_handler = features.get_voice_handler() if features else None + try: + voice_sent = await self._maybe_send_voice_response( + update=update, + context=context, + response_text=response_content, + user_id=user_id, + voice_handler=voice_handler, + ) + except Exception as voice_err: + logger.warning("Voice response attempt failed", error=str(voice_err)) + + if voice_sent and not caption_sent: + # Voice was sent (with text handled inside _maybe_send_voice_response) + # If TTS failure sent a note, voice_sent is False and we fall through + pass + + # Send text messages (skip if caption or voice was already sent) + if not caption_sent and not voice_sent: + for i, message in enumerate(formatted_messages): +``` + +The rest of the text-sending block remains unchanged. 
+ +- [ ] **Step 2: Verify the existing orchestrator tests still pass** + +Run: `python -m pytest tests/unit/test_orchestrator.py -v` +Expected: PASS (no regression) + +- [ ] **Step 3: Run all voice-related tests** + +Run: `python -m pytest tests/unit/test_voice_tts.py tests/unit/test_voice_command.py tests/unit/test_voice_response_flow.py tests/unit/test_voice_preference.py -v` +Expected: All PASS + +- [ ] **Step 4: Commit** + +```bash +git add src/bot/orchestrator.py +git commit -m "feat: wire voice response into agentic_text() flow" +``` + +--- + +### Task 8: Long Response Path Test + +**Files:** +- Modify: `tests/unit/test_voice_response_flow.py` (add long response test) + +- [ ] **Step 1: Add test for long response summarization path** + +In `tests/unit/test_voice_response_flow.py`, add: + +```python +async def test_long_response_summarizes_then_speaks(): + """Long response triggers summarization, sends audio of summary + full text.""" + orch, storage = _make_orchestrator_with_voice() + orch.settings.voice_response_max_length = 50 # Low threshold for testing + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock(return_value=b"audio-data") + + # Mock Claude integration for summarization + mock_claude = MagicMock() + mock_summary_response = MagicMock() + mock_summary_response.content = "This is a brief summary." 
+ mock_claude.run_command = AsyncMock(return_value=mock_summary_response) + context.bot_data["claude_integration"] = mock_claude + + long_text = "A" * 100 # Exceeds threshold of 50 + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text=long_text, + user_id=123, + voice_handler=voice_handler, + ) + + assert result is True + # Should synthesize the SUMMARY, not the full text + voice_handler.synthesize_speech.assert_called_once_with("This is a brief summary.") + # Should send voice + text messages (full text) + update.message.reply_voice.assert_called_once() + assert update.message.reply_text.call_count >= 1 # Full text sent +``` + +- [ ] **Step 2: Run the test** + +Run: `python -m pytest tests/unit/test_voice_response_flow.py::test_long_response_summarizes_then_speaks -v` +Expected: PASS (implementation already handles this in Task 6) + +- [ ] **Step 3: Commit** + +```bash +git add tests/unit/test_voice_response_flow.py +git commit -m "test: add long response summarization path test" +``` + +--- + +### Task 9: Fallback Note on TTS Failure + +**Files:** +- Modify: `src/bot/orchestrator.py` (add note when falling back) +- Modify: `tests/unit/test_voice_response_flow.py` (update fallback test) + +- [ ] **Step 1: Update the fallback test to check for note** + +In `tests/unit/test_voice_response_flow.py`, update `test_tts_failure_falls_back_to_text`: + +```python +async def test_tts_failure_falls_back_to_text(): + """TTS failure returns False and sends fallback note.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock( + side_effect=RuntimeError("TTS failed") + ) + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Some response", + user_id=123, + 
voice_handler=voice_handler, + ) + + assert result is False + # Should have sent a fallback note + update.message.reply_text.assert_called_once() + note = update.message.reply_text.call_args[0][0] + assert "audio unavailable" in note.lower() +``` + +- [ ] **Step 2: Update _maybe_send_voice_response() to send the note on TTS failure** + +In the `except` block at the end of `_maybe_send_voice_response()`, add the fallback note before returning False: + +```python + except Exception as exc: + logger.warning( + "TTS failed, falling back to text", + error_type=type(exc).__name__, + error=str(exc), + ) + try: + await update.message.reply_text( + "(Audio unavailable, sent as text)", + reply_markup=None, + ) + except Exception: + pass + return False +``` + +- [ ] **Step 3: Run the test** + +Run: `python -m pytest tests/unit/test_voice_response_flow.py::test_tts_failure_falls_back_to_text -v` +Expected: PASS + +- [ ] **Step 4: Commit** + +```bash +git add src/bot/orchestrator.py tests/unit/test_voice_response_flow.py +git commit -m "feat: add fallback note when TTS fails" +``` + +--- + +### Task 10: Final Verification + +- [ ] **Step 1: Run the full test suite** + +Run: `python -m pytest tests/ -v --tb=short` +Expected: All tests PASS, no regressions + +- [ ] **Step 2: Run linting** + +Run: `make lint` +Expected: No errors (run `make format` first if needed) + +- [ ] **Step 3: Run type checking** + +Run: `python -m mypy src` +Expected: No new type errors + +- [ ] **Step 4: Final commit if any formatting changes** + +```bash +git add -A +git commit -m "style: fix lint/formatting for voice response feature" +``` diff --git a/docs/superpowers/specs/2026-03-28-audio-response-messages-design.md b/docs/superpowers/specs/2026-03-28-audio-response-messages-design.md new file mode 100644 index 00000000..a75a3f28 --- /dev/null +++ b/docs/superpowers/specs/2026-03-28-audio-response-messages-design.md @@ -0,0 +1,138 @@ +# Audio Response Messages — Design Spec + +## Overview + +Add 
text-to-speech (TTS) capability so the bot can send Claude's responses back as Telegram voice messages using Mistral's Voxtral TTS API. The feature is opt-in at both the admin level (env var) and user level (toggle command).
+
+## Requirements
+
+- Users can toggle voice responses on/off via `/voice on` / `/voice off`
+- When enabled, Claude's responses are synthesized to audio and sent as Telegram voice messages
+- Short responses: voice message + brief text label (e.g. "Voice response")
+- Long responses (above threshold): Claude summarizes for spoken delivery, audio of the summary is sent, full text is sent alongside
+- On TTS failure: graceful fallback to text + "(Audio unavailable, sent as text)"
+- Admin can disable the feature entirely; users cannot enable it if admin hasn't
+- Uses Mistral Voxtral TTS API (same SDK already used for transcription)
+
+## Configuration
+
+### Environment Variables (admin-level)
+
+| Variable | Type | Default | Description |
+|----------|------|---------|-------------|
+| `ENABLE_VOICE_RESPONSES` | bool | `false` | Master switch for TTS feature |
+| `VOICE_RESPONSE_MODEL` | str | `voxtral-mini-tts-2603` | Mistral TTS model name |
+| `VOICE_RESPONSE_VOICE` | str | `jessica` | Mistral voice UUID, sent as `voice_id` (list available voices via `/v1/audio/voices`) |
+| `VOICE_RESPONSE_FORMAT` | str | `opus` | Audio output format (opus for Telegram voice compatibility) |
+| `VOICE_RESPONSE_MAX_LENGTH` | int | `2000` | Character threshold for long response handling |
+
+### User Toggle
+
+- `/voice on` — enable voice responses for this user (persisted in SQLite)
+- `/voice off` — disable voice responses
+- `/voice` — show current status
+- Command only available when `ENABLE_VOICE_RESPONSES=true`; otherwise responds: "Voice responses are not enabled on this instance"
+- Register handler in `MessageOrchestrator._register_agentic_handlers()` and add to `get_bot_commands()`
+
+### Feature Flag
+
+New property `voice_responses_enabled` in `FeatureFlags`:
+- 
Requires `ENABLE_VOICE_RESPONSES=true` AND `mistral_api_key` is set + +## Architecture + +### Approach: Extend VoiceHandler + +Add TTS methods to the existing `VoiceHandler` class in `src/bot/features/voice_handler.py`. This class already manages the Mistral client and handles audio concerns (transcription). Adding synthesis keeps audio logic in one place and reuses the lazy-loaded client. + +### New Method: `VoiceHandler.synthesize_speech(text: str) -> bytes` + +- Calls `client.audio.speech.complete()` with configured model, voice, and format +- Returns raw audio bytes +- Reuses existing `_get_mistral_client()` — same lazy-loaded Mistral client used for transcription +- Raises `RuntimeError` on API failure (caught by caller) + +### Response Flow in Orchestrator + +Modified flow in `agentic_text()`, after Claude returns a response: + +``` +1. Get claude_response text +2. Check: voice_responses feature enabled AND user has toggle on? + |-- NO --> send text as usual (existing path, unchanged) + |-- YES --> + 3. Is len(response) > VOICE_RESPONSE_MAX_LENGTH? + |-- YES (long response path): + | a. Call Claude to summarize for spoken delivery + | b. Synthesize summary via VoiceHandler.synthesize_speech() + | c. Send voice message via reply_voice() + | d. Send full text response as normal text message + | e. On TTS failure: send text + "(Audio unavailable, sent as text)" + | + |-- NO (short response path): + a. Synthesize full response via VoiceHandler.synthesize_speech() + b. Send voice message via reply_voice() + c. Send short label text (e.g. "Voice response") + d. On TTS failure: send text + "(Audio unavailable, sent as text)" +``` + +### New Orchestrator Method: `_maybe_send_voice_response()` + +Private method encapsulating the voice response logic (steps 2-3 above). Called from `agentic_text()` before the existing text-sending block. Returns `True` if voice was sent successfully (so the text path adjusts accordingly), `False` otherwise. 
+ +### Summarization for Long Responses + +When the response exceeds `VOICE_RESPONSE_MAX_LENGTH`, a second lightweight Claude call generates a spoken summary: +- Uses the existing `ClaudeIntegration.run_command()` with a summarization prompt +- System prompt: "Summarize the following response in 2-3 sentences suitable for being read aloud as a voice message." +- The summary is synthesized to audio; the full text is sent as a normal text message alongside + +### Telegram API + +Use `update.message.reply_voice(voice=audio_bytes)` for sending voice messages. Telegram voice messages use OGG/Opus natively, so the default output format is `opus` for compatibility. + +## Storage + +### Users Table Change + +Add column `voice_responses_enabled` (boolean, default `false`) to the existing `users` table via the project's `_run_migrations()` pattern (ALTER TABLE). + +### Repository Methods + +Add to user repository: +- `get_voice_responses_enabled(user_id: int) -> bool` +- `set_voice_responses_enabled(user_id: int, enabled: bool) -> None` + +No new tables needed. 
+ +## Error Handling + +- TTS API failure (error, timeout, rate limit): fall back to normal text response + brief note "(Audio unavailable, sent as text)" +- Logged at `warning` level via structlog with error type +- Summarization failure: fall back to sending full text (skip audio) +- Feature disabled at admin level: `/voice` command explains it's unavailable + +## Testing + +### Unit Tests + +- `VoiceHandler.synthesize_speech()` — mock Mistral client, verify correct params, verify bytes returned +- `/voice on|off` command — verify toggle persists in storage, verify response messages +- Long response detection — verify threshold triggers summarize path +- TTS failure fallback — mock API error, verify text fallback + note sent + +### Integration Points + +- `_maybe_send_voice_response()` — verify correct gating on feature flag + user toggle +- Verify existing voice transcription (STT) is unaffected + +### Not Tested + +- Actual Mistral API calls (mocked) +- Audio quality/playback + +## Rollout + +1. Feature is off by default (`ENABLE_VOICE_RESPONSES=false`) +2. Admin enables via env var (Mistral API key is already configured for transcription) +3. Individual users opt in via `/voice on` diff --git a/src/bot/core.py b/src/bot/core.py index 192bddd3..55b0352c 100644 --- a/src/bot/core.py +++ b/src/bot/core.py @@ -261,6 +261,27 @@ async def _error_handler( ) -> None: """Handle errors globally.""" error = context.error + + # Stale callback queries are benign — Telegram rejects answer() for + # buttons older than ~60s (e.g. after bot restart). Don't alarm the + # user or log a security violation for this. 
+ from telegram.error import BadRequest + + if isinstance(error, BadRequest) and ( + "too old" in str(error).lower() + or "query id is invalid" in str(error).lower() + ): + logger.info( + "Ignored stale callback query in global handler", + error=str(error), + user_id=( + update.effective_user.id + if update and update.effective_user + else None + ), + ) + return + logger.error( "Global error handler triggered", error=str(error), diff --git a/src/bot/features/voice_handler.py b/src/bot/features/voice_handler.py index 11daa10c..ae488291 100644 --- a/src/bot/features/voice_handler.py +++ b/src/bot/features/voice_handler.py @@ -126,6 +126,53 @@ async def _transcribe_mistral(self, voice_bytes: bytes) -> str: raise ValueError("Mistral transcription returned an empty response.") return text + async def synthesize_speech(self, text: str) -> bytes: + """Synthesize text to audio using the Mistral TTS REST API. + + Uses httpx directly because mistralai SDK 1.x lacks audio.speech. + Returns raw audio bytes in the configured format. + """ + import base64 + + try: + import httpx + except ModuleNotFoundError as exc: + raise RuntimeError( + "httpx is required for TTS. It should be installed as a " + "dependency of mistralai." 
+ ) from exc + + api_key = self.config.mistral_api_key_str + if not api_key: + raise RuntimeError("Mistral API key is not configured.") + + payload = { + "model": self.config.voice_response_model, + "input": text, + "voice_id": self.config.voice_response_voice, + "response_format": self.config.voice_response_format, + } + + try: + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.post( + "https://api.mistral.ai/v1/audio/speech", + json=payload, + headers={"Authorization": f"Bearer {api_key}"}, + ) + resp.raise_for_status() + data = resp.json() + audio_b64 = data.get("audio_data", "") + if not audio_b64: + raise ValueError("Mistral TTS returned empty audio_data.") + return base64.b64decode(audio_b64) + except Exception as exc: + logger.warning( + "Mistral TTS request failed", + error_type=type(exc).__name__, + ) + raise RuntimeError("Mistral TTS request failed.") from exc + def _get_mistral_client(self) -> Any: """Create and cache a Mistral client on first use.""" if self._mistral_client is not None: diff --git a/src/bot/handlers/callback.py b/src/bot/handlers/callback.py index 66dd660c..6e8329d0 100644 --- a/src/bot/handlers/callback.py +++ b/src/bot/handlers/callback.py @@ -1,8 +1,17 @@ """Handle inline keyboard callbacks.""" +import os +import re +import shutil +import subprocess +from datetime import datetime, timezone from pathlib import Path from typing import Optional +import json as _json +import sqlite3 + +import requests as _requests import structlog from telegram import InlineKeyboardButton, InlineKeyboardMarkup, Update from telegram.ext import ContextTypes @@ -42,7 +51,22 @@ async def handle_callback_query( ) -> None: """Route callback queries to appropriate handlers.""" query = update.callback_query - await query.answer() # Acknowledge the callback + + # Acknowledge the callback — but don't fail on stale/old buttons. + # Telegram rejects query.answer() if the callback is older than ~60s + # (e.g. 
buttons from before a bot restart). We still want to process + # the action, so just log and continue. + try: + await query.answer() + except Exception as e: + if "too old" in str(e).lower() or "query id is invalid" in str(e).lower(): + logger.info( + "Stale callback query (button still works)", + user_id=query.from_user.id, + callback_data=query.data, + ) + else: + raise user_id = query.from_user.id data = query.data @@ -66,6 +90,8 @@ async def handle_callback_query( "conversation": handle_conversation_callback, "git": handle_git_callback, "export": handle_export_callback, + "check_match": handle_check_match_callback, + "investigate_trade": handle_investigate_trade_callback, } handler = handlers.get(action) @@ -1299,6 +1325,415 @@ async def handle_export_callback( ) +MATCH_SEPARATOR = "\u2501" * 14 # ━━━━━━━━━━━━━━ + + +async def handle_check_match_callback( + query, _param: str, context: ContextTypes.DEFAULT_TYPE +) -> None: + """Handle 'Check Match' button from escalation messages. + + Fetches live score from SofaScore API via the poly_dashboard DB, + then uses Claude (no tools) to assess the trade outcome. + """ + both_buttons = InlineKeyboardMarkup( + [[InlineKeyboardButton("\U0001f50d Check Match", callback_data="check_match"), + InlineKeyboardButton("\U0001f50e Investigate", callback_data="investigate_trade")]] + ) + + # Extract original message text (strip any previous verdict) + full_text = query.message.text or "" + if MATCH_SEPARATOR in full_text: + original_text = full_text[: full_text.index(MATCH_SEPARATOR)].rstrip() + else: + original_text = full_text + + # Step 1 -- Show "checking" state + checking_text = ( + f"{original_text}\n\n" + f"{MATCH_SEPARATOR}\n" + f"\U0001f50d Checking\u2026 (fetching score from SofaScore)" + ) + if len(checking_text) > 4096: + checking_text = checking_text[:4093] + "..." 
+ try: + await query.edit_message_text(checking_text, reply_markup=both_buttons) + except Exception as e: + logger.error("check_match: failed to edit message for checking state", error=str(e)) + return + + # Step 2 -- Extract player names and market from message + player_match = re.search( + r"([A-Z][a-z]+(?:[ ][A-Z][a-z]+)*)\s+vs\s+([A-Z][a-z]+(?:[ ][A-Z][a-z]+)*)", + original_text, + ) + market_match = re.search(r"Market:\s*(\S+)", original_text) + market_type = market_match.group(1) if market_match else "unknown" + + if not player_match: + verdict = "\u2753 UNKNOWN\nScore: unavailable\nReason: Could not parse player names from message." + await _edit_check_verdict(query, original_text, verdict, both_buttons) + return + + p1_name = player_match.group(1) + p2_name = player_match.group(2) + + # Step 3 -- Look up match in poly_dashboard DB to get sofa_id + db_path = os.path.expanduser("~/poly_dashboard/data/app.db") + sofa_id = None + match_id = None + try: + conn = sqlite3.connect(db_path, timeout=5) + conn.row_factory = sqlite3.Row + row = conn.execute( + "SELECT sofa_id, match_id FROM monitored_matches " + "WHERE player1 LIKE ? AND player2 LIKE ? AND sofa_id IS NOT NULL " + "ORDER BY created_at DESC LIMIT 1", + (f"%{p1_name}%", f"%{p2_name}%"), + ).fetchone() + if not row: + # Try reversed player order + row = conn.execute( + "SELECT sofa_id, match_id FROM monitored_matches " + "WHERE player1 LIKE ? AND player2 LIKE ? AND sofa_id IS NOT NULL " + "ORDER BY created_at DESC LIMIT 1", + (f"%{p2_name}%", f"%{p1_name}%"), + ).fetchone() + if row: + sofa_id = row["sofa_id"] + match_id = row["match_id"] + conn2 = conn # keep open for step 4 + except Exception as e: + logger.error("check_match: DB lookup failed", error=str(e)) + + if not sofa_id or not match_id: + if hasattr(conn, 'close'): + conn.close() + verdict = f"\u2753 UNKNOWN\nScore: unavailable\nReason: No match found for {p1_name} vs {p2_name}." 
+ await _edit_check_verdict(query, original_text, verdict, both_buttons) + return + + # Step 4 -- Fetch score from poly_dashboard DB (latest snapshot) + score_data = None + try: + snap = conn2.execute( + "SELECT s.p1_sets, s.p2_sets, s.p1_games, s.p2_games, " + "s.p1_point, s.p2_point, s.sets_json, s.status, s.captured_at, " + "m.player1, m.player2, m.status as match_status " + "FROM match_snapshots s " + "JOIN monitored_matches m ON s.match_id = m.match_id " + "WHERE s.match_id = ? ORDER BY s.captured_at DESC LIMIT 1", + (match_id,), + ).fetchone() + conn2.close() + if snap: + sets = [] + if snap["sets_json"]: + try: + sets_raw = _json.loads(snap["sets_json"]) + for s in sets_raw: + if isinstance(s, dict): + sets.append({"home": s.get("p1", s.get("home", 0)), "away": s.get("p2", s.get("away", 0))}) + elif isinstance(s, (list, tuple)) and len(s) >= 2: + sets.append({"home": s[0], "away": s[1]}) + except Exception: + pass + + point_score = "" + if snap["p1_point"] and snap["p2_point"]: + point_score = f"{snap['p1_point']}-{snap['p2_point']}" + + match_status = snap["match_status"] or snap["status"] or "unknown" + + score_data = { + "status": match_status, + "status_description": match_status, + "home_name": snap["player1"] or p1_name, + "away_name": snap["player2"] or p2_name, + "home_sets": snap["p1_sets"] or 0, + "away_sets": snap["p2_sets"] or 0, + "sets": sets, + "point_score": point_score, + "home_games": snap["p1_games"] or 0, + "away_games": snap["p2_games"] or 0, + } + except Exception as e: + logger.error("check_match: DB snapshot lookup failed", error=str(e)) + + if not score_data: + verdict = f"\u2753 UNKNOWN\nScore: unavailable\nReason: No snapshot data found for match {match_id}." 
+ await _edit_check_verdict(query, original_text, verdict, both_buttons) + return + + # Step 5 -- Build score summary string + sets_str = " | ".join(f"{s['home']}-{s['away']}" for s in score_data["sets"]) + score_summary = ( + f"Status: {score_data['status']} ({score_data['status_description']})\n" + f"{score_data['home_name']} vs {score_data['away_name']}\n" + f"Sets: {score_data['home_sets']}-{score_data['away_sets']} ({sets_str})" + ) + if score_data.get("home_games") is not None: + score_summary += f"\nCurrent set games: {score_data['home_games']}-{score_data['away_games']}" + if score_data["point_score"]: + score_summary += f"\nCurrent game: {score_data['point_score']}" + + # Step 6 -- Use Claude (no tools) for assessment + prompt = ( + "You output EXACTLY 3 lines. No analysis, no reasoning, no markdown, no extra text.\n\n" + "TRADE:\n" + f"{original_text[:1500]}\n\n" + "LIVE SCORE:\n" + f"{score_summary}\n\n" + "Output these 3 lines and NOTHING else:\n" + "\n" + "Score: \n" + "Reason: \n\n" + "STATUS must be exactly one of: \u2705 WON | \u274c LOST | \u2705 WINNING | \u274c LOSING | \u2753 UNKNOWN\n" + "Determine the trade outcome by comparing the Market field against the live score.\n" + "If match finished: WON or LOST. If live: WINNING or LOSING. If unclear: UNKNOWN.\n\n" + "IMPORTANT: Your entire response must be exactly 3 lines. Do not explain your reasoning." + ) + + verdict = f"\u2753 UNKNOWN\nScore: {sets_str or 'unavailable'}\nReason: Assessment failed." 
+ try: + claude_path = shutil.which("claude") or "/usr/bin/claude" + env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} + env["CLAUDE_CODE_ENTRYPOINT"] = "cli" + result = subprocess.run( + [claude_path, "-p", prompt, "--allowedTools", "", "--max-turns", "1"], + capture_output=True, + text=True, + timeout=30, + env=env, + cwd="/home/ubuntu/poly_dashboard", + ) + if result.stdout.strip(): + raw = result.stdout.strip() + # Extract only the 3 verdict lines, skip any preamble + lines = raw.split("\n") + verdict_lines = [] + for ln in lines: + if any(ln.startswith(p) for p in ("\u2705", "\u274c", "\u2753", "Score:", "Reason:")): + verdict_lines.append(ln) + verdict = "\n".join(verdict_lines) if len(verdict_lines) >= 2 else raw + except subprocess.TimeoutExpired: + verdict = f"\u2753 UNKNOWN\nScore: {sets_str or 'unavailable'}\nReason: Claude timed out." + except Exception as e: + logger.error("check_match: claude -p failed", error=str(e)) + verdict = f"\u2753 UNKNOWN\nScore: {sets_str or 'unavailable'}\nReason: {str(e)[:80]}" + + await _edit_check_verdict(query, original_text, verdict, both_buttons) + + +async def _edit_check_verdict(query, original_text, verdict, reply_markup): + """Edit the message with the final check verdict.""" + now_utc = datetime.now(timezone.utc).strftime("%H:%M UTC") + safe_verdict = escape_html(verdict) + + final_text = ( + f"{original_text}\n\n" + f"{MATCH_SEPARATOR}\n" + f"\U0001f50d Check: {now_utc}\n" + f"{safe_verdict}" + ) + if len(final_text) > 4096: + overflow = len(final_text) - 4096 + 3 + final_text = ( + f"{original_text[:len(original_text) - overflow]}...\n\n" + f"{MATCH_SEPARATOR}\n" + f"\U0001f50d Check: {now_utc}\n" + f"{safe_verdict}" + ) + try: + await query.edit_message_text(final_text, reply_markup=reply_markup) + except Exception as e: + logger.error("check_match: failed to edit message with verdict", error=str(e)) + + +async def handle_investigate_trade_callback( + query, _param: str, context: 
ContextTypes.DEFAULT_TYPE +) -> None: + """Handle 'Investigate' button from trade fill notifications. + + Runs a deep claude -p analysis that queries the database, reads logs, + checks price evolution, finds similar historical trades, and evaluates + whether this trade pattern is reliably profitable. The response is sent + as a reply message (not edited into the original) because investigation + output is substantially longer than a quick check. + """ + from telegram import InlineKeyboardButton, InlineKeyboardMarkup + + # Keep both buttons on the original message + both_buttons = InlineKeyboardMarkup( + [ + [ + InlineKeyboardButton( + "\U0001f50d Check Match", callback_data="check_match" + ), + InlineKeyboardButton( + "\U0001f50e Investigate", callback_data="investigate_trade" + ), + ] + ] + ) + + # Step 1 -- Acknowledge and send placeholder reply + try: + await query.answer("\U0001f50e Investigating trade\u2026") + except Exception: + pass + + try: + placeholder = await query.message.reply_text( + "\U0001f50e Investigating trade\u2026\n\n" + "Analyzing price history, similar trades, and logs. " + "This takes 30\u201360 seconds.", + parse_mode="HTML", + ) + except Exception as e: + logger.error("investigate_trade: failed to send placeholder", error=str(e)) + return + + # Step 2 -- Extract trade context from original message text + message_text = query.message.text or "" + + # Step 3 -- Build the investigation prompt + prompt = ( + "You are a trade investigation analyst for a tennis betting system on Polymarket.\n\n" + "A trade just filled. 
Here is the notification:\n" + "---\n" + f"{message_text[:2000]}\n" + "---\n\n" + "Your job: deep-dive into this trade using the local database and logs, then report findings.\n\n" + "## Tools Available\n" + "- Run `sqlite3 /home/ubuntu/poly_dashboard/data/app.db \"\"` for database queries\n" + "- Run `grep` / `tail` on log files in `/home/ubuntu/poly_dashboard/data/logs/`\n" + "- Use web search to find the current match score if needed\n\n" + "## Investigation Steps\n\n" + "### 1. Trade Details\n" + "Query the trades table for this specific trade (match players + market type + most recent filled_at):\n" + "```sql\n" + "SELECT t.*, mm.player1, mm.player2, mm.status as match_status, mm.winner\n" + "FROM trades t JOIN monitored_matches mm ON mm.match_id = t.match_id\n" + "WHERE (mm.player1 LIKE '%%' OR mm.player2 LIKE '%%')\n" + "AND t.status NOT IN ('cancelled') ORDER BY t.filled_at DESC LIMIT 1;\n" + "```\n" + "Replace with the first player's last name from the notification.\n\n" + "### 2. Price & Model Evolution\n" + "Query match_snapshots for this match to see how price and model probability evolved:\n" + "```sql\n" + "SELECT captured_at, set1_score, set2_score, set3_score,\n" + " moneyline_p1, market_p1_mid,\n" + " total_sets_over, ts_over_mid\n" + "FROM match_snapshots WHERE match_id = \n" + "ORDER BY captured_at;\n" + "```\n" + "Summarize: when did the edge appear? Did it grow or shrink? " + "What was happening in the match at trade time?\n\n" + "### 3. 
Similar Historical Trades\n" + "Find trades with the same market_type, similar price range (+/-0.05), and similar edge:\n" + "```sql\n" + "SELECT t.id, mm.player1, mm.player2, t.market_type, t.price, t.edge, t.model_prob,\n" + " t.cost, t.pnl, t.status, t.created_at\n" + "FROM trades t JOIN monitored_matches mm ON mm.match_id = t.match_id\n" + "WHERE t.market_type = ''\n" + "AND t.status IN ('won','lost','redeemed','sold')\n" + "AND t.price BETWEEN AND \n" + "ORDER BY t.filled_at DESC LIMIT 30;\n" + "```\n" + "Report: X won, Y lost, win rate, total PnL from similar trades.\n\n" + "### 4. Configuration Check\n" + "Query trading_config to see current thresholds for this market:\n" + "```sql\n" + "SELECT market_edges, market_thresholds, market_gains, market_start_scores,\n" + " market_max_prices, market_min_prices, market_side_filters\n" + "FROM trading_config WHERE id = 1;\n" + "```\n" + "Note which filters were active and whether this trade was borderline.\n\n" + "### 5. Log Context\n" + "Check trader.log around the trade time for signal evaluation details:\n" + "```bash\n" + "grep -A2 -B2 \"\" /home/ubuntu/poly_dashboard/data/logs/trader.log | tail -40\n" + "```\n\n" + "### 6. 
Current Match State\n" + "Web search for the current score of this match.\n\n" + "## Output Format (strict -- follow exactly)\n\n" + "\U0001f50e Trade Investigation\n" + "\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\u2501\n\n" + "\U0001f4cb This Trade\n" + "[1-2 lines: what was bought, at what price/edge, match state at trade time]\n\n" + "\U0001f4c8 Price Evolution\n" + "[2-3 lines: how model prob and market price moved before/after trade]\n\n" + "\U0001f4ca Similar Trades\n" + "[market_type] at price ~[X]: [N] total | [W] won | [L] lost | Win rate: [%]\n" + "Total PnL from similar: $[X]\n" + "[1 line: pattern observation]\n\n" + "\u2699\ufe0f Config Analysis\n" + "[1-2 lines: current thresholds, whether this trade was borderline]\n\n" + "\U0001f3df\ufe0f Current Status\n" + "[1 line: current score or final result]\n\n" + "\U0001f4a1 Suggestions\n" + "[2-4 bullet points: concrete parameter changes or observations]\n\n" + "Keep the entire response under 3500 characters (Telegram message limit is 4096).\n" + "Use HTML formatting (, , ). Do NOT use markdown.\n" + "Be direct and data-driven. 
No filler text.\n" + ) + + # Step 4 -- Run claude -p with investigation prompt + verdict = None + try: + claude_path = shutil.which("claude") or "/usr/bin/claude" + env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} + env["CLAUDE_CODE_ENTRYPOINT"] = "cli" + result = subprocess.run( + [ + claude_path, + "-p", + prompt, + "--allowedTools", + "Bash,WebSearch", + ], + capture_output=True, + text=True, + env=env, + ) + if result.stdout.strip(): + raw = result.stdout.strip() + # Extract only the 3 verdict lines, skip any preamble + lines = raw.split("\n") + verdict_lines = [] + for ln in lines: + if any(ln.startswith(p) for p in ("\u2705", "\u274c", "\u2753", "Score:", "Reason:")): + verdict_lines.append(ln) + verdict = "\n".join(verdict_lines) if len(verdict_lines) >= 2 else raw + except FileNotFoundError: + verdict = ( + "\U0001f50e Investigation failed \u2014 Claude CLI not available on this machine." + ) + except Exception as e: + logger.error("investigate_trade: claude -p failed", error=str(e)) + verdict = f"\U0001f50e Investigation error: {str(e)[:100]}" + + if not verdict: + verdict = "\U0001f50e No findings \u2014 Claude returned empty." + + # Truncate to stay within Telegram's 4096 char limit + if len(verdict) > 4000: + verdict = verdict[:3997] + "..." 
+ + # Step 5 -- Edit placeholder with the actual result + try: + await placeholder.edit_text(verdict, parse_mode="HTML") + except Exception: + # HTML parse failed -- retry without parse_mode + try: + await placeholder.edit_text(verdict) + except Exception as e: + logger.error( + "investigate_trade: failed to edit placeholder", error=str(e) + ) + + def _format_file_size(size: int) -> str: """Format file size in human-readable format.""" for unit in ["B", "KB", "MB", "GB"]: diff --git a/src/bot/orchestrator.py b/src/bot/orchestrator.py index 1124d006..01d108de 100644 --- a/src/bot/orchestrator.py +++ b/src/bot/orchestrator.py @@ -319,6 +319,7 @@ def _register_agentic_handlers(self, app: Application) -> None: ("new", self.agentic_new), ("status", self.agentic_status), ("verbose", self.agentic_verbose), + ("voice", self.agentic_voice_toggle), ("repo", self.agentic_repo), ("restart", command.restart_command), ] @@ -452,6 +453,7 @@ async def get_bot_commands(self) -> list: # type: ignore[type-arg] BotCommand("new", "Start a fresh session"), BotCommand("status", "Show session status"), BotCommand("verbose", "Set output verbosity (0/1/2)"), + BotCommand("voice", "Toggle voice responses (on/off)"), BotCommand("repo", "List repos / switch workspace"), BotCommand("restart", "Restart the bot"), ] @@ -615,6 +617,47 @@ async def agentic_verbose( parse_mode="HTML", ) + async def agentic_voice_toggle( + self, update: Update, context: ContextTypes.DEFAULT_TYPE + ) -> None: + """Toggle voice responses: /voice [on|off].""" + if not self.settings.enable_voice_responses: + await update.message.reply_text( + "Voice responses are not enabled on this instance.", + parse_mode="HTML", + ) + return + + user_id = update.effective_user.id + storage = context.bot_data.get("storage") + args = update.message.text.split()[1:] if update.message.text else [] + + if not args: + enabled = await storage.users.get_voice_responses_enabled(user_id) + status = "on" if enabled else "off" + await 
update.message.reply_text( + f"Voice responses: {status}\n\n" + "Usage: /voice on or /voice off", + parse_mode="HTML", + ) + return + + arg = args[0].lower() + if arg not in ("on", "off"): + await update.message.reply_text( + "Please use: /voice on or /voice off", + parse_mode="HTML", + ) + return + + enabled = arg == "on" + await storage.users.set_voice_responses_enabled(user_id, enabled) + status = "enabled" if enabled else "disabled" + await update.message.reply_text( + f"Voice responses {status}", + parse_mode="HTML", + ) + def _format_verbose_progress( self, activity_log: List[Dict[str, Any]], @@ -905,6 +948,115 @@ async def _send_images( return caption_sent + async def _maybe_send_voice_response( + self, + update: Update, + context: ContextTypes.DEFAULT_TYPE, + response_text: str, + user_id: int, + voice_handler: Any, + ) -> bool: + """Try to send response as voice message. + + Returns True if voice was sent (caller should adjust text sending). + Returns False if voice was not sent (caller sends text as normal). + """ + if not self.settings.enable_voice_responses: + return False + + storage = context.bot_data.get("storage") + if not storage: + return False + + try: + enabled = await storage.users.get_voice_responses_enabled(user_id) + except Exception: + return False + + if not enabled: + return False + + if not voice_handler: + return False + + text_to_speak = response_text + is_long = len(response_text) > self.settings.voice_response_max_length + send_full_text = False + + if is_long: + # Summarize for spoken delivery + try: + claude_integration = context.bot_data.get("claude_integration") + if claude_integration: + summary_prompt = ( + "Summarize the following response in 2-3 sentences " + "suitable for being read aloud as a voice message. 
" + "Output ONLY the summary, nothing else.\n\n" + f"{response_text}" + ) + summary_response = await claude_integration.run_command( + prompt=summary_prompt, + working_directory=Path(self.settings.approved_directory), + user_id=user_id, + force_new=True, + ) + text_to_speak = summary_response.content or response_text + send_full_text = True + else: + # No Claude integration, truncate instead + text_to_speak = response_text[ + : self.settings.voice_response_max_length + ] + send_full_text = True + except Exception as exc: + logger.warning( + "Voice summary generation failed, falling back to text", + error=str(exc), + ) + return False + + try: + audio_bytes = await voice_handler.synthesize_speech(text_to_speak) + await update.message.reply_voice( + voice=audio_bytes, + reply_to_message_id=update.message.message_id, + ) + + if send_full_text: + # Long response: send full text alongside + from .utils.formatting import ResponseFormatter + + formatter = ResponseFormatter(self.settings) + formatted_messages = formatter.format_claude_response(response_text) + for message in formatted_messages: + if message.text and message.text.strip(): + try: + await update.message.reply_text( + message.text, + parse_mode=message.parse_mode, + reply_markup=None, + ) + except Exception: + await update.message.reply_text( + message.text, reply_markup=None + ) + return True + + except Exception as exc: + logger.warning( + "TTS failed, falling back to text", + error_type=type(exc).__name__, + error=str(exc), + ) + try: + await update.message.reply_text( + "(Audio unavailable, sent as text)", + reply_markup=None, + ) + except Exception: + pass + return False + async def agentic_text( self, update: Update, context: ContextTypes.DEFAULT_TYPE ) -> None: @@ -998,6 +1150,7 @@ async def agentic_text( heartbeat = self._start_typing_heartbeat(chat) success = True + response_content: Optional[str] = None try: claude_response = await claude_integration.run_command( prompt=message_text, @@ -1091,8 
+1244,24 @@ async def agentic_text( except Exception as img_err: logger.warning("Image+caption send failed", error=str(img_err)) - # Send text messages (skip if caption was already embedded in photos) - if not caption_sent: + # Try voice response first (if enabled and user toggled on) + voice_sent = False + if not caption_sent and response_content: + features = context.bot_data.get("features") + voice_handler = features.get_voice_handler() if features else None + try: + voice_sent = await self._maybe_send_voice_response( + update=update, + context=context, + response_text=response_content, + user_id=user_id, + voice_handler=voice_handler, + ) + except Exception as voice_err: + logger.warning("Voice response attempt failed", error=str(voice_err)) + + # Send text messages (skip if caption or voice was already sent) + if not caption_sent and not voice_sent: for i, message in enumerate(formatted_messages): if not message.text or not message.text.strip(): continue @@ -1499,7 +1668,24 @@ async def _handle_agentic_media_message( except Exception as img_err: logger.warning("Image+caption send failed", error=str(img_err)) - if not caption_sent: + # Try voice response first (if enabled and user toggled on) + voice_sent = False + response_content = claude_response.content + if not caption_sent and response_content: + features = context.bot_data.get("features") + voice_handler = features.get_voice_handler() if features else None + try: + voice_sent = await self._maybe_send_voice_response( + update=update, + context=context, + response_text=response_content, + user_id=user_id, + voice_handler=voice_handler, + ) + except Exception as voice_err: + logger.warning("Voice response attempt failed", error=str(voice_err)) + + if not caption_sent and not voice_sent: for i, message in enumerate(formatted_messages): if not message.text or not message.text.strip(): continue diff --git a/src/claude/sdk_integration.py b/src/claude/sdk_integration.py index ab9c4046..4277d532 100644 --- 
a/src/claude/sdk_integration.py +++ b/src/claude/sdk_integration.py @@ -187,6 +187,20 @@ def _stderr_callback(line: str) -> None: path=str(claude_md_path), ) + # Inform Claude about voice response capabilities when enabled + if self.config.enable_voice_responses: + base_prompt += ( + "\n\n## Telegram Bot Capabilities\n" + "You are accessed via a Telegram bot. " + "Users can send voice messages which the bot transcribes to text before they reach you. " + "When the user has enabled voice responses (via /voice on), " + "the bot automatically converts your text responses to audio " + "using text-to-speech and sends them as Telegram voice messages. " + "Just respond normally in text — the bot handles audio conversion. " + "Do NOT tell users you cannot send voice messages. " + "When asked about voice capabilities, tell them to use /voice on or /voice off." + ) + # When DISABLE_TOOL_VALIDATION=true, pass None for allowed/disallowed # tools so the SDK does not restrict tool usage (e.g. MCP tools). 
if self.config.disable_tool_validation: diff --git a/src/config/features.py b/src/config/features.py index 03b54a86..ae109167 100644 --- a/src/config/features.py +++ b/src/config/features.py @@ -80,6 +80,13 @@ def voice_messages_enabled(self) -> bool: return self.settings.openai_api_key is not None return self.settings.mistral_api_key is not None + @property + def voice_responses_enabled(self) -> bool: + """Check if text-to-speech voice responses are enabled.""" + if not self.settings.enable_voice_responses: + return False + return self.settings.mistral_api_key is not None + @property def stream_drafts_enabled(self) -> bool: """Check if streaming drafts via sendMessageDraft is enabled.""" @@ -100,6 +107,7 @@ def is_feature_enabled(self, feature_name: str) -> bool: "scheduler": self.scheduler_enabled, "agentic_mode": self.agentic_mode_enabled, "voice_messages": self.voice_messages_enabled, + "voice_responses": self.voice_responses_enabled, "stream_drafts": self.stream_drafts_enabled, } return feature_map.get(feature_name, False) @@ -129,6 +137,8 @@ def get_enabled_features(self) -> list[str]: features.append("scheduler") if self.voice_messages_enabled: features.append("voice_messages") + if self.voice_responses_enabled: + features.append("voice_responses") if self.stream_drafts_enabled: features.append("stream_drafts") return features diff --git a/src/config/settings.py b/src/config/settings.py index 77c34ea4..777a5610 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -195,6 +195,30 @@ class Settings(BaseSettings): ge=1, le=200, ) + + # Voice response (TTS) settings + enable_voice_responses: bool = Field( + False, description="Enable text-to-speech voice responses" + ) + voice_response_model: str = Field( + "voxtral-mini-tts-2603", + description="Mistral TTS model for voice responses", + ) + voice_response_voice: str = Field( + "c69964a6-ab8b-4f8a-9465-ec0925096ec8", + description="Mistral TTS voice ID (UUID from /v1/audio/voices)", + ) + 
voice_response_format: str = Field( + "opus", + description="TTS output audio format (opus for Telegram voice compatibility)", + ) + voice_response_max_length: int = Field( + 2000, + description="Character threshold above which responses are summarized before TTS", + ge=100, + le=10000, + ) + enable_quick_actions: bool = Field(True, description="Enable quick action buttons") agentic_mode: bool = Field( True, diff --git a/src/storage/database.py b/src/storage/database.py index 3050e046..ae62ea62 100644 --- a/src/storage/database.py +++ b/src/storage/database.py @@ -310,6 +310,13 @@ def _get_migrations(self) -> List[Tuple[int, str]]: ON project_threads(project_slug); """, ), + ( + 5, + """ + -- Add voice response preference to users + ALTER TABLE users ADD COLUMN voice_responses_enabled BOOLEAN DEFAULT FALSE; + """, + ), ] async def _init_pool(self): diff --git a/src/storage/models.py b/src/storage/models.py index 001195b9..4cfa5e82 100644 --- a/src/storage/models.py +++ b/src/storage/models.py @@ -38,6 +38,7 @@ class UserModel: total_cost: float = 0.0 message_count: int = 0 session_count: int = 0 + voice_responses_enabled: bool = False def to_dict(self) -> Dict[str, Any]: """Convert to dictionary.""" diff --git a/src/storage/repositories.py b/src/storage/repositories.py index 02492b8e..a86fee2b 100644 --- a/src/storage/repositories.py +++ b/src/storage/repositories.py @@ -114,6 +114,30 @@ async def get_all_users(self) -> List[UserModel]: rows = await cursor.fetchall() return [UserModel.from_row(row) for row in rows] + async def get_voice_responses_enabled(self, user_id: int) -> bool: + """Get voice response preference for a user.""" + async with self.db.get_connection() as conn: + cursor = await conn.execute( + "SELECT voice_responses_enabled FROM users WHERE user_id = ?", + (user_id,), + ) + row = await cursor.fetchone() + return bool(row[0]) if row else False + + async def set_voice_responses_enabled(self, user_id: int, enabled: bool) -> None: + """Set voice 
response preference for a user.""" + async with self.db.get_connection() as conn: + await conn.execute( + "UPDATE users SET voice_responses_enabled = ? WHERE user_id = ?", + (enabled, user_id), + ) + await conn.commit() + logger.info( + "Updated voice response preference", + user_id=user_id, + enabled=enabled, + ) + class SessionRepository: """Session data access.""" diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 6b20c6fe..aba84363 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -678,3 +678,61 @@ def test_configuration_error_handling(): "APPROVED_DIRECTORY", ]: os.environ.pop(key, None) + + +def test_voice_response_settings_defaults(tmp_path): + """Voice response settings have correct defaults.""" + from src.config.settings import Settings + + project_dir = tmp_path / "projects" + project_dir.mkdir() + + config = Settings( + telegram_bot_token="test:token", + telegram_bot_username="testbot", + approved_directory=str(project_dir), + ) + assert config.enable_voice_responses is False + assert config.voice_response_model == "voxtral-mini-tts-2603" + assert config.voice_response_voice == "c69964a6-ab8b-4f8a-9465-ec0925096ec8" + assert config.voice_response_format == "opus" + assert config.voice_response_max_length == 2000 + + +def test_voice_responses_feature_flag_enabled(): + """voice_responses_enabled is True when enable_voice_responses and mistral_api_key set.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + settings.enable_voice_responses = True + settings.mistral_api_key = MagicMock() # not None = key is set + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is True + + +def test_voice_responses_feature_flag_disabled_no_key(): + """voice_responses_enabled is False when mistral_api_key is None.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + 
settings.enable_voice_responses = True + settings.mistral_api_key = None + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is False + + +def test_voice_responses_feature_flag_disabled_not_enabled(): + """voice_responses_enabled is False when enable_voice_responses is False.""" + from unittest.mock import MagicMock + + from src.config.features import FeatureFlags + + settings = MagicMock() + settings.enable_voice_responses = False + settings.mistral_api_key = MagicMock() + flags = FeatureFlags(settings) + assert flags.voice_responses_enabled is False diff --git a/tests/unit/test_orchestrator.py b/tests/unit/test_orchestrator.py index ce5e419e..793409a0 100644 --- a/tests/unit/test_orchestrator.py +++ b/tests/unit/test_orchestrator.py @@ -82,8 +82,8 @@ def deps(): } -def test_agentic_registers_6_commands(agentic_settings, deps): - """Agentic mode registers start, new, status, verbose, repo, restart commands.""" +def test_agentic_registers_7_commands(agentic_settings, deps): + """Agentic mode registers start, new, status, verbose, voice, repo, restart commands.""" orchestrator = MessageOrchestrator(agentic_settings, deps) app = MagicMock() app.add_handler = MagicMock() @@ -100,7 +100,7 @@ def test_agentic_registers_6_commands(agentic_settings, deps): ] commands = [h[0][0].commands for h in cmd_handlers] - assert len(cmd_handlers) == 6 + assert len(cmd_handlers) == 7 assert frozenset({"start"}) in commands assert frozenset({"new"}) in commands assert frozenset({"status"}) in commands @@ -156,13 +156,15 @@ def test_agentic_registers_text_document_photo_handlers(agentic_settings, deps): async def test_agentic_bot_commands(agentic_settings, deps): - """Agentic mode returns 6 bot commands.""" + """Agentic mode returns 7 bot commands.""" orchestrator = MessageOrchestrator(agentic_settings, deps) commands = await orchestrator.get_bot_commands() - assert len(commands) == 6 + assert len(commands) == 7 cmd_names = [c.command for c in commands] - assert 
cmd_names == ["start", "new", "status", "verbose", "repo", "restart"] + assert cmd_names == [ + "start", "new", "status", "verbose", "voice", "repo", "restart", + ] async def test_classic_bot_commands(classic_settings, deps): diff --git a/tests/unit/test_voice_command.py b/tests/unit/test_voice_command.py new file mode 100644 index 00000000..616c05b6 --- /dev/null +++ b/tests/unit/test_voice_command.py @@ -0,0 +1,92 @@ +"""Tests for /voice toggle command.""" + +from unittest.mock import AsyncMock, MagicMock + + +def _make_update_context(user_id=123, text="/voice"): + """Create mock Update and Context for command testing.""" + update = MagicMock() + update.effective_user.id = user_id + update.message.text = text + update.message.reply_text = AsyncMock() + update.message.message_id = 1 + + context = MagicMock() + context.bot_data = {} + context.user_data = {} + + storage = MagicMock() + storage.users = MagicMock() + context.bot_data["storage"] = storage + context.bot_data["features"] = MagicMock() + + settings = MagicMock() + settings.enable_voice_responses = True + settings.mistral_api_key = MagicMock() # not None + + return update, context, storage, settings + + +async def test_voice_on_enables(): + """'/voice on' enables voice responses for the user.""" + update, context, storage, settings = _make_update_context(text="/voice on") + storage.users.set_voice_responses_enabled = AsyncMock() + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.set_voice_responses_enabled.assert_called_once_with(123, True) + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "on" in reply_text.lower() or "enabled" in reply_text.lower() + + +async def test_voice_off_disables(): + """'/voice off' disables voice responses for the user.""" + update, context, storage, settings = 
_make_update_context(text="/voice off") + storage.users.set_voice_responses_enabled = AsyncMock() + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.set_voice_responses_enabled.assert_called_once_with(123, False) + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "off" in reply_text.lower() or "disabled" in reply_text.lower() + + +async def test_voice_no_args_shows_status(): + """'/voice' with no args shows current status.""" + update, context, storage, settings = _make_update_context(text="/voice") + storage.users.get_voice_responses_enabled = AsyncMock(return_value=False) + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + storage.users.get_voice_responses_enabled.assert_called_once_with(123) + update.message.reply_text.assert_called_once() + + +async def test_voice_disabled_at_admin_level(): + """'/voice' when feature disabled at admin level shows unavailable message.""" + update, context, storage, settings = _make_update_context(text="/voice on") + settings.enable_voice_responses = False + + from src.bot.orchestrator import MessageOrchestrator + + deps = {"storage": storage} + orch = MessageOrchestrator(settings, deps) + await orch.agentic_voice_toggle(update, context) + + update.message.reply_text.assert_called_once() + reply_text = update.message.reply_text.call_args[0][0] + assert "not enabled" in reply_text.lower() diff --git a/tests/unit/test_voice_preference.py b/tests/unit/test_voice_preference.py new file mode 100644 index 00000000..ee06c098 --- /dev/null +++ b/tests/unit/test_voice_preference.py @@ -0,0 +1,56 @@ +"""Tests for voice response preference storage.""" + +import pytest + +from 
src.storage.database import DatabaseManager +from src.storage.models import UserModel +from src.storage.repositories import UserRepository + + +@pytest.fixture +async def db_manager(tmp_path): + """Create an in-memory database manager.""" + db_path = str(tmp_path / "test.db") + manager = DatabaseManager(f"sqlite:///{db_path}") + await manager.initialize() + yield manager + await manager.close() + + +@pytest.fixture +async def user_repo(db_manager): + """Create a UserRepository with initialized DB.""" + return UserRepository(db_manager) + + +async def test_get_voice_responses_default_false(user_repo): + """New users have voice_responses_enabled = False by default.""" + user = UserModel(user_id=123, telegram_username="testuser") + await user_repo.create_user(user) + result = await user_repo.get_voice_responses_enabled(123) + assert result is False + + +async def test_set_voice_responses_enabled(user_repo): + """Setting voice_responses_enabled to True persists.""" + user = UserModel(user_id=456, telegram_username="testuser2") + await user_repo.create_user(user) + await user_repo.set_voice_responses_enabled(456, True) + result = await user_repo.get_voice_responses_enabled(456) + assert result is True + + +async def test_set_voice_responses_disabled(user_repo): + """Setting voice_responses_enabled back to False persists.""" + user = UserModel(user_id=789, telegram_username="testuser3") + await user_repo.create_user(user) + await user_repo.set_voice_responses_enabled(789, True) + await user_repo.set_voice_responses_enabled(789, False) + result = await user_repo.get_voice_responses_enabled(789) + assert result is False + + +async def test_get_voice_responses_nonexistent_user(user_repo): + """Nonexistent user returns False.""" + result = await user_repo.get_voice_responses_enabled(999) + assert result is False diff --git a/tests/unit/test_voice_response_flow.py b/tests/unit/test_voice_response_flow.py new file mode 100644 index 00000000..886f33f5 --- /dev/null +++ 
b/tests/unit/test_voice_response_flow.py @@ -0,0 +1,147 @@ +"""Tests for voice response flow in orchestrator.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _make_orchestrator_with_voice(): + """Create orchestrator with voice responses enabled.""" + settings = MagicMock() + settings.enable_voice_responses = True + settings.mistral_api_key = MagicMock() + settings.voice_response_max_length = 2000 + settings.agentic_mode = True + + storage = MagicMock() + storage.users = MagicMock() + + deps = {"storage": storage} + + from src.bot.orchestrator import MessageOrchestrator + + orch = MessageOrchestrator(settings, deps) + return orch, storage + + +def _make_update_context(storage): + """Create mock Update and Context.""" + update = MagicMock() + update.effective_user.id = 123 + update.message.reply_voice = AsyncMock() + update.message.reply_text = AsyncMock() + update.message.message_id = 1 + + context = MagicMock() + context.bot_data = {"storage": storage} + return update, context + + +async def test_short_response_sends_voice(): + """Short response synthesizes and sends voice message.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock(return_value=b"audio-data") + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Hello, this is a short response.", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is True + voice_handler.synthesize_speech.assert_called_once_with( + "Hello, this is a short response." 
+ ) + update.message.reply_voice.assert_called_once() + # Short responses: audio only, no text label + update.message.reply_text.assert_not_called() + + +async def test_voice_disabled_skips(): + """When user has voice off, returns False.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=False) + voice_handler = MagicMock() + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Some response", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is False + voice_handler.synthesize_speech.assert_not_called() + + +async def test_long_response_summarizes_then_speaks(): + """Long response triggers summarization, sends audio of summary + full text.""" + orch, storage = _make_orchestrator_with_voice() + orch.settings.voice_response_max_length = 50 # Low threshold for testing + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock(return_value=b"audio-data") + + # Mock Claude integration for summarization + mock_claude = MagicMock() + mock_summary_response = MagicMock() + mock_summary_response.content = "This is a brief summary." 
+ mock_claude.run_command = AsyncMock(return_value=mock_summary_response) + context.bot_data["claude_integration"] = mock_claude + + long_text = "A" * 100 # Exceeds threshold of 50 + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text=long_text, + user_id=123, + voice_handler=voice_handler, + ) + + assert result is True + # Should synthesize the SUMMARY, not the full text + voice_handler.synthesize_speech.assert_called_once_with("This is a brief summary.") + # Should send voice + text messages (full text) + update.message.reply_voice.assert_called_once() + assert update.message.reply_text.call_count >= 1 # Full text sent + + +async def test_tts_failure_falls_back_to_text(): + """TTS failure returns False and sends fallback note.""" + orch, storage = _make_orchestrator_with_voice() + update, context = _make_update_context(storage) + + storage.users.get_voice_responses_enabled = AsyncMock(return_value=True) + + voice_handler = MagicMock() + voice_handler.synthesize_speech = AsyncMock( + side_effect=RuntimeError("TTS failed") + ) + + result = await orch._maybe_send_voice_response( + update=update, + context=context, + response_text="Some response", + user_id=123, + voice_handler=voice_handler, + ) + + assert result is False + # Should have sent a fallback note + update.message.reply_text.assert_called_once() + note = update.message.reply_text.call_args[0][0] + assert "audio unavailable" in note.lower() diff --git a/tests/unit/test_voice_tts.py b/tests/unit/test_voice_tts.py new file mode 100644 index 00000000..3352f5b9 --- /dev/null +++ b/tests/unit/test_voice_tts.py @@ -0,0 +1,70 @@ +"""Tests for VoiceHandler TTS synthesis.""" + +import base64 +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.bot.features.voice_handler import VoiceHandler + + +@pytest.fixture +def tts_config(): + """Create a mock config with TTS settings.""" + cfg = MagicMock() + cfg.voice_provider = 
"mistral" + cfg.mistral_api_key_str = "test-api-key" + cfg.voice_response_model = "voxtral-mini-tts-2603" + cfg.voice_response_voice = "c69964a6-ab8b-4f8a-9465-ec0925096ec8" + cfg.voice_response_format = "mp3" + cfg.resolved_voice_model = "voxtral-mini-latest" + cfg.voice_max_file_size_mb = 20 + cfg.voice_max_file_size_bytes = 20 * 1024 * 1024 + return cfg + + +@pytest.fixture +def voice_handler(tts_config): + return VoiceHandler(config=tts_config) + + +async def test_synthesize_speech_calls_mistral(voice_handler): + """synthesize_speech calls Mistral TTS REST API with correct params.""" + fake_audio = b"fake-audio-bytes" + fake_b64 = base64.b64encode(fake_audio).decode() + + mock_response = MagicMock() + mock_response.raise_for_status = MagicMock() + mock_response.json = MagicMock(return_value={"audio_data": fake_b64}) + + mock_client_instance = AsyncMock() + mock_client_instance.post = AsyncMock(return_value=mock_response) + mock_client_instance.__aenter__ = AsyncMock(return_value=mock_client_instance) + mock_client_instance.__aexit__ = AsyncMock(return_value=False) + + with patch("httpx.AsyncClient", return_value=mock_client_instance): + result = await voice_handler.synthesize_speech("Hello world") + + assert result == fake_audio + mock_client_instance.post.assert_called_once() + call_args = mock_client_instance.post.call_args + assert call_args[0][0] == "https://api.mistral.ai/v1/audio/speech" + payload = call_args[1]["json"] + assert payload["model"] == "voxtral-mini-tts-2603" + assert payload["voice_id"] == "c69964a6-ab8b-4f8a-9465-ec0925096ec8" + assert payload["input"] == "Hello world" + assert payload["response_format"] == "mp3" + assert call_args[1]["headers"]["Authorization"] == "Bearer test-api-key" + + +async def test_synthesize_speech_api_failure(voice_handler): + """synthesize_speech raises RuntimeError on API failure.""" + mock_client_instance = AsyncMock() + mock_client_instance.post = AsyncMock(side_effect=Exception("API down")) + 
mock_client_instance.__aenter__ = AsyncMock(return_value=mock_client_instance) + mock_client_instance.__aexit__ = AsyncMock(return_value=False) + + with patch("httpx.AsyncClient", return_value=mock_client_instance): + with pytest.raises(RuntimeError, match="Mistral TTS request failed"): + await voice_handler.synthesize_speech("Hello world")