From 767e0b918a59c91d4a2f6462199020b451fc8131 Mon Sep 17 00:00:00 2001 From: Mac Date: Tue, 7 Apr 2026 16:41:55 +0530 Subject: [PATCH 01/96] feat: add TTS with Audio Mode and Chat Mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements on-device text-to-speech using OuteTTS 0.3 (454 MB) + WavTokenizer (73 MB) via llama.rn, with react-native-audio-api for playback. Two interface modes (user-switchable from Settings): - Chat Mode: play/stop TTSButton on each assistant message bubble - Audio Mode: waveform bubbles with auto-TTS after streaming, transcript expand, speed cycling, and PCM audio persisted to disk per message for repeat playback New files: - src/constants/ttsModels.ts — model URLs, RAM thresholds, cache config - src/services/ttsService.ts — download, load, generate, persist, play - src/stores/ttsStore.ts — Zustand store with Chat + Audio Mode actions - src/hooks/useTTS.ts — convenience hook with RAM gate and weighted progress - src/components/TTSButton/index.tsx — Chat Mode play/stop per message - src/components/AudioMessageBubble/index.tsx — waveform bubble component - src/screens/TTSSettingsScreen/index.tsx — download, mode, speed, cache Modified: - Message type: audioPath, waveformData, audioDurationSeconds, isGeneratingAudio - ChatMessage: Audio Mode branch + TTSButton in meta row - SettingsScreen: Text to Speech nav row - Navigation: TTSSettings route - stores/index.ts, services/index.ts: exports Tests: 42 unit + integration tests covering service, store, and full flows Co-Authored-By: Claude Sonnet 4.6 --- __tests__/integration/stores/tts.test.ts | 194 +++++++++++ __tests__/unit/services/ttsService.test.ts | 294 +++++++++++++++++ __tests__/unit/stores/ttsStore.test.ts | 275 +++++++++++++++ package-lock.json | 29 ++ package.json | 1 + src/components/AudioMessageBubble/index.tsx | 247 ++++++++++++++ src/components/ChatMessage/index.tsx | 27 ++ src/components/TTSButton/index.tsx | 106 ++++++ src/constants/ttsModels.ts | 25 ++ src/hooks/useTTS.ts | 48 +++ src/navigation/AppNavigator.tsx | 2 + src/navigation/types.ts | 1 + src/screens/SettingsScreen.tsx | 1 + src/screens/TTSSettingsScreen/index.tsx | 349 ++++++++++++++++++++ src/screens/index.ts | 1 + src/services/index.ts | 2 + src/services/ttsService.ts | 326 ++++++++++++++++++ src/stores/index.ts | 2 + src/stores/ttsStore.ts | 243 ++++++++++++++ src/types/index.ts | 9 + 20 files changed, 2182 insertions(+) create mode 100644 __tests__/integration/stores/tts.test.ts create mode 100644 __tests__/unit/services/ttsService.test.ts create mode 100644 __tests__/unit/stores/ttsStore.test.ts create mode 100644 src/components/AudioMessageBubble/index.tsx create mode 100644 src/components/TTSButton/index.tsx create mode 100644 src/constants/ttsModels.ts create mode 100644 src/hooks/useTTS.ts create mode 100644 src/screens/TTSSettingsScreen/index.tsx create mode 100644 src/services/ttsService.ts create mode 100644 src/stores/ttsStore.ts diff --git a/__tests__/integration/stores/tts.test.ts b/__tests__/integration/stores/tts.test.ts new file mode 100644 index 00000000..e3c4e22c --- /dev/null +++ b/__tests__/integration/stores/tts.test.ts @@ -0,0 +1,194 @@ +/** + * TTS Integration Tests + * + * Tests the wiring between ttsStore and ttsService: + * - Chat Mode full flow: download → load → speak → stop + * - Audio Mode full flow: download → load → generateAndSave → playMessage → stop + * - Auto-play triggering in Chat Mode + * - Mode switching + */ + +jest.mock('../../../src/services/ttsService', () => ({ + ttsService: { + isBackboneDownloaded: jest.fn(), + isVocoderDownloaded: jest.fn(), + downloadBackbone: jest.fn(), + downloadVocoder: jest.fn(), + deleteModels: jest.fn(), + loadModels: jest.fn(), + unloadModels: jest.fn(), + speak: jest.fn(), + stop: jest.fn(), + generateAndSave: jest.fn(), + playFromFile: jest.fn(), + getAudioCacheSizeMB: jest.fn(), + clearAudioCache: jest.fn(), + }, +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import { useTTSStore } from '../../../src/stores/ttsStore'; +import { ttsService } from '../../../src/services/ttsService'; + +const mockTTS = ttsService as jest.Mocked; +const getState = () => useTTSStore.getState(); + +const resetStore = () => { + useTTSStore.setState({ + isBackboneDownloaded: false, + isVocoderDownloaded: false, + isDownloadingBackbone: false, + isDownloadingVocoder: false, + backboneDownloadProgress: 0, + vocoderDownloadProgress: 0, + isModelLoading: false, + isModelLoaded: false, + isSpeaking: false, + currentMessageId: null, + audioCacheSizeMB: 0, + settings: { interfaceMode: 'chat', enabled: true, autoPlay: false, speed: 1.0, voiceId: '0' }, + error: null, + }); +}; + +describe('TTS integration', () => { + beforeEach(() => { + resetStore(); + jest.clearAllMocks(); + mockTTS.getAudioCacheSizeMB.mockResolvedValue(0); + }); + + // ─── Chat Mode ──────────────────────────────────────────────────────────── + + describe('Chat Mode: download → load → speak → stop', () => { + it('completes the full Chat Mode flow', async () => { + // 1. Download + mockTTS.downloadBackbone.mockResolvedValue('/bb.gguf'); + mockTTS.downloadVocoder.mockResolvedValue('/voc.gguf'); + await getState().downloadModels(); + + expect(getState().isBackboneDownloaded).toBe(true); + expect(getState().isVocoderDownloaded).toBe(true); + + // 2. Load + mockTTS.loadModels.mockResolvedValue(undefined); + await getState().loadModels(); + expect(getState().isModelLoaded).toBe(true); + + // 3. Speak + mockTTS.speak.mockResolvedValue(undefined); + mockTTS.stop.mockReturnValue(undefined); + + const speakPromise = getState().speak('hello', 'msg1'); + expect(getState().isSpeaking).toBe(true); + expect(getState().currentMessageId).toBe('msg1'); + + await speakPromise; + expect(getState().isSpeaking).toBe(false); + expect(getState().currentMessageId).toBeNull(); + + // 4. Stop mid-speech + mockTTS.speak.mockImplementation( + () => new Promise((resolve) => setTimeout(resolve, 1000)), + ); + getState().speak('second', 'msg2'); + getState().stop(); + expect(getState().isSpeaking).toBe(false); + }); + }); + + // ─── Audio Mode ─────────────────────────────────────────────────────────── + + describe('Audio Mode: download → load → generateAndSave → playMessage → stop', () => { + beforeEach(() => { + useTTSStore.setState({ + settings: { interfaceMode: 'audio', enabled: true, autoPlay: false, speed: 1.0, voiceId: '0' }, + }); + }); + + it('completes the full Audio Mode flow', async () => { + // 1. Download + mockTTS.downloadBackbone.mockResolvedValue('/bb.gguf'); + mockTTS.downloadVocoder.mockResolvedValue('/voc.gguf'); + await getState().downloadModels(); + + // 2. Load + mockTTS.loadModels.mockResolvedValue(undefined); + await getState().loadModels(); + expect(getState().isModelLoaded).toBe(true); + + // 3. GenerateAndSave + const mockAudio = { + samples: new Float32Array(100), + durationSeconds: 1.5, + sampleRate: 24000, + waveformData: new Array(200).fill(0.2), + }; + mockTTS.generateAndSave.mockResolvedValue({ path: '/cache/c1/m1.pcm', audio: mockAudio } as any); + mockTTS.getAudioCacheSizeMB.mockResolvedValue(1.5); + + const result = await getState().generateAndSave('hello audio', 'conv1', 'msg1'); + + expect(result.path).toBe('/cache/c1/m1.pcm'); + expect(result.waveformData).toHaveLength(200); + expect(result.durationSeconds).toBe(1.5); + expect(getState().audioCacheSizeMB).toBeCloseTo(1.5); + + // 4. PlayMessage + mockTTS.playFromFile.mockResolvedValue(undefined); + mockTTS.stop.mockReturnValue(undefined); + + const playPromise = getState().playMessage('msg1', '/cache/c1/m1.pcm'); + expect(getState().isSpeaking).toBe(true); + expect(getState().currentMessageId).toBe('msg1'); + + await playPromise; + expect(getState().isSpeaking).toBe(false); + + // 5. StopPlayback + getState().stopPlayback(); + expect(mockTTS.stop).toHaveBeenCalled(); + }); + }); + + // ─── Mode switching ─────────────────────────────────────────────────────── + + describe('mode switching', () => { + it('switching interfaceMode to audio takes effect immediately', () => { + expect(getState().settings.interfaceMode).toBe('chat'); + getState().updateSettings({ interfaceMode: 'audio' }); + expect(getState().settings.interfaceMode).toBe('audio'); + }); + + it('switching back to chat mode works', () => { + getState().updateSettings({ interfaceMode: 'audio' }); + getState().updateSettings({ interfaceMode: 'chat' }); + expect(getState().settings.interfaceMode).toBe('chat'); + }); + }); + + // ─── Auto-play ──────────────────────────────────────────────────────────── + + describe('auto-play', () => { + it('speak is called when autoPlay is true and model is loaded', async () => { + useTTSStore.setState({ + isModelLoaded: true, + settings: { interfaceMode: 'chat', enabled: true, autoPlay: true, speed: 1.0, voiceId: '0' }, + }); + mockTTS.speak.mockResolvedValue(undefined); + mockTTS.stop.mockReturnValue(undefined); + + // Simulate chat completion triggering speak + await getState().speak('AI response text', 'last-msg-id'); + + expect(mockTTS.speak).toHaveBeenCalledWith( + 'AI response text', + expect.objectContaining({ voiceId: '0', speed: 1.0 }), + ); + }); + }); +}); diff --git a/__tests__/unit/services/ttsService.test.ts b/__tests__/unit/services/ttsService.test.ts new file mode 100644 index 00000000..4a7807c1 --- /dev/null +++ b/__tests__/unit/services/ttsService.test.ts @@ -0,0 +1,294 @@ +/** + * TTS Service Unit Tests + * + * Tests for backbone/vocoder download, model lifecycle, audio generation, + * file persistence, and playback control. + * Priority: P1 - Core TTS functionality. + */ + +jest.mock('llama.rn', () => ({ + initLlama: jest.fn(), +})); + +jest.mock('react-native-fs', () => ({ + DocumentDirectoryPath: '/mock/docs', + exists: jest.fn(), + mkdir: jest.fn(), + unlink: jest.fn(), + downloadFile: jest.fn(), + writeFile: jest.fn(), + readFile: jest.fn(), + stat: jest.fn(), +})); + +jest.mock('react-native-audio-api', () => ({ + AudioContext: jest.fn().mockImplementation(() => ({ + createBuffer: jest.fn().mockReturnValue({ copyToChannel: jest.fn() }), + createBufferSource: jest.fn().mockReturnValue({ + connect: jest.fn(), + start: jest.fn(), + stop: jest.fn(), + playbackRate: { value: 1.0 }, + onended: null, + buffer: null, + }), + destination: {}, + close: jest.fn(), + })), +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import RNFS from 'react-native-fs'; +import { initLlama } from 'llama.rn'; +import { ttsService } from '../../../src/services/ttsService'; +import { TTS_BACKBONE_MODEL } from '../../../src/constants/ttsModels'; + +const mockRNFS = RNFS as jest.Mocked; +const mockInitLlama = initLlama as jest.Mock; + +const makeMockContext = (vocoderEnabled = true) => ({ + initVocoder: jest.fn().mockResolvedValue(undefined), + isVocoderEnabled: jest.fn().mockResolvedValue(vocoderEnabled), + releaseVocoder: jest.fn().mockResolvedValue(undefined), + release: jest.fn().mockResolvedValue(undefined), + getFormattedAudioCompletion: jest.fn().mockResolvedValue({ prompt: 'p', grammar: 'g' }), + getAudioCompletionGuideTokens: jest.fn().mockResolvedValue([1, 2, 3]), + completion: jest.fn().mockResolvedValue({ audio_tokens: [10, 20, 30] }), + decodeAudioTokens: jest.fn().mockResolvedValue(new Array(2400).fill(0.1)), +}); + +describe('ttsService', () => { + beforeEach(() => { + jest.clearAllMocks(); + // Reset internal state between tests + (ttsService as any).context = null; + (ttsService as any).isVocoderReady = false; + (ttsService as any).isSpeakingFlag = false; + (ttsService as any).contextLoadPromise = Promise.resolve(); + }); + + // ─── Paths ──────────────────────────────────────────────────────────────── + + describe('paths', () => { + it('backbone path uses tts-models directory', () => { + expect(ttsService.getBackbonePath()).toBe( + `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.backboneFile}`, + ); + }); + + it('vocoder path uses tts-models directory', () => { + expect(ttsService.getVocoderPath()).toBe( + `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.vocoderFile}`, + ); + }); + + it('audio file path scoped to conversationId and messageId', () => { + expect(ttsService.getAudioFilePath('conv1', 'msg1')).toBe( + '/mock/docs/audio-cache/conv1/msg1.pcm', + ); + }); + }); + + // ─── Download ──────────────────────────────────────────────────────────── + + describe('downloadBackbone', () => { + it('returns existing path without downloading if already present', async () => { + mockRNFS.exists.mockResolvedValueOnce(true) // ensureDir + .mockResolvedValueOnce(true); // file exists + const path = await ttsService.downloadBackbone(); + expect(mockRNFS.downloadFile).not.toHaveBeenCalled(); + expect(path).toBe(ttsService.getBackbonePath()); + }); + + it('downloads and returns path on success', async () => { + mockRNFS.exists.mockResolvedValueOnce(false) // dir missing + .mockResolvedValueOnce(false); // file missing + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) }); + + const onProgress = jest.fn(); + const path = await ttsService.downloadBackbone(onProgress); + + expect(mockRNFS.downloadFile).toHaveBeenCalledWith( + expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.backboneUrl }), + ); + expect(path).toBe(ttsService.getBackbonePath()); + }); + + it('throws and removes partial file on non-200 response', async () => { + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 404, jobId: 1, bytesWritten: 0 }) }); + mockRNFS.unlink.mockResolvedValue(undefined); + + await expect(ttsService.downloadBackbone()).rejects.toThrow('HTTP 404'); + expect(mockRNFS.unlink).toHaveBeenCalled(); + }); + }); + + describe('downloadVocoder', () => { + it('downloads vocoder to correct path', async () => { + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) }); + + const path = await ttsService.downloadVocoder(); + expect(mockRNFS.downloadFile).toHaveBeenCalledWith( + expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.vocoderUrl }), + ); + expect(path).toBe(ttsService.getVocoderPath()); + }); + }); + + // ─── Model Lifecycle ───────────────────────────────────────────────────── + + describe('loadModels', () => { + it('calls initLlama with backbone path then initVocoder', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + + await ttsService.loadModels(); + + expect(mockInitLlama).toHaveBeenCalledWith( + expect.objectContaining({ model: ttsService.getBackbonePath() }), + ); + expect(ctx.initVocoder).toHaveBeenCalledWith( + expect.objectContaining({ path: ttsService.getVocoderPath() }), + ); + }); + + it('throws if isVocoderEnabled returns false', async () => { + const ctx = makeMockContext(false); + mockInitLlama.mockResolvedValue(ctx); + + await expect(ttsService.loadModels()).rejects.toThrow('Vocoder failed to initialize'); + }); + + it('is idempotent — does not double-init if already loaded', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + + await ttsService.loadModels(); + await ttsService.loadModels(); + + expect(mockInitLlama).toHaveBeenCalledTimes(1); + }); + }); + + describe('unloadModels', () => { + it('calls releaseVocoder and release', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + await ttsService.loadModels(); + + await ttsService.unloadModels(); + + expect(ctx.releaseVocoder).toHaveBeenCalled(); + expect(ctx.release).toHaveBeenCalled(); + expect(ttsService.isLoaded()).toBe(false); + }); + }); + + // ─── Generation ────────────────────────────────────────────────────────── + + describe('generate', () => { + it('calls completion pipeline in correct order and returns GeneratedAudio', async () => { + const ctx = makeMockContext(); + mockInitLlama.mockResolvedValue(ctx); + await ttsService.loadModels(); + + const audio = await ttsService.generate('hello world'); + + expect(ctx.getFormattedAudioCompletion).toHaveBeenCalled(); + expect(ctx.getAudioCompletionGuideTokens).toHaveBeenCalledWith('hello world'); + expect(ctx.completion).toHaveBeenCalled(); + expect(ctx.decodeAudioTokens).toHaveBeenCalled(); + + expect(audio.samples).toBeInstanceOf(Float32Array); + expect(audio.waveformData).toHaveLength(200); + expect(audio.durationSeconds).toBeGreaterThan(0); + expect(audio.sampleRate).toBe(TTS_BACKBONE_MODEL.sampleRate); + }); + + it('throws if models not loaded', async () => { + await expect(ttsService.generate('test')).rejects.toThrow('TTS models not loaded'); + }); + }); + + describe('saveToFile', () => { + it('writes base64-encoded PCM to correct path', async () => { + mockRNFS.exists.mockResolvedValue(false); + mockRNFS.mkdir.mockResolvedValueOnce(undefined); + mockRNFS.writeFile.mockResolvedValueOnce(undefined); + + const audio = { + samples: new Float32Array([0.1, 0.2, 0.3]), + durationSeconds: 0.01, + sampleRate: 24000, + waveformData: new Array(200).fill(0.1), + }; + + const path = await ttsService.saveToFile(audio, 'conv1', 'msg1'); + + expect(path).toBe('/mock/docs/audio-cache/conv1/msg1.pcm'); + expect(mockRNFS.writeFile).toHaveBeenCalledWith( // eslint-disable-line @typescript-eslint/no-unsafe-call + '/mock/docs/audio-cache/conv1/msg1.pcm', + expect.any(String), + 'base64', + ); + }); + }); + + // ─── Stop ──────────────────────────────────────────────────────────────── + + describe('stop', () => { + it('sets isSpeakingFlag to false', () => { + (ttsService as any).isSpeakingFlag = true; + ttsService.stop(); + expect(ttsService.isSpeaking()).toBe(false); + }); + + it('calls stop on currentSource', () => { + const mockSource = { stop: jest.fn() }; + (ttsService as any).currentSource = mockSource; + ttsService.stop(); + expect(mockSource.stop).toHaveBeenCalled(); + }); + }); + + // ─── Cache ──────────────────────────────────────────────────────────────── + + describe('getAudioCacheSizeMB', () => { + it('returns 0 if cache directory does not exist', async () => { + mockRNFS.exists.mockResolvedValueOnce(false); + const size = await ttsService.getAudioCacheSizeMB(); + expect(size).toBe(0); + }); + + it('returns size in MB', async () => { + mockRNFS.exists.mockResolvedValueOnce(true); + mockRNFS.stat.mockResolvedValueOnce({ size: 5 * 1024 * 1024 } as any); + const size = await ttsService.getAudioCacheSizeMB(); + expect(size).toBeCloseTo(5); + }); + }); + + describe('clearAudioCache', () => { + it('unlinks the cache root if it exists', async () => { + mockRNFS.exists.mockResolvedValueOnce(true); + mockRNFS.unlink.mockResolvedValueOnce(undefined); + await ttsService.clearAudioCache(); + expect(mockRNFS.unlink).toHaveBeenCalledWith('/mock/docs/audio-cache'); + }); + + it('does nothing if cache does not exist', async () => { + mockRNFS.exists.mockResolvedValueOnce(false); + await ttsService.clearAudioCache(); + expect(mockRNFS.unlink).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/__tests__/unit/stores/ttsStore.test.ts b/__tests__/unit/stores/ttsStore.test.ts new file mode 100644 index 00000000..649738e4 --- /dev/null +++ b/__tests__/unit/stores/ttsStore.test.ts @@ -0,0 +1,275 @@ +/** + * TTS Store Unit Tests + * + * Tests for download state, model lifecycle, Chat Mode speak/stop, + * Audio Mode generateAndSave/playMessage, and settings persistence. + * Priority: P1 - Core TTS state management. + */ + +jest.mock('../../../src/services/ttsService', () => ({ + ttsService: { + isBackboneDownloaded: jest.fn(), + isVocoderDownloaded: jest.fn(), + downloadBackbone: jest.fn(), + downloadVocoder: jest.fn(), + deleteModels: jest.fn(), + loadModels: jest.fn(), + unloadModels: jest.fn(), + speak: jest.fn(), + stop: jest.fn(), + generateAndSave: jest.fn(), + playFromFile: jest.fn(), + getAudioCacheSizeMB: jest.fn(), + clearAudioCache: jest.fn(), + }, +})); + +jest.mock('../../../src/utils/logger', () => ({ + __esModule: true, + default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() }, +})); + +import { useTTSStore } from '../../../src/stores/ttsStore'; +import { ttsService } from '../../../src/services/ttsService'; + +const mockTTSService = ttsService as jest.Mocked; +const getState = () => useTTSStore.getState(); + +const resetState = () => { + useTTSStore.setState({ + isBackboneDownloaded: false, + isVocoderDownloaded: false, + isDownloadingBackbone: false, + isDownloadingVocoder: false, + backboneDownloadProgress: 0, + vocoderDownloadProgress: 0, + isModelLoading: false, + isModelLoaded: false, + isSpeaking: false, + currentMessageId: null, + audioCacheSizeMB: 0, + settings: { + interfaceMode: 'chat', + enabled: true, + autoPlay: false, + speed: 1.0, + voiceId: '0', + }, + error: null, + }); +}; + +describe('ttsStore', () => { + beforeEach(() => { + resetState(); + jest.clearAllMocks(); + }); + + // ─── Download ───────────────────────────────────────────────────────────── + + describe('checkDownloadStatus', () => { + it('reflects backbone and vocoder download state', async () => { + mockTTSService.isBackboneDownloaded.mockResolvedValue(true); + mockTTSService.isVocoderDownloaded.mockResolvedValue(false); + + await getState().checkDownloadStatus(); + + expect(getState().isBackboneDownloaded).toBe(true); + expect(getState().isVocoderDownloaded).toBe(false); + }); + }); + + describe('downloadModels', () => { + it('sets progress states and marks both downloaded on success', async () => { + mockTTSService.downloadBackbone.mockImplementation(async (onProgress) => { + onProgress?.(0.5); + onProgress?.(1.0); + return '/path/backbone'; + }); + mockTTSService.downloadVocoder.mockImplementation(async (onProgress) => { + onProgress?.(1.0); + return '/path/vocoder'; + }); + + await getState().downloadModels(); + + const state = getState(); + expect(state.isBackboneDownloaded).toBe(true); + expect(state.isVocoderDownloaded).toBe(true); + expect(state.isDownloadingBackbone).toBe(false); + expect(state.isDownloadingVocoder).toBe(false); + expect(state.error).toBeNull(); + }); + + it('sets error and resets downloading flags on failure', async () => { + mockTTSService.downloadBackbone.mockRejectedValue(new Error('network error')); + + await getState().downloadModels(); + + const state = getState(); + expect(state.error).toBe('network error'); + expect(state.isDownloadingBackbone).toBe(false); + expect(state.isDownloadingVocoder).toBe(false); + }); + }); + + // ─── Model lifecycle ───────────────────────────────────────────────────── + + describe('loadModels', () => { + it('sets isModelLoaded on success', async () => { + mockTTSService.loadModels.mockResolvedValue(undefined); + await getState().loadModels(); + expect(getState().isModelLoaded).toBe(true); + expect(getState().isModelLoading).toBe(false); + }); + + it('sets error on failure', async () => { + mockTTSService.loadModels.mockRejectedValue(new Error('OOM')); + await getState().loadModels(); + expect(getState().error).toBe('OOM'); + expect(getState().isModelLoaded).toBe(false); + }); + + it('is a no-op if already loaded', async () => { + useTTSStore.setState({ isModelLoaded: true }); + await getState().loadModels(); + expect(mockTTSService.loadModels).not.toHaveBeenCalled(); + }); + }); + + // ─── Chat Mode ──────────────────────────────────────────────────────────── + + describe('speak', () => { + beforeEach(() => { + useTTSStore.setState({ isModelLoaded: true }); + }); + + it('sets isSpeaking true then false after completion', async () => { + mockTTSService.speak.mockResolvedValue(undefined); + mockTTSService.stop.mockReturnValue(undefined); + + const speaking: boolean[] = []; + const unsubscribe = useTTSStore.subscribe((s) => speaking.push(s.isSpeaking)); + + await getState().speak('hello', 'msg1'); + + unsubscribe(); + expect(speaking).toContain(true); + expect(getState().isSpeaking).toBe(false); + }); + + it('stops speaking the same message when called again', async () => { + useTTSStore.setState({ isSpeaking: true, currentMessageId: 'msg1' }); + mockTTSService.stop.mockReturnValue(undefined); + + await getState().speak('hello', 'msg1'); + + expect(mockTTSService.stop).toHaveBeenCalled(); + expect(mockTTSService.speak).not.toHaveBeenCalled(); + }); + + it('does nothing if TTS disabled', async () => { + useTTSStore.setState({ settings: { ...getState().settings, enabled: false } }); + await getState().speak('hello', 'msg1'); + expect(mockTTSService.speak).not.toHaveBeenCalled(); + }); + + it('does nothing if model not loaded', async () => { + useTTSStore.setState({ isModelLoaded: false }); + await getState().speak('hello', 'msg1'); + expect(mockTTSService.speak).not.toHaveBeenCalled(); + }); + }); + + // ─── Audio Mode ─────────────────────────────────────────────────────────── + + describe('generateAndSave', () => { + it('returns path, waveformData, durationSeconds and refreshes cache', async () => { + const mockAudio = { + samples: new Float32Array(100), + durationSeconds: 2.5, + sampleRate: 24000, + waveformData: new Array(200).fill(0.1), + }; + mockTTSService.generateAndSave.mockResolvedValue({ + path: '/cache/conv1/msg1.pcm', + audio: mockAudio, + }); + mockTTSService.getAudioCacheSizeMB.mockResolvedValue(3.2); + + const result = await getState().generateAndSave('hello', 'conv1', 'msg1'); + + expect(result.path).toBe('/cache/conv1/msg1.pcm'); + expect(result.waveformData).toHaveLength(200); + expect(result.durationSeconds).toBe(2.5); + expect(getState().audioCacheSizeMB).toBeCloseTo(3.2); + }); + }); + + describe('playMessage', () => { + it('sets isSpeaking true during playback then false after', async () => { + mockTTSService.stop.mockReturnValue(undefined); + mockTTSService.playFromFile.mockResolvedValue(undefined); + + const speaking: boolean[] = []; + const unsubscribe = useTTSStore.subscribe((s) => speaking.push(s.isSpeaking)); + + await getState().playMessage('msg1', '/cache/conv1/msg1.pcm'); + + unsubscribe(); + expect(speaking).toContain(true); + expect(getState().isSpeaking).toBe(false); + }); + + it('stops if same message is already playing', async () => { + useTTSStore.setState({ isSpeaking: true, currentMessageId: 'msg1' }); + mockTTSService.stop.mockReturnValue(undefined); + + await getState().playMessage('msg1', '/cache/conv1/msg1.pcm'); + + expect(mockTTSService.stop).toHaveBeenCalled(); + expect(mockTTSService.playFromFile).not.toHaveBeenCalled(); + }); + }); + + // ─── Settings ───────────────────────────────────────────────────────────── + + describe('updateSettings', () => { + it('merges partial settings correctly', () => { + getState().updateSettings({ speed: 1.5, autoPlay: true }); + const { settings } = getState(); + expect(settings.speed).toBe(1.5); + expect(settings.autoPlay).toBe(true); + // Other fields untouched + expect(settings.enabled).toBe(true); + expect(settings.voiceId).toBe('0'); + }); + + it('can switch interfaceMode', () => { + getState().updateSettings({ interfaceMode: 'audio' }); + expect(getState().settings.interfaceMode).toBe('audio'); + }); + }); + + describe('clearError', () => { + it('clears the error field', () => { + useTTSStore.setState({ error: 'something went wrong' }); + getState().clearError(); + expect(getState().error).toBeNull(); + }); + }); + + // ─── Cache ──────────────────────────────────────────────────────────────── + + describe('clearAudioCache', () => { + it('calls ttsService.clearAudioCache and resets size', async () => { + useTTSStore.setState({ audioCacheSizeMB: 10 }); + mockTTSService.clearAudioCache.mockResolvedValue(undefined); + + await getState().clearAudioCache(); + + expect(mockTTSService.clearAudioCache).toHaveBeenCalled(); + expect(getState().audioCacheSizeMB).toBe(0); + }); + }); +}); diff --git a/package-lock.json b/package-lock.json index 2a097a8b..4671b895 100644 --- a/package-lock.json +++ b/package-lock.json @@ -31,6 +31,7 @@ "patch-package": "^8.0.1", "react": "19.2.0", "react-native": "0.83.1", + "react-native-audio-api": "^0.11.7", "react-native-device-info": "^15.0.1", "react-native-fs": "^2.20.0", "react-native-gesture-handler": "^2.30.0", @@ -12220,6 +12221,34 @@ } } }, + "node_modules/react-native-audio-api": { + "version": "0.11.7", + "resolved": "https://registry.npmjs.org/react-native-audio-api/-/react-native-audio-api-0.11.7.tgz", + "integrity": "sha512-2oIoP77Tn2nlouRVfEC3bAsuSyKU6xhGNkSnVXTLLQQZslEDoYX2cN9pVRZoWOqhFrLT8q4IZI9HaFgYL13L1A==", + "license": "MIT", + "dependencies": { + "semver": "^7.7.3" + }, + "bin": { + "setup-rn-audio-api-web": "scripts/setup-rn-audio-api-web.js" + }, + "peerDependencies": { + "react": "*", + "react-native": "*" + } + }, + "node_modules/react-native-audio-api/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/react-native-device-info": { "version": "15.0.1", "resolved": "https://registry.npmjs.org/react-native-device-info/-/react-native-device-info-15.0.1.tgz", diff --git a/package.json b/package.json index 873a1957..7236881c 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ "patch-package": "^8.0.1", "react": "19.2.0", "react-native": "0.83.1", + "react-native-audio-api": "^0.11.7", "react-native-device-info": "^15.0.1", "react-native-fs": "^2.20.0", "react-native-gesture-handler": "^2.30.0", diff --git a/src/components/AudioMessageBubble/index.tsx b/src/components/AudioMessageBubble/index.tsx new file mode 100644 index 00000000..e93f8c0c --- /dev/null +++ b/src/components/AudioMessageBubble/index.tsx @@ -0,0 +1,247 @@ +import React, { useState, useCallback } from 'react'; +import { + View, + Text, + TouchableOpacity, + ActivityIndicator, + StyleSheet, +} from 'react-native'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTheme, useThemedStyles } from '../../theme'; +import { useTTSStore } from '../../stores/ttsStore'; +import { TYPOGRAPHY, SPACING } from '../../constants'; +import type { ThemeColors, ThemeShadows } from '../../theme'; + +const WAVEFORM_BARS = 40; // number of bars to display (subset of 200 data points) +const SPEED_STEPS: number[] = [0.5, 1.0, 1.5, 2.0]; + +interface AudioMessageBubbleProps { + messageId: string; + audioPath: string; + waveformData: number[]; + durationSeconds: number; + /** Optional plain-text transcript to show when user expands */ + transcript?: string; + isGenerating?: boolean; +} + +function formatDuration(seconds: number): string { + const m = Math.floor(seconds / 60); + const s = Math.floor(seconds % 60); + return `${m}:${s.toString().padStart(2, '0')}`; +} + +function subsample(data: number[], count: number): number[] { + if (data.length === 0) { + return Array(count).fill(0.1); + } + const step = data.length / count; + const result: number[] = []; + for (let i = 0; i < count; i++) { + result.push(data[Math.floor(i * step)] ?? 0.1); + } + return result; +} + +function normalize(data: number[]): number[] { + const max = Math.max(...data, 0.001); + return data.map((v) => v / max); +} + +const WaveformBars: React.FC<{ + data: number[]; + colors: ThemeColors; +}> = ({ data, colors }) => { + const bars = normalize(subsample(data, WAVEFORM_BARS)); + return ( + + {bars.map((amp, i) => { + const height = Math.max(3, Math.round(amp * 28)); + return ( + + ); + })} + + ); +}; + +const barStyles = StyleSheet.create({ + container: { + flexDirection: 'row', + alignItems: 'center', + gap: 2, + height: 32, + }, + bar: { + width: 3, + borderRadius: 2, + }, +}); + +export const AudioMessageBubble: React.FC = ({ + messageId, + audioPath, + waveformData, + durationSeconds, + transcript, + isGenerating, +}) => { + const { colors } = useTheme(); + const styles = useThemedStyles(createStyles); + const { isSpeaking, currentMessageId, settings, playMessage, stopPlayback, updateSettings } = + useTTSStore(); + + const [showTranscript, setShowTranscript] = useState(false); + const initialSpeedIdx = SPEED_STEPS.indexOf(settings.speed); + const [speedIndex, setSpeedIndex] = useState(initialSpeedIdx >= 0 ? initialSpeedIdx : 1); + + const isThisPlaying = isSpeaking && currentMessageId === messageId; + + const handlePlayPause = useCallback(() => { + if (isThisPlaying) { + stopPlayback(); + return; + } + playMessage(messageId, audioPath); + }, [isThisPlaying, stopPlayback, playMessage, messageId, audioPath]); + + const handleSpeedCycle = useCallback(() => { + const next = (speedIndex + 1) % SPEED_STEPS.length; + setSpeedIndex(next); + updateSettings({ speed: SPEED_STEPS[next] }); + }, [speedIndex, updateSettings]); + + if (isGenerating) { + return ( + + + Generating audio... + + ); + } + + return ( + + {/* Playback row */} + + + + + + + + {formatDuration(durationSeconds)} + + + {SPEED_STEPS[speedIndex]}x + + + + {/* Transcript toggle */} + {transcript ? ( + setShowTranscript((v) => !v)} + style={styles.transcriptToggle} + > + + {showTranscript ? 'Hide transcript' : 'Show transcript'} + + + + ) : null} + + {showTranscript && transcript ? ( + {transcript} + ) : null} + + ); +}; + +const createStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({ + bubble: { + backgroundColor: colors.surface, + borderRadius: 12, + borderWidth: 1, + borderColor: colors.border, + padding: SPACING.md, + maxWidth: '80%' as const, + alignSelf: 'flex-start' as const, + gap: SPACING.sm, + }, + generatingText: { + ...TYPOGRAPHY.meta, + color: colors.textMuted, + marginLeft: SPACING.sm, + }, + playRow: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + gap: SPACING.sm, + }, + playButton: { + width: 28, + height: 28, + borderRadius: 14, + backgroundColor: `${colors.primary}20`, + alignItems: 'center' as const, + justifyContent: 'center' as const, + }, + duration: { + ...TYPOGRAPHY.meta, + color: colors.textMuted, + minWidth: 32, + textAlign: 'right' as const, + }, + speedChip: { + backgroundColor: colors.surfaceLight, + borderRadius: 6, + paddingHorizontal: SPACING.xs, + paddingVertical: 2, + borderWidth: 1, + borderColor: colors.border, + }, + speedText: { + ...TYPOGRAPHY.metaSmall, + color: colors.textSecondary, + }, + transcriptToggle: { + flexDirection: 'row' as const, + alignItems: 'center' as const, + gap: SPACING.xs, + }, + transcriptToggleText: { + ...TYPOGRAPHY.meta, + color: colors.textMuted, + }, + transcript: { + ...TYPOGRAPHY.bodySmall, + color: colors.textSecondary, + lineHeight: 18, + }, +}); diff --git a/src/components/ChatMessage/index.tsx b/src/components/ChatMessage/index.tsx index d80310b7..417865ee 100644 --- a/src/components/ChatMessage/index.tsx +++ b/src/components/ChatMessage/index.tsx @@ -1,5 +1,8 @@ import React, { useState } from 'react'; import { View, Text, TouchableOpacity, Clipboard } from 'react-native'; +import { TTSButton } from '../TTSButton'; +import { AudioMessageBubble } from '../AudioMessageBubble'; +import { useTTSStore } from '../../stores/ttsStore'; import { useTheme, useThemedStyles } from '../../theme'; import Icon from 'react-native-vector-icons/Feather'; import { stripControlTokens } from '../../utils/messageContent'; @@ -141,6 +144,9 @@ const MessageMetaRow: React.FC = ({ message, styles, isStreaming, {message.generationTimeMs != null && message.role === 'assistant' && ( {formatDuration(message.generationTimeMs)} )} + {message.role === 'assistant' && !isStreaming && ( + + )} {showActions && !isStreaming && ( ••• @@ -184,6 +190,7 @@ export const ChatMessage: React.FC = ({ }) => { const { colors } = useTheme(); const styles = useThemedStyles(createStyles); + const ttsInterfaceMode = useTTSStore((s) => s.settings.interfaceMode); const [showActionMenu, setShowActionMenu] = useState(false); const [isEditing, setIsEditing] = useState(false); const [editedContent, setEditedContent] = useState(message.content); @@ -242,6 +249,26 @@ export const ChatMessage: React.FC = ({ setShowActionMenu(false); }; + // Audio Mode: assistant messages render as waveform bubbles + if ( + message.role === 'assistant' && + ttsInterfaceMode === 'audio' && + !message.isSystemInfo && + !message.toolCalls?.length + ) { + const bubble = ( + + ); + return animateEntry ? {bubble} : bubble; + } + if (message.isSystemInfo) { return setAlertState(hideAlert())} />; diff --git a/src/components/TTSButton/index.tsx b/src/components/TTSButton/index.tsx new file mode 100644 index 00000000..289e2eb4 --- /dev/null +++ b/src/components/TTSButton/index.tsx @@ -0,0 +1,106 @@ +import React, { useEffect } from 'react'; +import { TouchableOpacity, ActivityIndicator, StyleSheet } from 'react-native'; +import Animated, { + useSharedValue, + useAnimatedStyle, + withRepeat, + withSequence, + withTiming, +} from 'react-native-reanimated'; +import Icon from 'react-native-vector-icons/Feather'; +import { useTheme } from '../../theme'; +import { useTTSStore } from '../../stores/ttsStore'; +import { SPACING } from '../../constants'; + +interface TTSButtonProps { + text: string; + messageId: string; +} + +export const TTSButton: React.FC = ({ text, messageId }) => { + const { colors } = useTheme(); + const { + speak, + stop, + isSpeaking, + isModelLoading, + isModelLoaded, + currentMessageId, + settings, + isBackboneDownloaded, + isVocoderDownloaded, + loadModels, + } = useTTSStore(); + + const areBothDownloaded = isBackboneDownloaded && isVocoderDownloaded; + const isThisMessageSpeaking = isSpeaking && currentMessageId === messageId; + + const opacity = useSharedValue(1); + useEffect(() => { + if (isThisMessageSpeaking) { + opacity.value = withRepeat( + withSequence( + withTiming(0.4, { duration: 600 }), + withTiming(1, { duration: 600 }), + ), + -1, + false, + ); + } else { + opacity.value = withTiming(1, { duration: 200 }); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isThisMessageSpeaking]); + + const animatedStyle = useAnimatedStyle(() => ({ opacity: opacity.value })); + + // Don't render in Audio Mode, or if TTS disabled / not downloaded + if ( + settings.interfaceMode === 'audio' || + !settings.enabled || + !areBothDownloaded + ) { + return null; + } + + if (isModelLoading && currentMessageId === messageId) { + return ; + } + + const handlePress = () => { + if (isThisMessageSpeaking) { + stop(); + return; + } + if (!isModelLoaded) { + loadModels().then(() => { + useTTSStore.getState().speak(text, messageId); + }); + return; + } + speak(text, messageId); + }; + + return ( + + + + + + ); +}; + +const styles = StyleSheet.create({ + button: { + padding: SPACING.xs, + }, +}); diff --git a/src/constants/ttsModels.ts b/src/constants/ttsModels.ts new file mode 100644 index 00000000..f93dfe85 --- /dev/null +++ b/src/constants/ttsModels.ts @@ -0,0 +1,25 @@ +export const TTS_BACKBONE_MODEL = { + id: 'outetts-0.3-500m-q4', + name: 'OuteTTS 0.3', + backboneFile: 'OuteTTS-0.3-500M-Q4_K_M.gguf', + backboneUrl: + 'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf', + backboneSizeMB: 454, + vocoderFile: 'WavTokenizer-Large-75-Q5_1.gguf', + vocoderUrl: + 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf', + vocoderSizeMB: 73, + sampleRate: 24000, + description: 'Natural-sounding on-device speech. Requires ~530 MB storage.', +}; + +export const TTS_SPEAKER_PROFILES = [ + { id: '0', label: 'Default' }, +]; + +/** Warn user if device RAM is below this threshold */ +export const TTS_WARN_RAM_GB = 8; +/** Hard-block TTS on devices below this threshold */ +export const TTS_BLOCK_RAM_GB = 6; +/** Max cached audio messages per conversation before eviction */ +export const AUDIO_CACHE_MAX_MESSAGES = 50; diff --git a/src/hooks/useTTS.ts b/src/hooks/useTTS.ts new file mode 100644 index 00000000..5ad948a3 --- /dev/null +++ b/src/hooks/useTTS.ts @@ -0,0 +1,48 @@ +import { useEffect, useCallback } from 'react'; +import { useTTSStore } from '../stores/ttsStore'; +import { hardwareService } from '../services/hardware'; +import { TTS_BLOCK_RAM_GB, TTS_WARN_RAM_GB } from '../constants/ttsModels'; + +export function useTTS() { + const store = useTTSStore(); + + useEffect(() => { + store.checkDownloadStatus(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const canRunOnDevice = useCallback((): { allowed: boolean; warning: boolean } => { + const ramGB = hardwareService.getTotalMemoryGB(); + return { + allowed: ramGB >= TTS_BLOCK_RAM_GB, + warning: ramGB < TTS_WARN_RAM_GB, + }; + }, []); + + const speakMessage = useCallback( + (text: string, messageId: string) => { + if (!store.isModelLoaded && store.isBackboneDownloaded && store.isVocoderDownloaded) { + store.loadModels().then(() => store.speak(text, messageId)); + return; + } + store.speak(text, messageId); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [store.isModelLoaded, store.isBackboneDownloaded, store.isVocoderDownloaded], + ); + + const areBothDownloaded = store.isBackboneDownloaded && store.isVocoderDownloaded; + + return { + ...store, + speakMessage, + canRunOnDevice, + areBothDownloaded, + isDownloading: store.isDownloadingBackbone || store.isDownloadingVocoder, + // weighted by file size (454 MB backbone, 73 MB vocoder → 86% / 14%) + overallDownloadProgress: + store.backboneDownloadProgress * 0.86 + store.vocoderDownloadProgress * 0.14, + isAudioMode: store.settings.interfaceMode === 'audio', + isChatMode: store.settings.interfaceMode === 'chat', + }; +} diff --git a/src/navigation/AppNavigator.tsx b/src/navigation/AppNavigator.tsx index 1d15b73a..517357a2 100644 --- a/src/navigation/AppNavigator.tsx +++ b/src/navigation/AppNavigator.tsx @@ -32,6 +32,7 @@ import { DownloadManagerScreen, ModelSettingsScreen, VoiceSettingsScreen, + TTSSettingsScreen, DeviceInfoScreen, StorageSettingsScreen, SecuritySettingsScreen, @@ -229,6 +230,7 @@ export const AppNavigator: React.FC = () => { + diff --git a/src/navigation/types.ts b/src/navigation/types.ts index e5326a80..cdde39c4 100644 --- a/src/navigation/types.ts +++ b/src/navigation/types.ts @@ -14,6 +14,7 @@ export type RootStackParamList = { ModelSettings: undefined; RemoteServers: undefined; VoiceSettings: undefined; + TTSSettings: undefined; DeviceInfo: undefined; StorageSettings: undefined; SecuritySettings: undefined; diff --git a/src/screens/SettingsScreen.tsx b/src/screens/SettingsScreen.tsx index f1cd721a..353c9b23 100644 --- a/src/screens/SettingsScreen.tsx +++ b/src/screens/SettingsScreen.tsx @@ -151,6 +151,7 @@ export const SettingsScreen: React.FC = () => { { icon: 'wifi', title: 'Remote Servers', desc: 'Connect to Ollama, LM Studio, and more', screen: 'RemoteServers' as const }, // { icon: 'search', title: 'Web Search', desc: 'Configure search API key for reliable results', screen: 'WebSearchSettings' as const }, { icon: 'mic', title: 'Voice Transcription', desc: 'On-device speech to text', screen: 'VoiceSettings' as const }, + { icon: 'volume-2', title: 'Text to Speech', desc: 'On-device voice responses', screen: 'TTSSettings' as const }, { icon: 'lock', title: 'Security', desc: 'Passphrase and app lock', screen: 'SecuritySettings' as const }, { icon: 'smartphone', title: 'Device Information', desc: 'Hardware and compatibility', screen: 'DeviceInfo' as const }, { icon: 'hard-drive', title: 'Storage', desc: 'Models and data usage', screen: 'StorageSettings' as const }, diff --git a/src/screens/TTSSettingsScreen/index.tsx b/src/screens/TTSSettingsScreen/index.tsx new file mode 100644 index 00000000..54e9a9f6 --- /dev/null +++ b/src/screens/TTSSettingsScreen/index.tsx @@ -0,0 +1,349 @@ +import React, { useEffect, useState } from 'react'; +import { View, Text, ScrollView, TouchableOpacity, Switch, ActivityIndicator } from 'react-native'; +import { SafeAreaView } from 'react-native-safe-area-context'; +import Slider from '@react-native-community/slider'; +import Icon from 'react-native-vector-icons/Feather'; +import { useNavigation } from '@react-navigation/native'; +import { Card, Button } from '../../components'; +import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../../components/CustomAlert'; +import { useTheme, useThemedStyles } from '../../theme'; +import type { ThemeColors, ThemeShadows } from '../../theme'; +import { TYPOGRAPHY, SPACING } from '../../constants'; +import { useTTSStore } from '../../stores/ttsStore'; +import { hardwareService } from '../../services/hardware'; +import { TTS_BACKBONE_MODEL, TTS_WARN_RAM_GB, TTS_BLOCK_RAM_GB } from '../../constants/ttsModels'; +import type { InterfaceMode } from '../../stores/ttsStore'; + +// ─── Sub-components ─────────────────────────────────────────────────────────── + +type Styles = ReturnType; + +const ProgressRow: React.FC<{ + label: string; + sizeMB: number; + downloaded: boolean; + downloading: boolean; + progress: number; + styles: Styles; + colors: ThemeColors; + border?: boolean; +}> = ({ label, sizeMB, downloaded, downloading, progress, styles, colors, border }) => ( + + + + {label} + {sizeMB} MB + + {downloaded && } + {downloading && {Math.round(progress * 100)}%} + {!downloaded && !downloading && } + + {downloading && ( + + + + )} + +); + +const InterfaceModeCard: React.FC<{ + mode: InterfaceMode; + deviceBlocked: boolean; + areBothDownloaded: boolean; + onModeChange: (m: InterfaceMode) => void; + styles: Styles; +}> = ({ mode, deviceBlocked, areBothDownloaded, onModeChange, styles }) => ( + + Interface Mode + + Audio Mode renders responses as voice notes. Chat Mode adds a play button to text bubbles. + + + {(['chat', 'audio'] as InterfaceMode[]).map((m) => { + const active = mode === m; + const blocked = m === 'audio' && (deviceBlocked || !areBothDownloaded); + return ( + onModeChange(m)} + disabled={blocked} + > + + {m === 'chat' ? 'Chat' : 'Audio'} + + + ); + })} + + {!areBothDownloaded && ( + Download models below to enable Audio Mode. + )} + +); + +const PlaybackCard: React.FC<{ + settings: ReturnType['settings']; + onUpdate: (patch: Partial['settings']>) => void; + colors: ThemeColors; + styles: Styles; +}> = ({ settings, onUpdate, colors, styles }) => ( + + Playback + + Speed + {settings.speed.toFixed(1)}x + + + 0.5x + 1x + 2x + + onUpdate({ speed: parseFloat(v.toFixed(1)) })} + minimumTrackTintColor={colors.primary} + maximumTrackTintColor={colors.border} + thumbTintColor={colors.primary} + /> + {settings.interfaceMode === 'chat' && ( + + + Auto-play + Speak AI responses automatically + + onUpdate({ autoPlay: v })} + trackColor={{ true: colors.primary }} + /> + + )} + +); + +const CompatibilityCard: React.FC<{ + ramGB: number; + deviceBlocked: boolean; + deviceWarning: boolean; + styles: Styles; + colors: ThemeColors; +}> = ({ ramGB, deviceBlocked, deviceWarning, styles, colors }) => { + if (!deviceWarning && !deviceBlocked) { return null; } + return ( + + + + + {deviceBlocked + ? `TTS requires at least ${TTS_BLOCK_RAM_GB} GB RAM. Your device has ${ramGB.toFixed(1)} GB.` + : `Your device (${ramGB.toFixed(1)} GB RAM) may run TTS but performance could be slow. 8 GB recommended.`} + + + + ); +}; + +// ─── Main screen ────────────────────────────────────────────────────────────── + +export const TTSSettingsScreen: React.FC = () => { + const navigation = useNavigation(); + const { colors } = useTheme(); + const styles = useThemedStyles(createStyles); + const [alertState, setAlertState] = useState(initialAlertState); + const [ramGB, setRamGB] = useState(8); + + const { + isBackboneDownloaded, isVocoderDownloaded, + isDownloadingBackbone, isDownloadingVocoder, + backboneDownloadProgress, vocoderDownloadProgress, + isModelLoaded, isModelLoading, + audioCacheSizeMB, settings, error, + downloadModels, deleteModels, loadModels, unloadModels, + refreshCacheSize, clearAudioCache, updateSettings, clearError, + } = useTTSStore(); + + useEffect(() => { + setRamGB(hardwareService.getTotalMemoryGB()); + refreshCacheSize(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + const areBothDownloaded = isBackboneDownloaded && isVocoderDownloaded; + const isDownloading = isDownloadingBackbone || isDownloadingVocoder; + const deviceBlocked = ramGB < TTS_BLOCK_RAM_GB; + const deviceWarning = !deviceBlocked && ramGB < TTS_WARN_RAM_GB; + const totalSizeMB = TTS_BACKBONE_MODEL.backboneSizeMB + TTS_BACKBONE_MODEL.vocoderSizeMB; + + const handleDelete = () => { + setAlertState( + showAlert('Remove TTS Models', 'This will delete both model files and disable text-to-speech.', [ + { text: 'Cancel', style: 'cancel' }, + { text: 'Remove', style: 'destructive', onPress: () => { setAlertState(hideAlert()); deleteModels(); } }, + ]), + ); + }; + + const handleClearCache = () => { + setAlertState( + showAlert('Clear Audio Cache', `This will delete ${audioCacheSizeMB.toFixed(1)} MB of cached audio.`, [ + { text: 'Cancel', style: 'cancel' }, + { text: 'Clear', style: 'destructive', onPress: () => { setAlertState(hideAlert()); clearAudioCache(); } }, + ]), + ); + }; + + const handleModeChange = (mode: InterfaceMode) => { + if (mode === 'audio' && deviceBlocked) { return; } + updateSettings({ interfaceMode: mode }); + if (mode === 'audio' && !isModelLoaded && areBothDownloaded) { loadModels(); } + if (mode === 'chat' && isModelLoaded) { unloadModels(); } + }; + + return ( + + + navigation.goBack()}> + + + Text to Speech + {isModelLoading && } + + + + + + + {settings.interfaceMode === 'chat' && ( + + + + Enable TTS + Show play buttons on assistant messages + + updateSettings({ enabled: v })} trackColor={{ true: colors.primary }} /> + + + )} + + + Models ({totalSizeMB} MB total) + + + + {areBothDownloaded + ?