diff --git a/App.tsx b/App.tsx
index ac8cee15..1020942d 100644
--- a/App.tsx
+++ b/App.tsx
@@ -14,6 +14,15 @@ import { useTheme } from './src/theme';
 import { hardwareService, modelManager, authService, ragService, remoteServerManager } from './src/services';
 import logger from './src/utils/logger';
 import { useAppStore, useAuthStore, useRemoteServerStore } from './src/stores';
+import { useTTSStore } from './src/stores/ttsStore';
+import { initExecutorch } from 'react-native-executorch';
+import { BareResourceFetcher } from 'react-native-executorch-bare-resource-fetcher';
+import { KokoroTTSManager } from './src/components/KokoroTTSManager';
+import { isExecutorchSupported } from './src/constants/kokoroModels';
+
+// Initialise executorch resource fetcher once at module load time.
+// This must run before any useTextToSpeech hook is mounted.
+initExecutorch({ resourceFetcher: BareResourceFetcher });
 import { LockScreen } from './src/screens';
 import { useAppState } from './src/hooks/useAppState';
 
@@ -191,6 +200,9 @@ function App() {
       // Initialize RAG database tables
       ragService.ensureReady().catch((err) => logger.error('Failed to initialize RAG service on startup', err));
 
+      // Sync TTS download state so TTSButton / audio mode know models are available
+      useTTSStore.getState().checkDownloadStatus().catch(() => {});
+
       // Show the UI immediately
       setIsInitializing(false);
 
@@ -235,6 +247,7 @@ function App() {
     <GestureHandlerRootView style={styles.flex}>
       <SafeAreaProvider>
         <StatusBar barStyle={isDark ? 'light-content' : 'dark-content'} backgroundColor={colors.background} />
+        {isExecutorchSupported() && <KokoroTTSManager />}
         <NavigationContainer
           theme={{
             dark: isDark,
diff --git a/__tests__/integration/stores/tts.test.ts b/__tests__/integration/stores/tts.test.ts
new file mode 100644
index 00000000..5a84f400
--- /dev/null
+++ b/__tests__/integration/stores/tts.test.ts
@@ -0,0 +1,195 @@
+/**
+ * TTS Integration Tests
+ *
+ * Tests the wiring between ttsStore and ttsService:
+ * - Chat Mode full flow: download → load → speak → stop
+ * - Audio Mode full flow: download → load → generateAndSave → playMessage → stop
+ * - Auto-play triggering in Chat Mode
+ * - Mode switching
+ */
+
+jest.mock('../../../src/services/ttsService', () => ({
+  ttsService: {
+    isBackboneDownloaded: jest.fn(),
+    isVocoderDownloaded: jest.fn(),
+    downloadBackbone: jest.fn(),
+    downloadVocoder: jest.fn(),
+    deleteModels: jest.fn(),
+    loadModels: jest.fn(),
+    unloadModels: jest.fn(),
+    speak: jest.fn(),
+    stop: jest.fn(),
+    generateAndSave: jest.fn(),
+    playFromFile: jest.fn(),
+    getAudioCacheSizeMB: jest.fn(),
+    clearAudioCache: jest.fn(),
+  },
+}));
+
+jest.mock('../../../src/utils/logger', () => ({
+  __esModule: true,
+  default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() },
+}));
+
+import { useTTSStore } from '../../../src/stores/ttsStore';
+import { ttsService } from '../../../src/services/ttsService';
+
+const mockTTS = ttsService as jest.Mocked<typeof ttsService>;
+const getState = () => useTTSStore.getState();
+
+const resetStore = () => {
+  useTTSStore.setState({
+    isBackboneDownloaded: false,
+    isVocoderDownloaded: false,
+    isDownloadingBackbone: false,
+    isDownloadingVocoder: false,
+    backboneDownloadProgress: 0,
+    vocoderDownloadProgress: 0,
+    isModelLoading: false,
+    isModelLoaded: false,
+    isSpeaking: false,
+    currentMessageId: null,
+    audioCacheSizeMB: 0,
+    settings: { interfaceMode: 'chat', enabled: true, autoPlay: false, speed: 1.0, voiceId: '0', kokoroVoiceId: 'af_heart' },
+    error: null,
+  });
+};
+
+describe('TTS integration', () => {
+  beforeEach(() => {
+    resetStore();
+    jest.clearAllMocks();
+    mockTTS.getAudioCacheSizeMB.mockResolvedValue(0);
+  });
+
+  // ─── Chat Mode ────────────────────────────────────────────────────────────
+
+  describe('Chat Mode: download → load → speak → stop', () => {
+    it('completes the full Chat Mode flow', async () => {
+      // 1. Download
+      mockTTS.downloadBackbone.mockResolvedValue('/bb.gguf');
+      mockTTS.downloadVocoder.mockResolvedValue('/voc.gguf');
+      await getState().downloadModels();
+
+      expect(getState().isBackboneDownloaded).toBe(true);
+      expect(getState().isVocoderDownloaded).toBe(true);
+
+      // 2. Load
+      mockTTS.loadModels.mockResolvedValue(undefined);
+      await getState().loadModels();
+      expect(getState().isModelLoaded).toBe(true);
+
+      // 3. Speak
+      mockTTS.speak.mockResolvedValue(undefined);
+      mockTTS.stop.mockReturnValue(undefined);
+
+      const speakPromise = getState().speak('hello', 'msg1');
+      expect(getState().isSpeaking).toBe(true);
+      expect(getState().currentMessageId).toBe('msg1');
+
+      await speakPromise;
+      expect(getState().isSpeaking).toBe(false);
+      expect(getState().currentMessageId).toBeNull();
+
+      // 4. Stop mid-speech
+      mockTTS.speak.mockImplementation(
+        () => new Promise((resolve) => setTimeout(resolve, 1000)),
+      );
+      getState().speak('second', 'msg2');
+      getState().stop();
+      expect(getState().isSpeaking).toBe(false);
+    });
+  });
+
+  // ─── Audio Mode ───────────────────────────────────────────────────────────
+
+  describe('Audio Mode: download → load → generateAndSave → playMessage → stop', () => {
+    beforeEach(() => {
+      useTTSStore.setState({
+        settings: { interfaceMode: 'audio', enabled: true, autoPlay: false, speed: 1.0, voiceId: '0', kokoroVoiceId: 'af_heart' },
+      });
+    });
+
+    it('completes the full Audio Mode flow', async () => {
+      // 1. Download
+      mockTTS.downloadBackbone.mockResolvedValue('/bb.gguf');
+      mockTTS.downloadVocoder.mockResolvedValue('/voc.gguf');
+      await getState().downloadModels();
+
+      // 2. Load
+      mockTTS.loadModels.mockResolvedValue(undefined);
+      await getState().loadModels();
+      expect(getState().isModelLoaded).toBe(true);
+
+      // 3. GenerateAndSave
+      const mockAudio = {
+        samples: new Float32Array(100),
+        durationSeconds: 1.5,
+        sampleRate: 24000,
+        waveformData: new Array(200).fill(0.2),
+      };
+      mockTTS.generateAndSave.mockResolvedValue({ path: '/cache/c1/m1.pcm', audio: mockAudio } as any);
+      mockTTS.getAudioCacheSizeMB.mockResolvedValue(1.5);
+
+      const result = await getState().generateAndSave('hello audio', 'conv1', 'msg1');
+
+      expect(result.path).toBe('/cache/c1/m1.pcm');
+      expect(result.waveformData).toHaveLength(200);
+      expect(result.durationSeconds).toBe(1.5);
+      expect(getState().audioCacheSizeMB).toBeCloseTo(1.5);
+
+      // 4. PlayMessage
+      mockTTS.playFromFile.mockResolvedValue(undefined);
+      mockTTS.stop.mockReturnValue(undefined);
+
+      const playPromise = getState().playMessage('msg1', '/cache/c1/m1.pcm');
+      expect(getState().isSpeaking).toBe(true);
+      expect(getState().currentMessageId).toBe('msg1');
+
+      await playPromise;
+      expect(getState().isSpeaking).toBe(false);
+
+      // 5. StopPlayback
+      getState().stopPlayback();
+      expect(mockTTS.stop).toHaveBeenCalled();
+    });
+  });
+
+  // ─── Mode switching ───────────────────────────────────────────────────────
+
+  describe('mode switching', () => {
+    it('switching interfaceMode to audio takes effect immediately', () => {
+      expect(getState().settings.interfaceMode).toBe('chat');
+      getState().updateSettings({ interfaceMode: 'audio' });
+      expect(getState().settings.interfaceMode).toBe('audio');
+    });
+
+    it('switching back to chat mode works', () => {
+      getState().updateSettings({ interfaceMode: 'audio' });
+      getState().updateSettings({ interfaceMode: 'chat' });
+      expect(getState().settings.interfaceMode).toBe('chat');
+    });
+  });
+
+  // ─── Auto-play ────────────────────────────────────────────────────────────
+
+  describe('auto-play', () => {
+    it('speak is called when autoPlay is true and model is loaded', async () => {
+      useTTSStore.setState({
+        isModelLoaded: true,
+        settings: { interfaceMode: 'chat', enabled: true, autoPlay: true, speed: 1.0, voiceId: '0', kokoroVoiceId: 'af_heart' },
+      });
+      mockTTS.speak.mockResolvedValue(undefined);
+      mockTTS.stop.mockReturnValue(undefined);
+
+      // Simulate chat completion triggering speak
+      await getState().speak('AI response text', 'last-msg-id');
+
+      expect(mockTTS.speak).toHaveBeenCalledWith(
+        'AI response text',
+        expect.objectContaining({ voiceId: '0', speed: 1.0 }),
+        expect.any(Function),
+      );
+    });
+  });
+});
diff --git a/__tests__/rntl/components/ChatInput.test.tsx b/__tests__/rntl/components/ChatInput.test.tsx
index 617430ab..303297d3 100644
--- a/__tests__/rntl/components/ChatInput.test.tsx
+++ b/__tests__/rntl/components/ChatInput.test.tsx
@@ -51,10 +51,20 @@ jest.mock('../../../src/services/documentService', () => ({
 // Mock the stores
 const mockUseWhisperStore = jest.fn();
 const mockUseAppStore = jest.fn();
+const mockUseTTSStore = jest.fn(() => ({
+  settings: { interfaceMode: 'chat', enabled: false, speed: 1.0 },
+  isBackboneDownloaded: false,
+  isVocoderDownloaded: false,
+  isModelLoaded: false,
+  loadModels: jest.fn(),
+  unloadModels: jest.fn(),
+  updateSettings: jest.fn(),
+}));
 
 jest.mock('../../../src/stores', () => ({
   useWhisperStore: () => mockUseWhisperStore(),
   useAppStore: () => mockUseAppStore(),
+  useTTSStore: () => mockUseTTSStore(),
 }));
 
 // Mock the whisper hook
diff --git a/__tests__/rntl/components/GenerationSettingsModal.test.tsx b/__tests__/rntl/components/GenerationSettingsModal.test.tsx
index a9ef4647..ed7272b1 100644
--- a/__tests__/rntl/components/GenerationSettingsModal.test.tsx
+++ b/__tests__/rntl/components/GenerationSettingsModal.test.tsx
@@ -859,13 +859,13 @@ describe('GenerationSettingsModal', () => {
   });
 
   it('calls handleSliderComplete on text generation slider (no-op)', () => {
-    const { getByText, getAllByTestId } = render(
+    const { getByText, queryAllByTestId } = render(
       <GenerationSettingsModal {...defaultProps} />,
     );
 
     fireEvent.press(getByText('TEXT GENERATION'));
 
-    const sliders = getAllByTestId('slider');
+    const sliders = queryAllByTestId('slider');
     // onSlidingComplete is a no-op but should not throw
     if (sliders.length > 0 && sliders[0].props.onSlidingComplete) {
       expect(() => sliders[0].props.onSlidingComplete(0.5)).not.toThrow();
@@ -873,13 +873,13 @@ describe('GenerationSettingsModal', () => {
   });
 
   it('calls handleSliderChange on text slider value change', () => {
-    const { getByText, getAllByTestId } = render(
+    const { getByText, queryAllByTestId } = render(
       <GenerationSettingsModal {...defaultProps} />,
     );
 
     fireEvent.press(getByText('TEXT GENERATION'));
 
-    const sliders = getAllByTestId('slider');
+    const sliders = queryAllByTestId('slider');
     if (sliders.length > 0 && sliders[0].props.onValueChange) {
       sliders[0].props.onValueChange(0.5);
       expect(mockUpdateSettings).toHaveBeenCalled();
@@ -1070,17 +1070,16 @@ describe('GenerationSettingsModal', () => {
         expect(mockUpdateSettings).toHaveBeenCalledWith({ enableGpu: true, cacheType: 'f16' });
       });
 
-      it('calls updateSettings with gpuLayers value from GPU layers slider', () => {
+      it('calls updateSettings with gpuLayers value from GPU layers stepper', () => {
         mockStoreValues.settings = { ...defaultSettings, enableGpu: true, gpuLayers: 6, flashAttn: false };
         const { getByText, getByTestId } = render(<GenerationSettingsModal {...defaultProps} />);
         fireEvent.press(getByText('TEXT GENERATION'));
         fireEvent.press(getByTestId('modal-text-advanced-toggle'));
         mockUpdateSettings.mockClear();
 
-        const slider = getByTestId('gpu-layers-slider');
-        slider.props.onSlidingComplete(12);
+        fireEvent.press(getByTestId('gpu-layers-stepper-increment'));
 
-        expect(mockUpdateSettings).toHaveBeenCalledWith({ gpuLayers: 12 });
+        expect(mockUpdateSettings).toHaveBeenCalledWith({ gpuLayers: 7 });
       });
     });
   });
diff --git a/__tests__/rntl/components/VoiceRecordButton.test.tsx b/__tests__/rntl/components/VoiceRecordButton.test.tsx
index b92c45a3..84899278 100644
--- a/__tests__/rntl/components/VoiceRecordButton.test.tsx
+++ b/__tests__/rntl/components/VoiceRecordButton.test.tsx
@@ -87,16 +87,17 @@ describe('VoiceRecordButton', () => {
     });
 
     it('shows recording indicator when isRecording is true', () => {
-      const { getByText } = render(
+      const { toJSON } = render(
         <VoiceRecordButton {...defaultProps} isRecording={true} />
       );
 
-      // When recording, "Slide to cancel" text appears in the cancel hint
-      expect(getByText('Slide to cancel')).toBeTruthy();
+      // In audio mode (default, !asSendButton), recording shows a stop icon (square)
+      const treeStr = JSON.stringify(toJSON());
+      expect(treeStr).toContain('square');
     });
 
     it('shows transcribing state when isTranscribing is true', () => {
-      const { getByText } = render(
+      const { toJSON } = render(
         <VoiceRecordButton
           {...defaultProps}
           isTranscribing={true}
@@ -104,14 +105,15 @@ describe('VoiceRecordButton', () => {
         />
       );
 
-      // Transcribing state shows "Transcribing..." text
-      expect(getByText('Transcribing...')).toBeTruthy();
+      // Transcribing state renders a spinning indicator (no text in audio mode)
+      expect(toJSON()).toBeTruthy();
     });
 
-    it('shows partial result text when provided', () => {
+    it('shows partial result text when provided in chat mode (asSendButton)', () => {
       const { getByText } = render(
         <VoiceRecordButton
           {...defaultProps}
+          asSendButton={true}
           isRecording={true}
           partialResult="Hello world"
         />
@@ -166,7 +168,7 @@ describe('VoiceRecordButton', () => {
       expect(toJSON()).toBeTruthy();
     });
 
-    it('taps unavailable button and triggers alert with error message', () => {
+    it('taps unavailable button and triggers download prompt alert', () => {
       const { UNSAFE_getAllByType } = render(
         <VoiceRecordButton
           {...defaultProps}
@@ -181,13 +183,13 @@ describe('VoiceRecordButton', () => {
       fireEvent.press(touchables[0]);
 
       expect(mockShowAlert).toHaveBeenCalledWith(
-        'Voice Input Unavailable',
-        expect.stringContaining('Microphone permission denied'),
+        'Download Voice Model',
+        expect.stringContaining('Download Whisper Small'),
         expect.any(Array)
       );
     });
 
-    it('taps unavailable button with default error when no error prop', () => {
+    it('taps unavailable button shows download prompt with size', () => {
       const { UNSAFE_getAllByType } = render(
         <VoiceRecordButton
           {...defaultProps}
@@ -200,13 +202,13 @@ describe('VoiceRecordButton', () => {
       fireEvent.press(touchables[0]);
 
       expect(mockShowAlert).toHaveBeenCalledWith(
-        'Voice Input Unavailable',
-        expect.stringContaining('No transcription model downloaded'),
+        'Download Voice Model',
+        expect.stringContaining('466 MB'),
         expect.any(Array)
       );
     });
 
-    it('alert message includes instructions for downloading model', () => {
+    it('alert message includes Download and Cancel buttons', () => {
       const { UNSAFE_getAllByType } = render(
         <VoiceRecordButton
           {...defaultProps}
@@ -219,9 +221,12 @@ describe('VoiceRecordButton', () => {
       fireEvent.press(touchables[0]);
 
       expect(mockShowAlert).toHaveBeenCalledWith(
-        'Voice Input Unavailable',
-        expect.stringContaining('Download a Whisper model'),
-        expect.any(Array)
+        'Download Voice Model',
+        expect.any(String),
+        expect.arrayContaining([
+          expect.objectContaining({ text: 'Cancel' }),
+          expect.objectContaining({ text: 'Download' }),
+        ])
       );
     });
   });
@@ -400,11 +405,13 @@ describe('VoiceRecordButton', () => {
     });
 
     it('does not show cancel hint when not recording', () => {
-      const { queryByText } = render(
+      const { toJSON } = render(
         <VoiceRecordButton {...defaultProps} isRecording={false} />
       );
 
-      expect(queryByText('Slide to cancel')).toBeNull();
+      // Audio mode (default) uses tap-to-toggle, no slide-to-cancel
+      const treeStr = JSON.stringify(toJSON());
+      expect(treeStr).not.toContain('Slide to cancel');
     });
 
     it('does not show partial result when partialResult is empty', () => {
@@ -418,12 +425,12 @@ describe('VoiceRecordButton', () => {
 
       // partialResult is empty, so the partial result container should not render
       const treeStr = JSON.stringify(toJSON());
-      // The cancel hint should still show
-      expect(treeStr).toContain('Slide to cancel');
+      // Audio mode uses tap-to-toggle with a stop icon
+      expect(treeStr).toContain('square');
     });
 
     it('shows recording UI elements but not transcribing when recording', () => {
-      const { getByText, queryByText } = render(
+      const { toJSON, queryByText } = render(
         <VoiceRecordButton
           {...defaultProps}
           isRecording={true}
@@ -433,7 +440,8 @@ describe('VoiceRecordButton', () => {
 
       // When isRecording is true AND isTranscribing is true,
       // the component shows recording UI (not transcribing state)
-      expect(getByText('Slide to cancel')).toBeTruthy();
+      const treeStr = JSON.stringify(toJSON());
+      expect(treeStr).toContain('square');
       expect(queryByText('Transcribing...')).toBeNull();
     });
 
@@ -446,7 +454,7 @@ describe('VoiceRecordButton', () => {
     });
 
     it('prioritizes model loading state over recording', () => {
-      const { getByText, queryByText } = render(
+      const { getByText, toJSON } = render(
         <VoiceRecordButton
           {...defaultProps}
           isModelLoading={true}
@@ -455,11 +463,13 @@ describe('VoiceRecordButton', () => {
       );
 
       expect(getByText('Loading...')).toBeTruthy();
-      expect(queryByText('Slide to cancel')).toBeNull();
+      // Recording UI should not render when loading
+      const treeStr = JSON.stringify(toJSON());
+      expect(treeStr).not.toContain('square');
     });
 
     it('prioritizes model loading state over transcribing', () => {
-      const { getByText, queryByText } = render(
+      const { getByText, toJSON } = render(
         <VoiceRecordButton
           {...defaultProps}
           isModelLoading={true}
@@ -468,7 +478,8 @@ describe('VoiceRecordButton', () => {
       );
 
       expect(getByText('Loading...')).toBeTruthy();
-      expect(queryByText('Transcribing...')).toBeNull();
+      // Transcribing state should not render when loading
+      expect(toJSON()).toBeTruthy();
     });
   });
 });
diff --git a/__tests__/rntl/screens/DownloadManagerScreen.test.tsx b/__tests__/rntl/screens/DownloadManagerScreen.test.tsx
index 2a976dfd..255ab5e6 100644
--- a/__tests__/rntl/screens/DownloadManagerScreen.test.tsx
+++ b/__tests__/rntl/screens/DownloadManagerScreen.test.tsx
@@ -212,20 +212,23 @@ describe('DownloadManagerScreen', () => {
   });
 
   it('shows empty state when no downloads', () => {
-    const { getByText } = render(<DownloadManagerScreen />);
-    expect(getByText('No active downloads')).toBeTruthy();
+    const { getByText, queryByText } = render(<DownloadManagerScreen />);
+    // Active Downloads section is hidden when there are no active items
+    expect(queryByText('Active Downloads')).toBeNull();
     expect(getByText('No models downloaded yet')).toBeTruthy();
   });
 
   it('shows section headers for active and completed', () => {
-    const { getByText } = render(<DownloadManagerScreen />);
-    expect(getByText('Active Downloads')).toBeTruthy();
+    const { getByText, queryByText } = render(<DownloadManagerScreen />);
+    // Active Downloads section is hidden when empty
+    expect(queryByText('Active Downloads')).toBeNull();
+    // Downloaded Models section is always shown
     expect(getByText('Downloaded Models')).toBeTruthy();
   });
 
   it('shows empty subtext when no models downloaded', () => {
     const { getByText } = render(<DownloadManagerScreen />);
-    expect(getByText('Go to the Models tab to browse and download models')).toBeTruthy();
+    expect(getByText('No models downloaded yet')).toBeTruthy();
   });
 
   it('renders completed text model with details', () => {
@@ -305,11 +308,12 @@ describe('DownloadManagerScreen', () => {
     expect(getByText(/Total storage used/)).toBeTruthy();
   });
 
-  it('shows count badges for active and completed sections', () => {
+  it('shows count badge for completed section', () => {
     setupSingleModelState();
 
     const { getByText } = render(<DownloadManagerScreen />);
-    expect(getByText('0')).toBeTruthy();
+    // Active section is hidden when empty (no "0" badge)
+    // Completed section shows count of 1
     expect(getByText('1')).toBeTruthy();
   });
 
@@ -344,7 +348,8 @@ describe('DownloadManagerScreen', () => {
 
     const { UNSAFE_getAllByType } = render(<DownloadManagerScreen />);
     const touchables = UNSAFE_getAllByType(TouchableOpacity);
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     if (cancelButtons.length > 0) {
       fireEvent.press(cancelButtons[0]);
     }
@@ -820,8 +825,8 @@ describe('DownloadManagerScreen', () => {
 
     const { UNSAFE_getAllByType, getByTestId } = render(<DownloadManagerScreen />);
     const touchables = UNSAFE_getAllByType(TouchableOpacity);
-    // Press the cancel button (second touchable after back button)
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     fireEvent.press(cancelButtons[0]);
 
     // Press "Yes" to confirm
@@ -852,7 +857,8 @@ describe('DownloadManagerScreen', () => {
 
     const { UNSAFE_getAllByType, getByTestId } = render(<DownloadManagerScreen />);
     const touchables = UNSAFE_getAllByType(TouchableOpacity);
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     fireEvent.press(cancelButtons[0]);
 
     await act(async () => {
@@ -880,7 +886,8 @@ describe('DownloadManagerScreen', () => {
 
     const { UNSAFE_getAllByType, getByTestId } = render(<DownloadManagerScreen />);
     const touchables = UNSAFE_getAllByType(TouchableOpacity);
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     fireEvent.press(cancelButtons[0]);
 
     await act(async () => {
@@ -1029,7 +1036,8 @@ describe('DownloadManagerScreen', () => {
 
     // Find the cancel button for the RNFS download (which has no downloadId)
     const touchables = result.UNSAFE_getAllByType(TouchableOpacity);
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     if (cancelButtons.length > 0) {
       fireEvent.press(cancelButtons[0]);
 
@@ -1367,8 +1375,8 @@ describe('DownloadManagerScreen', () => {
 
     // Find and press cancel button on the active download
     const touchables = result.UNSAFE_getAllByType(TouchableOpacity);
-    // Find cancel buttons (skip back button)
-    const cancelButtons = touchables.filter((_: any, i: number) => i > 0);
+    // Skip back button (1) + filter chips (6) = 7 touchables before content
+    const cancelButtons = touchables.filter((_: any, i: number) => i > 6);
     if (cancelButtons.length > 0) {
       fireEvent.press(cancelButtons[0]);
 
diff --git a/__tests__/rntl/screens/ModelSettingsScreen.test.tsx b/__tests__/rntl/screens/ModelSettingsScreen.test.tsx
index 026ba7b1..455b376b 100644
--- a/__tests__/rntl/screens/ModelSettingsScreen.test.tsx
+++ b/__tests__/rntl/screens/ModelSettingsScreen.test.tsx
@@ -644,14 +644,13 @@ describe('ModelSettingsScreen', () => {
         expect(useAppStore.getState().settings.enableGpu).toBe(true);
       });
 
-      it('updates gpuLayers when GPU Layers slider completes', () => {
+      it('updates gpuLayers when GPU Layers stepper is incremented', () => {
         useAppStore.getState().updateSettings({ enableGpu: true, flashAttn: false, gpuLayers: 6 });
         const { getByTestId } = renderWithSections('text');
 
-        const slider = getByTestId('gpu-layers-slider');
-        fireEvent(slider, 'slidingComplete', 12);
+        fireEvent.press(getByTestId('gpu-layers-stepper-increment'));
 
-        expect(useAppStore.getState().settings.gpuLayers).toBe(12);
+        expect(useAppStore.getState().settings.gpuLayers).toBe(7);
       });
     });
   });
diff --git a/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx b/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx
index a055a2ad..7d459bde 100644
--- a/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx
+++ b/__tests__/rntl/screens/VoiceSettingsScreen.test.tsx
@@ -3,15 +3,15 @@
  *
  * Tests for the voice settings screen including:
  * - Title display
- * - Description text about Whisper
- * - Download options when no model
+ * - Privacy note text
+ * - English and Multilingual model sections
  * - Back button navigation
- * - Downloaded model state (name, status badge, remove button)
+ * - Active model state (name, badge, remove button)
  * - Download progress display
  * - Model download trigger
  * - Remove model confirmation alert
  * - Error display and clear
- * - Privacy card display
+ * - Search bar
  *
  * Priority: P1 (High)
  */
@@ -82,6 +82,7 @@ jest.mock('../../../src/components/Button', () => ({
 }));
 
 const mockDownloadModel = jest.fn();
+const mockDownloadFromUrl = jest.fn();
 const mockDeleteModel = jest.fn();
 const mockClearError = jest.fn();
 
@@ -90,6 +91,7 @@ let mockWhisperStoreValues: any = {
   isDownloading: false,
   downloadProgress: 0,
   downloadModel: mockDownloadModel,
+  downloadFromUrl: mockDownloadFromUrl,
   deleteModel: mockDeleteModel,
   error: null,
   clearError: mockClearError,
@@ -101,13 +103,24 @@ jest.mock('../../../src/stores', () => ({
 
 jest.mock('../../../src/services', () => ({
   WHISPER_MODELS: [
-    { id: 'tiny', name: 'Whisper Tiny', size: '75', description: 'Fastest, lower accuracy' },
-    { id: 'base', name: 'Whisper Base', size: '141', description: 'Good accuracy' },
-    { id: 'small', name: 'Whisper Small', size: '461', description: 'Better accuracy' },
-    { id: 'medium', name: 'Whisper Medium', size: '1500', description: 'Best accuracy' },
+    { id: 'tiny.en', name: 'Tiny', size: 75, lang: 'en', description: 'Fastest, English only' },
+    { id: 'base.en', name: 'Base', size: 142, lang: 'en', description: 'Better accuracy, English only' },
+    { id: 'small.en', name: 'Small', size: 466, lang: 'en', description: 'High accuracy, English only' },
+    { id: 'medium.en', name: 'Medium', size: 1500, lang: 'en', description: 'Near human-level, English only' },
+    { id: 'tiny', name: 'Tiny', size: 75, lang: 'multi', description: 'Fastest, 99 languages' },
+    { id: 'base', name: 'Base', size: 142, lang: 'multi', description: 'Better accuracy, 99 languages' },
+    { id: 'small', name: 'Small', size: 466, lang: 'multi', description: 'High accuracy, 99 languages' },
+    { id: 'medium', name: 'Medium', size: 1500, lang: 'multi', description: 'Near human-level, 99 languages' },
   ],
 }));
 
+jest.mock('../../../src/services/huggingface', () => ({
+  huggingFaceService: {
+    searchWhisperRepos: jest.fn().mockResolvedValue([]),
+    getWhisperFiles: jest.fn().mockResolvedValue([]),
+  },
+}));
+
 import { VoiceSettingsScreen } from '../../../src/screens/VoiceSettingsScreen';
 
 const mockGoBack = jest.fn();
@@ -134,6 +147,7 @@ describe('VoiceSettingsScreen', () => {
       isDownloading: false,
       downloadProgress: 0,
       downloadModel: mockDownloadModel,
+      downloadFromUrl: mockDownloadFromUrl,
       deleteModel: mockDeleteModel,
       error: null,
       clearError: mockClearError,
@@ -149,19 +163,16 @@ describe('VoiceSettingsScreen', () => {
       expect(getByText('Voice Transcription')).toBeTruthy();
     });
 
-    it('shows description text about Whisper', () => {
+    it('shows privacy note about on-device transcription', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
       expect(
-        getByText(/Download a Whisper model to enable on-device voice input/),
+        getByText(/All transcription runs on-device/),
       ).toBeTruthy();
     });
 
-    it('shows privacy card', () => {
-      const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Privacy First')).toBeTruthy();
-      expect(
-        getByText(/Voice transcription happens entirely on your device/),
-      ).toBeTruthy();
+    it('shows search bar', () => {
+      const { getByPlaceholderText } = render(<VoiceSettingsScreen />);
+      expect(getByPlaceholderText('Search models or HuggingFace...')).toBeTruthy();
     });
 
     it('back button calls goBack', () => {
@@ -178,48 +189,46 @@ describe('VoiceSettingsScreen', () => {
   // No Model Downloaded - Download Options
   // ============================================================================
   describe('download options (no model)', () => {
-    it('shows download options when no model is downloaded', () => {
+    it('shows English model section', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Whisper Tiny')).toBeTruthy();
-      expect(getByText('Whisper Base')).toBeTruthy();
-      expect(getByText('Whisper Small')).toBeTruthy();
+      expect(getByText('ENGLISH ONLY')).toBeTruthy();
     });
 
-    it('shows only first 3 models (slice(0, 3))', () => {
-      const { queryByText } = render(<VoiceSettingsScreen />);
-      // 4th model (medium) should NOT be shown due to .slice(0, 3)
-      expect(queryByText('Whisper Medium')).toBeNull();
+    it('shows Multilingual model section', () => {
+      const { getByText } = render(<VoiceSettingsScreen />);
+      expect(getByText(/MULTILINGUAL/)).toBeTruthy();
     });
 
-    it('shows "Select a model to download" label', () => {
-      const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Select a model to download:')).toBeTruthy();
+    it('shows model names in English section', () => {
+      const { getAllByText } = render(<VoiceSettingsScreen />);
+      // "Tiny" appears in both English and Multilingual sections
+      expect(getAllByText('Tiny').length).toBeGreaterThanOrEqual(1);
     });
 
-    it('shows model size for each option', () => {
-      const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('75 MB')).toBeTruthy();
-      expect(getByText('141 MB')).toBeTruthy();
-      expect(getByText('461 MB')).toBeTruthy();
+    it('shows model size for options', () => {
+      const { getAllByText } = render(<VoiceSettingsScreen />);
+      // Sizes appear in both English and Multilingual sections
+      expect(getAllByText('75 MB').length).toBeGreaterThanOrEqual(1);
+      expect(getAllByText('142 MB').length).toBeGreaterThanOrEqual(1);
+      expect(getAllByText('466 MB').length).toBeGreaterThanOrEqual(1);
     });
 
-    it('shows model description for each option', () => {
+    it('shows model description for options', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Fastest, lower accuracy')).toBeTruthy();
-      expect(getByText('Good accuracy')).toBeTruthy();
-      expect(getByText('Better accuracy')).toBeTruthy();
+      expect(getByText('Fastest, English only')).toBeTruthy();
+      expect(getByText('Better accuracy, English only')).toBeTruthy();
     });
 
     it('calls downloadModel when a model option is pressed', () => {
-      const { getByText } = render(<VoiceSettingsScreen />);
-      fireEvent.press(getByText('Whisper Base'));
-      expect(mockDownloadModel).toHaveBeenCalledWith('base');
+      const { getByTestId } = render(<VoiceSettingsScreen />);
+      fireEvent.press(getByTestId('model-download-base.en'));
+      expect(mockDownloadModel).toHaveBeenCalledWith('base.en');
     });
 
     it('calls downloadModel with correct id for tiny model', () => {
-      const { getByText } = render(<VoiceSettingsScreen />);
-      fireEvent.press(getByText('Whisper Tiny'));
-      expect(mockDownloadModel).toHaveBeenCalledWith('tiny');
+      const { getByTestId } = render(<VoiceSettingsScreen />);
+      fireEvent.press(getByTestId('model-download-tiny.en'));
+      expect(mockDownloadModel).toHaveBeenCalledWith('tiny.en');
     });
   });
 
@@ -230,28 +239,28 @@ describe('VoiceSettingsScreen', () => {
     beforeEach(() => {
       mockWhisperStoreValues = {
         ...mockWhisperStoreValues,
-        downloadedModelId: 'base',
+        downloadedModelId: 'base.en',
       };
     });
 
-    it('shows downloaded model name', () => {
+    it('shows active model section label', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Whisper Base')).toBeTruthy();
+      expect(getByText('ACTIVE MODEL')).toBeTruthy();
     });
 
-    it('shows "Downloaded" status badge', () => {
+    it('shows downloaded model name with language', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Downloaded')).toBeTruthy();
+      expect(getByText(/Base — English/)).toBeTruthy();
     });
 
-    it('shows "Remove Model" button', () => {
+    it('shows "Active" status badge', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Remove Model')).toBeTruthy();
+      expect(getByText('Active')).toBeTruthy();
     });
 
-    it('does not show download options when model is downloaded', () => {
-      const { queryByText } = render(<VoiceSettingsScreen />);
-      expect(queryByText('Select a model to download:')).toBeNull();
+    it('shows "Remove" button', () => {
+      const { getByText } = render(<VoiceSettingsScreen />);
+      expect(getByText('Remove')).toBeTruthy();
     });
 
     it('shows model id as fallback when model not found in WHISPER_MODELS', () => {
@@ -263,11 +272,11 @@ describe('VoiceSettingsScreen', () => {
       expect(getByText('unknown-model')).toBeTruthy();
     });
 
-    it('pressing Remove Model shows confirmation alert', () => {
+    it('pressing Remove shows confirmation alert', () => {
       const { getByText } = render(<VoiceSettingsScreen />);
-      fireEvent.press(getByText('Remove Model'));
+      fireEvent.press(getByText('Remove'));
       expect(mockShowAlert).toHaveBeenCalledWith(
-        'Remove Whisper Model',
+        'Remove Voice Model',
         'This will disable voice input until you download a model again.',
         expect.arrayContaining([
           expect.objectContaining({ text: 'Cancel', style: 'cancel' }),
@@ -294,11 +303,6 @@ describe('VoiceSettingsScreen', () => {
       expect(getByText('Downloading... 45%')).toBeTruthy();
     });
 
-    it('does not show download options during download', () => {
-      const { queryByText } = render(<VoiceSettingsScreen />);
-      expect(queryByText('Select a model to download:')).toBeNull();
-    });
-
     it('shows 0% at start of download', () => {
       mockWhisperStoreValues = {
         ...mockWhisperStoreValues,
@@ -334,13 +338,13 @@ describe('VoiceSettingsScreen', () => {
   // Error State
   // ============================================================================
   describe('error state', () => {
-    it('shows error message when whisperError is set', () => {
+    it('shows error message with tap to dismiss when whisperError is set', () => {
       mockWhisperStoreValues = {
         ...mockWhisperStoreValues,
         error: 'Download failed: network error',
       };
       const { getByText } = render(<VoiceSettingsScreen />);
-      expect(getByText('Download failed: network error')).toBeTruthy();
+      expect(getByText('Download failed: network error (tap to dismiss)')).toBeTruthy();
     });
 
     it('calls clearError when error is tapped', () => {
@@ -349,7 +353,7 @@ describe('VoiceSettingsScreen', () => {
         error: 'Download failed',
       };
       const { getByText } = render(<VoiceSettingsScreen />);
-      fireEvent.press(getByText('Download failed'));
+      fireEvent.press(getByText('Download failed (tap to dismiss)'));
       expect(mockClearError).toHaveBeenCalled();
     });
 
diff --git a/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts b/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts
index 0e37e3e3..727880ba 100644
--- a/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts
+++ b/__tests__/unit/hooks/useKeyboardAwarePopover.test.ts
@@ -126,12 +126,12 @@ describe('useKeyboardAwarePopover', () => {
       expect(mockKeyboardDismiss).not.toHaveBeenCalled();
     });
 
-    it('measures trigger position with custom offsetX', () => {
+    it('measures trigger position from button coords', () => {
       const mockMeasureInWindow = jest.fn((callback) => {
         callback(10, 100, 50, 30);
       });
 
-      const { result } = renderHook(() => useKeyboardAwarePopover(20));
+      const { result } = renderHook(() => useKeyboardAwarePopover());
 
       // Set up mock ref
       (result.current.triggerRef as any).current = {
@@ -143,9 +143,9 @@ describe('useKeyboardAwarePopover', () => {
       });
 
       expect(mockMeasureInWindow).toHaveBeenCalled();
-      // anchor.y = screenH - y = 800 - 100 = 700
-      // anchor.x = offsetX = 20
-      expect(result.current.anchor).toEqual({ y: 700, x: 20 });
+      // anchor.y = screenH - btnY = 800 - 100 = 700
+      // anchor.x = screenW - (btnX + btnW) = 400 - (10 + 50) = 340
+      expect(result.current.anchor).toEqual({ y: 700, x: 340 });
     });
 
     it('handles missing measureInWindow gracefully', () => {
@@ -175,7 +175,8 @@ describe('useKeyboardAwarePopover', () => {
       });
 
       // y = screenH - (undefined ?? 0) = 800 - 0 = 800
-      expect(result.current.anchor).toEqual({ y: 800, x: 12 }); // SPACING.md = 12
+      // x = screenW - (btnX + btnW) = 400 - (10 + 50) = 340
+      expect(result.current.anchor).toEqual({ y: 800, x: 340 });
     });
   });
 
@@ -361,8 +362,8 @@ describe('useKeyboardAwarePopover', () => {
     });
   });
 
-  describe('offsetX parameter', () => {
-    it('uses default SPACING.md when offsetX not provided', () => {
+  describe('button position measurement', () => {
+    it('computes anchorX as right-edge distance from screen right', () => {
       const mockMeasureInWindow = jest.fn((callback) => {
         callback(10, 100, 50, 30);
       });
@@ -377,16 +378,16 @@ describe('useKeyboardAwarePopover', () => {
         result.current.show();
       });
 
-      // SPACING.md = 12
-      expect(result.current.anchor.x).toBe(12);
+      // screenW=400, btnX=10, btnW=50 → x = 400 - (10+50) = 340
+      expect(result.current.anchor.x).toBe(340);
     });
 
-    it('uses custom offsetX when provided', () => {
+    it('computes anchorY as distance from button top to screen bottom', () => {
       const mockMeasureInWindow = jest.fn((callback) => {
         callback(10, 100, 50, 30);
       });
 
-      const { result } = renderHook(() => useKeyboardAwarePopover(50));
+      const { result } = renderHook(() => useKeyboardAwarePopover());
 
       (result.current.triggerRef as any).current = {
         measureInWindow: mockMeasureInWindow,
@@ -396,7 +397,8 @@ describe('useKeyboardAwarePopover', () => {
         result.current.show();
       });
 
-      expect(result.current.anchor.x).toBe(50);
+      // screenH=800, btnY=100 → y = 800 - 100 = 700
+      expect(result.current.anchor.y).toBe(700);
     });
   });
 });
\ No newline at end of file
diff --git a/__tests__/unit/services/ttsService.test.ts b/__tests__/unit/services/ttsService.test.ts
new file mode 100644
index 00000000..4e46d45b
--- /dev/null
+++ b/__tests__/unit/services/ttsService.test.ts
@@ -0,0 +1,302 @@
+/**
+ * TTS Service Unit Tests
+ *
+ * Tests for backbone/vocoder download, model lifecycle, audio generation,
+ * file persistence, and playback control.
+ * Priority: P1 - Core TTS functionality.
+ */
+
+jest.mock('llama.rn', () => ({
+  initLlama: jest.fn(),
+}));
+
+jest.mock('react-native-fs', () => ({
+  DocumentDirectoryPath: '/mock/docs',
+  exists: jest.fn(),
+  mkdir: jest.fn(),
+  unlink: jest.fn(),
+  downloadFile: jest.fn(),
+  writeFile: jest.fn(),
+  readFile: jest.fn(),
+  stat: jest.fn(),
+  readDir: jest.fn(),
+}));
+
+jest.mock('react-native-audio-api', () => ({
+  AudioContext: jest.fn().mockImplementation(() => ({
+    createBuffer: jest.fn().mockReturnValue({ copyToChannel: jest.fn() }),
+    createBufferSource: jest.fn().mockReturnValue({
+      connect: jest.fn(),
+      start: jest.fn(),
+      stop: jest.fn(),
+      playbackRate: { value: 1.0 },
+      onended: null,
+      buffer: null,
+    }),
+    destination: {},
+    close: jest.fn(),
+  })),
+}));
+
+jest.mock('../../../src/utils/logger', () => ({
+  __esModule: true,
+  default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() },
+}));
+
+import RNFS from 'react-native-fs';
+import { initLlama } from 'llama.rn';
+import { ttsService } from '../../../src/services/ttsService';
+import { TTS_BACKBONE_MODEL } from '../../../src/constants/ttsModels';
+
+const mockRNFS = RNFS as jest.Mocked<typeof RNFS>;
+const mockInitLlama = initLlama as jest.Mock;
+
+const makeMockContext = (vocoderEnabled = true) => ({
+  initVocoder: jest.fn().mockResolvedValue(undefined),
+  isVocoderEnabled: jest.fn().mockResolvedValue(vocoderEnabled),
+  releaseVocoder: jest.fn().mockResolvedValue(undefined),
+  release: jest.fn().mockResolvedValue(undefined),
+  getFormattedAudioCompletion: jest.fn().mockResolvedValue({ prompt: 'p', grammar: 'g' }),
+  getAudioCompletionGuideTokens: jest.fn().mockResolvedValue([1, 2, 3]),
+  completion: jest.fn().mockResolvedValue({ audio_tokens: [10, 20, 30] }),
+  decodeAudioTokens: jest.fn().mockResolvedValue(new Array(2400).fill(0.1)),
+});
+
+describe('ttsService', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+    // Reset internal state between tests
+    (ttsService as any).context = null;
+    (ttsService as any).isVocoderReady = false;
+    (ttsService as any).isSpeakingFlag = false;
+    (ttsService as any).contextLoadPromise = Promise.resolve();
+  });
+
+  // ─── Paths ────────────────────────────────────────────────────────────────
+
+  describe('paths', () => {
+    it('backbone path uses tts-models directory', () => {
+      expect(ttsService.getBackbonePath()).toBe(
+        `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.backboneFile}`,
+      );
+    });
+
+    it('vocoder path uses tts-models directory', () => {
+      expect(ttsService.getVocoderPath()).toBe(
+        `/mock/docs/tts-models/${TTS_BACKBONE_MODEL.vocoderFile}`,
+      );
+    });
+
+    it('audio file path scoped to conversationId and messageId', () => {
+      expect(ttsService.getAudioFilePath('conv1', 'msg1')).toBe(
+        '/mock/docs/audio-cache/conv1/msg1.pcm',
+      );
+    });
+  });
+
+  // ─── Download ────────────────────────────────────────────────────────────
+
+  describe('downloadBackbone', () => {
+    it('returns existing path without downloading if already present', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(true) // ensureDir
+                     .mockResolvedValueOnce(true); // file exists
+      const path = await ttsService.downloadBackbone();
+      expect(mockRNFS.downloadFile).not.toHaveBeenCalled();
+      expect(path).toBe(ttsService.getBackbonePath());
+    });
+
+    it('downloads and returns path on success', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(false) // dir missing
+                     .mockResolvedValueOnce(false); // file missing
+      mockRNFS.mkdir.mockResolvedValueOnce(undefined);
+      mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) });
+
+      const onProgress = jest.fn();
+      const path = await ttsService.downloadBackbone(onProgress);
+
+      expect(mockRNFS.downloadFile).toHaveBeenCalledWith(
+        expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.backboneUrl }),
+      );
+      expect(path).toBe(ttsService.getBackbonePath());
+    });
+
+    it('throws and removes partial file on non-200 response', async () => {
+      mockRNFS.exists.mockResolvedValue(false);
+      mockRNFS.mkdir.mockResolvedValueOnce(undefined);
+      mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 404, jobId: 1, bytesWritten: 0 }) });
+      mockRNFS.unlink.mockResolvedValue(undefined);
+
+      await expect(ttsService.downloadBackbone()).rejects.toThrow('HTTP 404');
+      expect(mockRNFS.unlink).toHaveBeenCalled();
+    });
+  });
+
+  describe('downloadVocoder', () => {
+    it('downloads vocoder to correct path', async () => {
+      mockRNFS.exists.mockResolvedValue(false);
+      mockRNFS.mkdir.mockResolvedValueOnce(undefined);
+      mockRNFS.downloadFile.mockReturnValue({ jobId: 1, promise: Promise.resolve({ statusCode: 200, jobId: 1, bytesWritten: 0 }) });
+
+      const path = await ttsService.downloadVocoder();
+      expect(mockRNFS.downloadFile).toHaveBeenCalledWith(
+        expect.objectContaining({ fromUrl: TTS_BACKBONE_MODEL.vocoderUrl }),
+      );
+      expect(path).toBe(ttsService.getVocoderPath());
+    });
+  });
+
+  // ─── Model Lifecycle ─────────────────────────────────────────────────────
+
+  describe('loadModels', () => {
+    it('calls initLlama with backbone path then initVocoder', async () => {
+      const ctx = makeMockContext();
+      mockInitLlama.mockResolvedValue(ctx);
+
+      await ttsService.loadModels();
+
+      expect(mockInitLlama).toHaveBeenCalledWith(
+        expect.objectContaining({ model: ttsService.getBackbonePath() }),
+      );
+      expect(ctx.initVocoder).toHaveBeenCalledWith(
+        expect.objectContaining({ path: ttsService.getVocoderPath() }),
+      );
+    });
+
+    it('throws if isVocoderEnabled returns false', async () => {
+      const ctx = makeMockContext(false);
+      mockInitLlama.mockResolvedValue(ctx);
+
+      await expect(ttsService.loadModels()).rejects.toThrow('Vocoder failed to initialize');
+    });
+
+    it('is idempotent — does not double-init if already loaded', async () => {
+      const ctx = makeMockContext();
+      mockInitLlama.mockResolvedValue(ctx);
+
+      await ttsService.loadModels();
+      await ttsService.loadModels();
+
+      expect(mockInitLlama).toHaveBeenCalledTimes(1);
+    });
+  });
+
+  describe('unloadModels', () => {
+    it('calls releaseVocoder and release', async () => {
+      const ctx = makeMockContext();
+      mockInitLlama.mockResolvedValue(ctx);
+      await ttsService.loadModels();
+
+      await ttsService.unloadModels();
+
+      expect(ctx.releaseVocoder).toHaveBeenCalled();
+      expect(ctx.release).toHaveBeenCalled();
+      expect(ttsService.isLoaded()).toBe(false);
+    });
+  });
+
+  // ─── Generation ──────────────────────────────────────────────────────────
+
+  describe('generate', () => {
+    it('calls completion pipeline in correct order and returns GeneratedAudio', async () => {
+      const ctx = makeMockContext();
+      mockInitLlama.mockResolvedValue(ctx);
+      await ttsService.loadModels();
+
+      const audio = await ttsService.generate('hello world');
+
+      expect(ctx.getFormattedAudioCompletion).toHaveBeenCalled();
+      expect(ctx.getAudioCompletionGuideTokens).toHaveBeenCalledWith('hello world');
+      expect(ctx.completion).toHaveBeenCalled();
+      expect(ctx.decodeAudioTokens).toHaveBeenCalled();
+
+      expect(audio.samples).toBeInstanceOf(Float32Array);
+      expect(audio.waveformData).toHaveLength(200);
+      expect(audio.durationSeconds).toBeGreaterThan(0);
+      expect(audio.sampleRate).toBe(TTS_BACKBONE_MODEL.sampleRate);
+    });
+
+    it('throws if models not loaded', async () => {
+      await expect(ttsService.generate('test')).rejects.toThrow('TTS models not loaded');
+    });
+  });
+
+  describe('saveToFile', () => {
+    it('writes base64-encoded PCM to correct path', async () => {
+      mockRNFS.exists.mockResolvedValue(false);
+      mockRNFS.mkdir.mockResolvedValueOnce(undefined);
+      mockRNFS.writeFile.mockResolvedValueOnce(undefined);
+
+      const audio = {
+        samples: new Float32Array([0.1, 0.2, 0.3]),
+        durationSeconds: 0.01,
+        sampleRate: 24000,
+        waveformData: new Array(200).fill(0.1),
+      };
+
+      const path = await ttsService.saveToFile(audio, 'conv1', 'msg1');
+
+      expect(path).toBe('/mock/docs/audio-cache/conv1/msg1.pcm');
+      expect(mockRNFS.writeFile).toHaveBeenCalledWith(
+        '/mock/docs/audio-cache/conv1/msg1.pcm',
+        expect.any(String),
+        'base64',
+      );
+    });
+  });
+
+  // ─── Stop ────────────────────────────────────────────────────────────────
+
+  describe('stop', () => {
+    it('sets isSpeakingFlag to false', () => {
+      (ttsService as any).isSpeakingFlag = true;
+      ttsService.stop();
+      expect(ttsService.isSpeaking()).toBe(false);
+    });
+
+    it('calls stop on currentSource', () => {
+      const mockSource = { stop: jest.fn() };
+      (ttsService as any).currentSource = mockSource;
+      ttsService.stop();
+      expect(mockSource.stop).toHaveBeenCalled();
+    });
+  });
+
+  // ─── Cache ────────────────────────────────────────────────────────────────
+
+  describe('getAudioCacheSizeMB', () => {
+    it('returns 0 if cache directory does not exist', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(false);
+      const size = await ttsService.getAudioCacheSizeMB();
+      expect(size).toBe(0);
+    });
+
+    it('returns size in MB by summing individual file sizes', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(true);
+      // readDir(cacheRoot) → one conversation directory
+      (mockRNFS as any).readDir
+        .mockResolvedValueOnce([{ isDirectory: () => true, path: '/mock/docs/audio-cache/conv1' }])
+        // readDir(conv1) → two .pcm files, each 2.5 MB
+        .mockResolvedValueOnce([
+          { isDirectory: () => false, size: 2.5 * 1024 * 1024 },
+          { isDirectory: () => false, size: 2.5 * 1024 * 1024 },
+        ]);
+      const size = await ttsService.getAudioCacheSizeMB();
+      expect(size).toBeCloseTo(5);
+    });
+  });
+
+  describe('clearAudioCache', () => {
+    it('unlinks the cache root if it exists', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(true);
+      mockRNFS.unlink.mockResolvedValueOnce(undefined);
+      await ttsService.clearAudioCache();
+      expect(mockRNFS.unlink).toHaveBeenCalledWith('/mock/docs/audio-cache');
+    });
+
+    it('does nothing if cache does not exist', async () => {
+      mockRNFS.exists.mockResolvedValueOnce(false);
+      await ttsService.clearAudioCache();
+      expect(mockRNFS.unlink).not.toHaveBeenCalled();
+    });
+  });
+});
diff --git a/__tests__/unit/stores/ttsStore.test.ts b/__tests__/unit/stores/ttsStore.test.ts
new file mode 100644
index 00000000..568fd9c0
--- /dev/null
+++ b/__tests__/unit/stores/ttsStore.test.ts
@@ -0,0 +1,276 @@
+/**
+ * TTS Store Unit Tests
+ *
+ * Tests for download state, model lifecycle, Chat Mode speak/stop,
+ * Audio Mode generateAndSave/playMessage, and settings persistence.
+ * Priority: P1 - Core TTS state management.
+ */
+
+jest.mock('../../../src/services/ttsService', () => ({
+  ttsService: {
+    isBackboneDownloaded: jest.fn(),
+    isVocoderDownloaded: jest.fn(),
+    downloadBackbone: jest.fn(),
+    downloadVocoder: jest.fn(),
+    deleteModels: jest.fn(),
+    loadModels: jest.fn(),
+    unloadModels: jest.fn(),
+    speak: jest.fn(),
+    stop: jest.fn(),
+    generateAndSave: jest.fn(),
+    playFromFile: jest.fn(),
+    getAudioCacheSizeMB: jest.fn(),
+    clearAudioCache: jest.fn(),
+  },
+}));
+
+jest.mock('../../../src/utils/logger', () => ({
+  __esModule: true,
+  default: { log: jest.fn(), error: jest.fn(), warn: jest.fn() },
+}));
+
+import { useTTSStore } from '../../../src/stores/ttsStore';
+import { ttsService } from '../../../src/services/ttsService';
+
+const mockTTSService = ttsService as jest.Mocked<typeof ttsService>;
+const getState = () => useTTSStore.getState();
+
+const resetState = () => {
+  useTTSStore.setState({
+    isBackboneDownloaded: false,
+    isVocoderDownloaded: false,
+    isDownloadingBackbone: false,
+    isDownloadingVocoder: false,
+    backboneDownloadProgress: 0,
+    vocoderDownloadProgress: 0,
+    isModelLoading: false,
+    isModelLoaded: false,
+    isSpeaking: false,
+    currentMessageId: null,
+    audioCacheSizeMB: 0,
+    settings: {
+      interfaceMode: 'chat',
+      enabled: true,
+      autoPlay: false,
+      speed: 1.0,
+      voiceId: '0',
+      kokoroVoiceId: 'af_heart',
+    },
+    error: null,
+  });
+};
+
+describe('ttsStore', () => {
+  beforeEach(() => {
+    resetState();
+    jest.clearAllMocks();
+  });
+
+  // ─── Download ─────────────────────────────────────────────────────────────
+
+  describe('checkDownloadStatus', () => {
+    it('reflects backbone and vocoder download state', async () => {
+      mockTTSService.isBackboneDownloaded.mockResolvedValue(true);
+      mockTTSService.isVocoderDownloaded.mockResolvedValue(false);
+
+      await getState().checkDownloadStatus();
+
+      expect(getState().isBackboneDownloaded).toBe(true);
+      expect(getState().isVocoderDownloaded).toBe(false);
+    });
+  });
+
+  describe('downloadModels', () => {
+    it('sets progress states and marks both downloaded on success', async () => {
+      mockTTSService.downloadBackbone.mockImplementation(async (onProgress) => {
+        onProgress?.(0.5);
+        onProgress?.(1.0);
+        return '/path/backbone';
+      });
+      mockTTSService.downloadVocoder.mockImplementation(async (onProgress) => {
+        onProgress?.(1.0);
+        return '/path/vocoder';
+      });
+
+      await getState().downloadModels();
+
+      const state = getState();
+      expect(state.isBackboneDownloaded).toBe(true);
+      expect(state.isVocoderDownloaded).toBe(true);
+      expect(state.isDownloadingBackbone).toBe(false);
+      expect(state.isDownloadingVocoder).toBe(false);
+      expect(state.error).toBeNull();
+    });
+
+    it('sets error and resets downloading flags on failure', async () => {
+      mockTTSService.downloadBackbone.mockRejectedValue(new Error('network error'));
+
+      await getState().downloadModels();
+
+      const state = getState();
+      expect(state.error).toBe('network error');
+      expect(state.isDownloadingBackbone).toBe(false);
+      expect(state.isDownloadingVocoder).toBe(false);
+    });
+  });
+
+  // ─── Model lifecycle ─────────────────────────────────────────────────────
+
+  describe('loadModels', () => {
+    it('sets isModelLoaded on success', async () => {
+      mockTTSService.loadModels.mockResolvedValue(undefined);
+      await getState().loadModels();
+      expect(getState().isModelLoaded).toBe(true);
+      expect(getState().isModelLoading).toBe(false);
+    });
+
+    it('sets error on failure', async () => {
+      mockTTSService.loadModels.mockRejectedValue(new Error('OOM'));
+      await getState().loadModels();
+      expect(getState().error).toBe('OOM');
+      expect(getState().isModelLoaded).toBe(false);
+    });
+
+    it('is a no-op if already loaded', async () => {
+      useTTSStore.setState({ isModelLoaded: true });
+      await getState().loadModels();
+      expect(mockTTSService.loadModels).not.toHaveBeenCalled();
+    });
+  });
+
+  // ─── Chat Mode ────────────────────────────────────────────────────────────
+
+  describe('speak', () => {
+    beforeEach(() => {
+      useTTSStore.setState({ isModelLoaded: true });
+    });
+
+    it('sets isSpeaking true then false after completion', async () => {
+      mockTTSService.speak.mockResolvedValue(undefined);
+      mockTTSService.stop.mockReturnValue(undefined);
+
+      const speaking: boolean[] = [];
+      const unsubscribe = useTTSStore.subscribe((s) => speaking.push(s.isSpeaking));
+
+      await getState().speak('hello', 'msg1');
+
+      unsubscribe();
+      expect(speaking).toContain(true);
+      expect(getState().isSpeaking).toBe(false);
+    });
+
+    it('stops speaking the same message when called again', async () => {
+      useTTSStore.setState({ isSpeaking: true, currentMessageId: 'msg1' });
+      mockTTSService.stop.mockReturnValue(undefined);
+
+      await getState().speak('hello', 'msg1');
+
+      expect(mockTTSService.stop).toHaveBeenCalled();
+      expect(mockTTSService.speak).not.toHaveBeenCalled();
+    });
+
+    it('does nothing if TTS disabled', async () => {
+      useTTSStore.setState({ settings: { ...getState().settings, enabled: false } });
+      await getState().speak('hello', 'msg1');
+      expect(mockTTSService.speak).not.toHaveBeenCalled();
+    });
+
+    it('does nothing if model not loaded', async () => {
+      useTTSStore.setState({ isModelLoaded: false });
+      await getState().speak('hello', 'msg1');
+      expect(mockTTSService.speak).not.toHaveBeenCalled();
+    });
+  });
+
+  // ─── Audio Mode ───────────────────────────────────────────────────────────
+
+  describe('generateAndSave', () => {
+    it('returns path, waveformData, durationSeconds and refreshes cache', async () => {
+      const mockAudio = {
+        samples: new Float32Array(100),
+        durationSeconds: 2.5,
+        sampleRate: 24000,
+        waveformData: new Array(200).fill(0.1),
+      };
+      mockTTSService.generateAndSave.mockResolvedValue({
+        path: '/cache/conv1/msg1.pcm',
+        audio: mockAudio,
+      });
+      mockTTSService.getAudioCacheSizeMB.mockResolvedValue(3.2);
+
+      const result = await getState().generateAndSave('hello', 'conv1', 'msg1');
+
+      expect(result.path).toBe('/cache/conv1/msg1.pcm');
+      expect(result.waveformData).toHaveLength(200);
+      expect(result.durationSeconds).toBe(2.5);
+      expect(getState().audioCacheSizeMB).toBeCloseTo(3.2);
+    });
+  });
+
+  describe('playMessage', () => {
+    it('sets isSpeaking true during playback then false after', async () => {
+      mockTTSService.stop.mockReturnValue(undefined);
+      mockTTSService.playFromFile.mockResolvedValue(undefined);
+
+      const speaking: boolean[] = [];
+      const unsubscribe = useTTSStore.subscribe((s) => speaking.push(s.isSpeaking));
+
+      await getState().playMessage('msg1', '/cache/conv1/msg1.pcm');
+
+      unsubscribe();
+      expect(speaking).toContain(true);
+      expect(getState().isSpeaking).toBe(false);
+    });
+
+    it('stops if same message is already playing', async () => {
+      useTTSStore.setState({ isSpeaking: true, currentMessageId: 'msg1' });
+      mockTTSService.stop.mockReturnValue(undefined);
+
+      await getState().playMessage('msg1', '/cache/conv1/msg1.pcm');
+
+      expect(mockTTSService.stop).toHaveBeenCalled();
+      expect(mockTTSService.playFromFile).not.toHaveBeenCalled();
+    });
+  });
+
+  // ─── Settings ─────────────────────────────────────────────────────────────
+
+  describe('updateSettings', () => {
+    it('merges partial settings correctly', () => {
+      getState().updateSettings({ speed: 1.5, autoPlay: true });
+      const { settings } = getState();
+      expect(settings.speed).toBe(1.5);
+      expect(settings.autoPlay).toBe(true);
+      // Other fields untouched
+      expect(settings.enabled).toBe(true);
+      expect(settings.voiceId).toBe('0');
+    });
+
+    it('can switch interfaceMode', () => {
+      getState().updateSettings({ interfaceMode: 'audio' });
+      expect(getState().settings.interfaceMode).toBe('audio');
+    });
+  });
+
+  describe('clearError', () => {
+    it('clears the error field', () => {
+      useTTSStore.setState({ error: 'something went wrong' });
+      getState().clearError();
+      expect(getState().error).toBeNull();
+    });
+  });
+
+  // ─── Cache ────────────────────────────────────────────────────────────────
+
+  describe('clearAudioCache', () => {
+    it('calls ttsService.clearAudioCache and resets size', async () => {
+      useTTSStore.setState({ audioCacheSizeMB: 10 });
+      mockTTSService.clearAudioCache.mockResolvedValue(undefined);
+
+      await getState().clearAudioCache();
+
+      expect(mockTTSService.clearAudioCache).toHaveBeenCalled();
+      expect(getState().audioCacheSizeMB).toBe(0);
+    });
+  });
+});
diff --git a/__tests__/unit/utils/messageContent.test.ts b/__tests__/unit/utils/messageContent.test.ts
index b35b0181..5f79afef 100644
--- a/__tests__/unit/utils/messageContent.test.ts
+++ b/__tests__/unit/utils/messageContent.test.ts
@@ -118,8 +118,8 @@ describe('stripControlTokens', () => {
       expect(stripControlTokens('<|im_start|>assistant\n<|im_end|>')).toBe('');
     });
 
-    it('preserves whitespace in content', () => {
-      expect(stripControlTokens('  Hello  World  ')).toBe('  Hello  World  ');
+    it('trims leading/trailing whitespace in content', () => {
+      expect(stripControlTokens('  Hello  World  ')).toBe('Hello  World');
     });
 
     it('preserves HTML-like tags that are not control tokens', () => {
diff --git a/android/build.gradle b/android/build.gradle
index dad99b02..984e5bed 100644
--- a/android/build.gradle
+++ b/android/build.gradle
@@ -19,3 +19,4 @@ buildscript {
 }
 
 apply plugin: "com.facebook.react.rootproject"
+
diff --git a/docs/PERSONAS_IMPLEMENTATION_PLAN.md b/docs/PERSONAS_IMPLEMENTATION_PLAN.md
index 93ccdd5d..dd1225fc 100644
--- a/docs/PERSONAS_IMPLEMENTATION_PLAN.md
+++ b/docs/PERSONAS_IMPLEMENTATION_PLAN.md
@@ -31,7 +31,8 @@ export type Capability =
   | 'voice'         // STT + TTS
   | 'vision'        // image understanding
   | 'image-gen'     // image generation
-  | 'rag';          // knowledge base search
+  | 'rag'           // knowledge base search (user-uploaded documents)
+  | 'memory-rag';   // cross-conversation RAG — past messages indexed and retrieved
 
 export type SkillTriggerEvent =
   | 'message_received'    // new message in connected app
@@ -109,8 +110,9 @@ export interface Persona {
   capabilities: Capability[];
 
   // What this persona knows
-  knowledgeBaseIds: string[];        // attached RAG knowledge bases (use projectId as KB id)
-  memoryFacts: PersonaMemoryFact[];  // persistent learned facts
+  knowledgeBaseIds: string[];        // attached RAG knowledge bases (user-uploaded documents)
+  conversationMemoryEnabled: boolean; // true = all past conversations for this persona are embedded + searchable
+  memoryFacts: PersonaMemoryFact[];  // persistent learned facts (LLM-extracted, concise)
 
   // What this persona does automatically
   skills: Skill[];
@@ -227,8 +229,9 @@ export const DEFAULT_PERSONAS: Omit<Persona, 'createdAt' | 'updatedAt'>[] = [
     systemPrompt: 'You are Jarvis, a capable and concise personal assistant. You help with anything — questions, tasks, planning, thinking. You are direct, warm, and never verbose unless asked.',
     icon: 'cpu',
     accentColor: '#6366F1',
-    capabilities: ['text', 'voice', 'vision'],
+    capabilities: ['text', 'voice', 'vision', 'memory-rag'],
     knowledgeBaseIds: [],
+    conversationMemoryEnabled: true,  // Jarvis indexes all past conversations — gives it cross-chat intelligence
     memoryFacts: [],
     skills: [],
     integrationIds: [],
@@ -418,6 +421,113 @@ export function buildMemoryContext(facts: PersonaMemoryFact[]): string {
 }
 ```
 
+### conversationRagService.ts (new — cross-conversation memory)
+
+This is what makes Jarvis actually intelligent across sessions. Rather than relying only on extracted `memoryFacts` (brief summaries) or the current context window, Jarvis embeds every conversation message into a per-persona vector store. When a new message arrives, relevant past exchanges are retrieved and injected as context — so Jarvis remembers "we discussed your onboarding last Tuesday" without you having to repeat it.
+
+**How it's different from document KB:**
+
+| | Document KB (`knowledgeBaseIds`) | Conversation RAG (`conversationMemoryEnabled`) |
+|---|---|---|
+| Source | User-uploaded PDFs, notes | Past conversation messages |
+| Indexed when | User uploads a file | After each assistant response |
+| Retrieved by | User explicitly asking about docs | Automatically on every message |
+| Scoped to | Attached knowledge bases | All conversations for this persona |
+
+```typescript
+// src/services/conversationRagService.ts
+
+/**
+ * Indexes completed conversation messages into the persona's vector store.
+ * Called after each assistant turn completes (streaming done).
+ *
+ * Each chunk stored = ~4–6 messages grouped by semantic coherence, not
+ * arbitrary token windows. This preserves conversational context.
+ */
+export async function indexConversationTurn(
+  personaId: string,
+  conversationId: string,
+  messages: Message[],   // recent messages to embed (typically last 4–6)
+): Promise<void> {
+  const chunks = chunkMessagesForEmbedding(messages);
+  for (const chunk of chunks) {
+    const embedding = await embeddingService.embed(chunk.text);
+    await vectorStore.upsert({
+      id: `${conversationId}:${chunk.startIndex}`,
+      embedding,
+      metadata: {
+        personaId,
+        conversationId,
+        timestamp: chunk.timestamp,
+        preview: chunk.text.slice(0, 120),
+      },
+    });
+  }
+}
+
+/**
+ * Retrieves the most relevant past conversation context for the current message.
+ * Returns plain text ready to inject into the system prompt.
+ */
+export async function retrieveRelevantHistory(
+  personaId: string,
+  currentMessage: string,
+  topK = 3,
+): Promise<string> {
+  const queryEmbedding = await embeddingService.embed(currentMessage);
+  const results = await vectorStore.search({
+    embedding: queryEmbedding,
+    filter: { personaId },
+    topK,
+    minScore: 0.72,   // only inject if meaningfully relevant
+  });
+
+  if (results.length === 0) return '';
+
+  const snippets = results.map(r =>
+    `[${formatRelativeDate(r.metadata.timestamp)}]\n${r.metadata.preview}`
+  );
+  return `\n\nRelevant context from past conversations:\n${snippets.join('\n\n---\n\n')}`;
+}
+
+/**
+ * Groups messages into semantically coherent chunks for embedding.
+ * Avoids splitting a user question from its assistant answer.
+ */
+function chunkMessagesForEmbedding(messages: Message[]): EmbeddingChunk[] {
+  // Pair each user message with its following assistant response
+  // Output: chunks of ~300–400 tokens each
+}
+```
+
+**System prompt injection** (in `llm.ts` or wherever the prompt is assembled):
+
+```typescript
+// When conversationMemoryEnabled is true for the active persona:
+if (persona.conversationMemoryEnabled) {
+  const history = await conversationRagService.retrieveRelevantHistory(
+    persona.id,
+    latestUserMessage,
+  );
+  systemPrompt += history;
+}
+```
+
+**Indexing trigger** (after streaming completes, in chatStore or the streaming callback):
+
+```typescript
+// After assistant response is done streaming:
+if (persona.conversationMemoryEnabled) {
+  conversationRagService.indexConversationTurn(
+    persona.id,
+    conversationId,
+    recentMessages.slice(-6),
+  ).catch(() => {});  // fire-and-forget, non-blocking
+}
+```
+
+**Storage:** Uses the existing `ragService` vector store, namespaced by `personaId`. No new storage layer needed — just a new indexing source.
+
 ---
 
 ## Screens
@@ -926,6 +1036,11 @@ export interface Message {
 18. Memory injection into system prompt
 19. `PersonaMemoryScreen`
 20. Memory bar in chat (new fact notification)
+21. `conversationRagService.ts` — cross-conversation RAG for `memory-rag` capability
+    - Index each conversation turn after streaming completes (fire-and-forget)
+    - Retrieve relevant history and inject into system prompt before each LLM call
+    - Jarvis has `conversationMemoryEnabled: true` by default; other personas opt in via PersonaEditScreen
+    - Reuses existing `ragService` vector store, namespaced by `personaId`
 
 ### Phase 5 — Integrations in Chat (tool calls)
 21. Wire integration tool registry entries
diff --git a/docs/TTS_IMPLEMENTATION_PLAN.md b/docs/TTS_IMPLEMENTATION_PLAN.md
index 19b6942c..41f548f4 100644
--- a/docs/TTS_IMPLEMENTATION_PLAN.md
+++ b/docs/TTS_IMPLEMENTATION_PLAN.md
@@ -2,1075 +2,275 @@
 
 ## Product Vision
 
-Two first-class interface modes, switchable from Settings:
+Two first-class interface modes, switchable from Chat Settings or TTS Settings:
 
 | Mode | Primary output | TTS role | Text |
 |---|---|---|---|
 | **Chat Mode** | Text bubbles | Add-on — play button per message | Default visible |
-| **Audio Mode** | Waveform bubbles | Core — auto-generated at completion | Hidden by default, expandable |
+| **Audio Mode** | Waveform bubbles (both sides) | Core — auto-generated at completion | Hidden by default, expandable |
 
-**Audio Mode is the target product experience.** Messages feel like voice note exchanges — not a chat app that also speaks. The user has full per-message audio controls: scrub to position, adjust playback speed, change voice/tone. Text is always available as a "Show transcript" expand.
+**Audio Mode is the target product experience.** Both the user's voice recordings AND the AI's responses appear as waveform audio bubbles — a full voice-note conversation. No text is shown by default; transcript is always accessible via "Show transcript" expand.
 
-Chat Mode is the fallback for devices that can't run TTS models, or users who prefer it.
+- User voice recordings: right-aligned audio bubbles (recorded WAV, played back locally)
+- AI responses: left-aligned audio bubbles (OuteTTS-generated, with 40-bar waveform visualization)
+
+Chat Mode is the fallback for devices that can't run TTS models, or users who prefer text.
 
 ---
 
 ## Decision Log
 
-### Engine
-**OuteTTS 0.3 (500M) + WavTokenizer** via `llama.rn`.
+### Engine (updated)
+
+**Two-tier TTS architecture:**
+
+| Tier | Engine | Use case | Speed | Size |
+|---|---|---|---|---|
+| **Tier 1 — Speak (Chat Mode)** | Kokoro via `react-native-executorch` | On-demand speak button, long-press Speak action | ~1s (streaming) | ~100MB |
+| **Tier 2 — Generate+Save (Audio Mode)** | OuteTTS 0.3 + WavTokenizer via `llama.rn` | Auto-generate waveform bubble after streaming | ~30–120s | ~527MB |
+
+**Why two tiers:**
+- Kokoro via ExecuTorch is fast enough for interactive use (streaming starts < 1s) but outputs raw PCM chunks — no way to write to disk for waveform scrubbing without custom buffering
+- OuteTTS via llama.rn generates the full audio up front, returns `Float32Array` + waveform data + duration in one call — ideal for the saved-file + waveform visualisation pattern Audio Mode requires
+- OuteTTS is NOT suitable for the speak button (too slow, ~30–120s per sentence)
+- Kokoro is NOT currently available as a GGUF via llama.cpp (feature request opened Jan 2025, closed stale Oct 2025, never merged)
+
+**Previous decision (superseded):**
+OuteTTS only via llama.rn for both modes. Superseded because ~1 minute to speak a single sentence is not acceptable for interactive use.
+
+### Platform constraint
 
-- OuteTTS 1.0 (Qwen3 0.6B) is blocked: the DAC vocoder has no GGUF, and llama.cpp PR#12794 is an open draft. The backbone exists on HuggingFace but the decoder is not implemented upstream.
-- OuteTTS 0.3 with WavTokenizer is the **only fully working path** through llama.rn today (confirmed via TTSScreen.tsx in mybigday/llama.rn example app).
-- Upgrade to OuteTTS 1.0 will be a model swap with no architecture change once PR#12794 and llama.rn PR#300 land.
+`react-native-executorch` requires **Android 13 (API 33)** minimum and **iOS 17** minimum.
+
+Current app `minSdkVersion` is **24 (Android 7)**.
+
+**Resolution:** Kokoro speak is available only on Android 13+ / iOS 17+. On older devices, the speak button falls back to OuteTTS (slow but functional). This is detected at runtime — no code path is dead, just slower on older OS.
+
+`minSdkVersion` stays at 24. No breaking change for existing users.
 
 ### Playback
-**react-native-audio-api** (Software Mansion). Implements the Web Audio API spec for React Native. `decodeAudioTokens()` returns `number[]` (Float32 PCM at 24kHz mono) which feeds directly into an `AudioBuffer`.
+**react-native-audio-api** (Software Mansion, already installed). Implements the Web Audio API spec for React Native. Both Kokoro (streaming `Float32Array` chunks) and OuteTTS (full `Float32Array`) pipe through the same `AudioContext → AudioBufferSourceNode` path at 24kHz mono.
 
 ### Audio Persistence (Audio Mode only)
-In Audio Mode, generated PCM is written to disk as a WAV file per message so scrubbing works without re-generating. Files live at:
+In Audio Mode, generated PCM is written to disk as a raw PCM file per message so scrubbing works without re-generating. Files live at:
 
 ```
-${RNFS.DocumentDirectoryPath}/audio-cache/{conversationId}/{messageId}.wav
+${RNFS.DocumentDirectoryPath}/audio-cache/{conversationId}/{messageId}.pcm
 ```
 
 Cache eviction strategy:
 - Keep the last 50 messages worth of audio per conversation
 - User can wipe audio cache from Settings ("Clear audio cache — X MB")
-- Estimated size: ~1–4 MB per message (24kHz mono, varies by length)
+- Estimated size: ~1–4 MB per message (24kHz mono Float32, varies by length)
 
-In Chat Mode, audio is generated on demand, played, then discarded (no disk write).
+In Chat Mode, audio is generated (via Kokoro) on demand, played, then discarded (no disk write).
 
 ### Voice Selection
-OuteTTS 0.3 supports multiple speaker profiles. Expose as a voice picker in TTSSettingsScreen. Store selected voice ID in `ttsStore` settings (persisted). Default: speaker 0 (natural female).
+- **Kokoro voices (Chat Mode speak):** 8 built-in voices (US/GB English, male/female). Stored as `kokoroVoiceId` in `ttsStore` settings. Default: `af_heart`.
+- **OuteTTS voices (Audio Mode waveform):** Single profile (`speaker 0`) — OuteTTS 0.3 multi-speaker not confirmed working via llama.rn. Will expand when OuteTTS 1.0 lands.
 
 ### Device Gate
-Require **flagship tier (8GB+ RAM)**. The memory stack:
-```
-LLM (3B Q4)       ~2.0 GB
-Whisper base       ~150 MB
-OuteTTS backbone   ~454 MB
-WavTokenizer       ~ 73 MB
-OS + app           ~2.0 GB
-─────────────────────────
-Total:             ~4.7 GB   → fits 8GB devices, tight on 6GB
-```
-Show a warning (not a hard block) for 6–8GB devices. Hard block below 6GB. If device is blocked, Audio Mode is unavailable — app defaults to Chat Mode and hides the Audio Mode option.
-
----
-
-## Model Files
-
-| Role | HuggingFace Repo | File | Size |
-|---|---|---|---|
-| TTS Backbone | `OuteAI/OuteTTS-0.3-500M-GGUF` | `OuteTTS-0.3-500M-Q4_K_M.gguf` | 454 MB |
-| Vocoder | `ggml-org/WavTokenizer` | `WavTokenizer-Large-75-Q5_1.gguf` | 73 MB |
+Show a warning (not a hard block) for 6–8GB devices. Hard block below 6GB for Audio Mode (OuteTTS only). Kokoro speak has no RAM gate.
 
-Direct download URLs (HuggingFace resolve):
+Memory stack (worst case — both models loaded simultaneously):
 ```
-https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf
-https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf
+LLM (3B Q4)            ~2.0 GB
+Whisper base           ~150 MB
+OuteTTS backbone       ~454 MB
+WavTokenizer           ~ 73 MB
+Kokoro (XNNPACK .pte)  ~100 MB  ← new
+OS + app               ~2.0 GB
+──────────────────────────────
+Total:                 ~4.8 GB  → fits 8GB devices
 ```
 
-Storage directories:
-```
-${RNFS.DocumentDirectoryPath}/tts-models/     ← model weights
-${RNFS.DocumentDirectoryPath}/audio-cache/    ← per-message WAV files (Audio Mode only)
-```
+Kokoro and OuteTTS are never loaded simultaneously — Kokoro handles Chat Mode speak (OuteTTS not loaded), OuteTTS handles Audio Mode generation (Kokoro not involved).
 
 ---
 
-## New Package
-
-```bash
-npm install react-native-audio-api
-```
-
-iOS: run `pod install` after.
-Android: auto-linked.
-
----
-
-## Interface Mode Setting
-
-### Where it lives
-`ttsStore` settings object gains:
+## Model Files
 
-```typescript
-export type InterfaceMode = 'chat' | 'audio';
-
-export interface TTSSettings {
-  interfaceMode: InterfaceMode; // default: 'chat' until TTS models downloaded, then user can switch
-  enabled: boolean;
-  autoPlay: boolean;            // Chat Mode only — auto-speak after completion
-  speed: number;                // 0.5–2.0, default 1.0
-  voiceId: string;              // OuteTTS speaker profile, default '0'
-}
-```
+### Tier 1 — Kokoro (react-native-executorch)
 
-### Mode switching rules
-- If TTS models not downloaded → `interfaceMode` locked to `'chat'`
-- If device RAM < 6GB → `interfaceMode` locked to `'chat'`, Audio Mode option hidden
-- Switching mode takes effect immediately for new messages; existing messages render in whatever mode they were generated in (Chat Mode messages have no audio file, Audio Mode messages have one)
-- A banner appears at the top of the chat on first switch: "Audio mode on — responses will play as voice notes."
+Downloaded automatically by `react-native-executorch` to its internal cache (`react-native-executorch/` in document directory). No manual download management needed.
 
----
+| File | Source | Size (approx) |
+|---|---|---|
+| `duration_predictor.pte` | HuggingFace: `software-mansion/react-native-executorch-kokoro` | ~10 MB |
+| `synthesizer.pte` | same | ~80 MB |
+| Voice `.bin` files (per voice) | same repo | ~3–5 MB each |
+| Phonemizer data (tagger + lexicon) | same repo | ~5 MB |
 
-## Audio Mode: Message Bubble
+Total cold download: ~100–120 MB. Subsequent launches use cached files.
 
-### Layout (replaces text bubble for assistant messages)
+### Tier 2 — OuteTTS (llama.rn, audio mode only)
 
-```
-┌─────────────────────────────────────────────┐
-│  [avatar]  ●━━━━━━━━━━━━━━━━━━━  0:42  1x  │
-│            [waveform visualization]          │
-│            [Show transcript ▾]               │
-└─────────────────────────────────────────────┘
-```
-
-- **Waveform bar** — static amplitude visualization drawn from PCM data at generation time (no real-time animation needed, just a static shape like WhatsApp)
-- **Scrubber** — draggable progress indicator
-- **Timestamp** — elapsed / total duration
-- **Speed chip** — tappable, cycles 0.5x → 1x → 1.5x → 2x
-- **Show transcript** — expands inline to full text, collapses again
-
-User messages (voice input via Whisper) show the same bubble layout but with the transcript as primary since we have no TTS for user messages.
+| Role | HuggingFace Repo | File | Size |
+|---|---|---|---|
+| TTS Backbone | `OuteAI/OuteTTS-0.3-500M-GGUF` | `OuteTTS-0.3-500M-Q4_K_M.gguf` | 454 MB |
+| Vocoder | `ggml-org/WavTokenizer` | `WavTokenizer-Large-75-Q5_1.gguf` | 73 MB |
 
-### Per-message controls (long press → action sheet)
-- Change voice (re-generates audio with new speaker profile, overwrites cached file)
-- Regenerate audio
-- Copy text
-- Delete message
+Stored at: `${RNFS.DocumentDirectoryPath}/tts-models/`
 
 ---
 
-## Files to Create
-
-### 1. `src/constants/ttsModels.ts`
+## New Packages
 
-```typescript
-export const TTS_BACKBONE_MODEL = {
-  id: 'outetts-0.3-500m-q4',
-  name: 'OuteTTS 0.3',
-  backboneFile: 'OuteTTS-0.3-500M-Q4_K_M.gguf',
-  backboneUrl: 'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf',
-  backboneSizeMB: 454,
-  vocoderFile: 'WavTokenizer-Large-75-Q5_1.gguf',
-  vocoderUrl: 'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf',
-  vocoderSizeMB: 73,
-  sampleRate: 24000,
-  description: 'Natural-sounding on-device speech. Requires ~530 MB storage.',
-};
-
-export const TTS_SPEAKER_PROFILES = [
-  { id: '0', label: 'Default' },
-  // Add more as OuteTTS 0.3 speaker profiles are confirmed
-];
-
-export const TTS_MIN_RAM_GB = 6;   // warn below 8, hard block below 6
-export const TTS_BLOCK_RAM_GB = 6; // hard block
-export const TTS_WARN_RAM_GB = 8;  // show warning card
-export const AUDIO_CACHE_MAX_MESSAGES = 50; // per conversation
+```bash
+npm install react-native-executorch
+npm install react-native-executorch-bare-resource-fetcher
+npm install @dr.pogodin/react-native-fs @kesha-antonov/react-native-background-downloader
 ```
 
----
-
-### 2. `src/services/ttsService.ts`
-
-Mirror `whisperService.ts` pattern exactly.
-
-```typescript
-import { initLlama, LlamaContext } from 'llama.rn';
-import RNFS from 'react-native-fs';
-import { AudioContext } from 'react-native-audio-api';
-import logger from '../utils/logger';
-import { TTS_BACKBONE_MODEL } from '../constants/ttsModels';
-
-export interface TTSOptions {
-  speed?: number;    // 0.5–2.0, default 1.0
-  voiceId?: string;  // speaker profile id, default '0'
-}
-
-export interface GeneratedAudio {
-  samples: Float32Array;
-  durationSeconds: number;
-  sampleRate: number;
-  /** Amplitude envelope (downsampled to ~200 points) for waveform visualization */
-  waveformData: number[];
-}
-
-class TTSService {
-  private context: LlamaContext | null = null;
-  private isVocoderReady: boolean = false;
-  private isSpeakingFlag: boolean = false;
-  private audioCtx: AudioContext | null = null;
-  private currentSource: AudioBufferSourceNode | null = null;
-  private contextLoadPromise: Promise<void> = Promise.resolve();
-
-  // ─── Directories & Paths ────────────────────────────────────────────────
-
-  getModelsDir(): string {
-    return `${RNFS.DocumentDirectoryPath}/tts-models`;
-  }
-
-  getAudioCacheDir(conversationId: string): string {
-    return `${RNFS.DocumentDirectoryPath}/audio-cache/${conversationId}`;
-  }
-
-  getAudioFilePath(conversationId: string, messageId: string): string {
-    return `${this.getAudioCacheDir(conversationId)}/${messageId}.wav`;
-  }
-
-  async ensureModelsDirExists(): Promise<void> {
-    const dir = this.getModelsDir();
-    if (!await RNFS.exists(dir)) await RNFS.mkdir(dir);
-  }
-
-  async ensureAudioCacheDirExists(conversationId: string): Promise<void> {
-    const dir = this.getAudioCacheDir(conversationId);
-    if (!await RNFS.exists(dir)) await RNFS.mkdir(dir);
-  }
-
-  getBackbonePath(): string {
-    return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.backboneFile}`;
-  }
-
-  getVocoderPath(): string {
-    return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.vocoderFile}`;
-  }
-
-  async isBackboneDownloaded(): Promise<boolean> {
-    return RNFS.exists(this.getBackbonePath());
-  }
-
-  async isVocoderDownloaded(): Promise<boolean> {
-    return RNFS.exists(this.getVocoderPath());
-  }
-
-  async areBothModelsDownloaded(): Promise<boolean> {
-    return (await this.isBackboneDownloaded()) && (await this.isVocoderDownloaded());
-  }
-
-  async isAudioCached(conversationId: string, messageId: string): Promise<boolean> {
-    return RNFS.exists(this.getAudioFilePath(conversationId, messageId));
-  }
-
-  async getAudioCacheSizeMB(): Promise<number> {
-    const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`;
-    if (!await RNFS.exists(cacheRoot)) return 0;
-    const stat = await RNFS.stat(cacheRoot);
-    return stat.size / (1024 * 1024);
-  }
-
-  async clearAudioCache(): Promise<void> {
-    const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`;
-    if (await RNFS.exists(cacheRoot)) await RNFS.unlink(cacheRoot);
-  }
-
-  // ─── Download ────────────────────────────────────────────────────────────
-
-  async downloadBackbone(onProgress?: (p: number) => void): Promise<string> {
-    await this.ensureModelsDirExists();
-    const dest = this.getBackbonePath();
-    if (await RNFS.exists(dest)) return dest;
-    const dl = RNFS.downloadFile({
-      fromUrl: TTS_BACKBONE_MODEL.backboneUrl,
-      toFile: dest,
-      progressDivider: 1,
-      progress: (res) => onProgress?.(res.bytesWritten / res.contentLength),
-    });
-    const result = await dl.promise;
-    if (result.statusCode !== 200) {
-      await RNFS.unlink(dest).catch(() => {});
-      throw new Error(`Backbone download failed: HTTP ${result.statusCode}`);
-    }
-    return dest;
-  }
-
-  async downloadVocoder(onProgress?: (p: number) => void): Promise<string> {
-    await this.ensureModelsDirExists();
-    const dest = this.getVocoderPath();
-    if (await RNFS.exists(dest)) return dest;
-    const dl = RNFS.downloadFile({
-      fromUrl: TTS_BACKBONE_MODEL.vocoderUrl,
-      toFile: dest,
-      progressDivider: 1,
-      progress: (res) => onProgress?.(res.bytesWritten / res.contentLength),
-    });
-    const result = await dl.promise;
-    if (result.statusCode !== 200) {
-      await RNFS.unlink(dest).catch(() => {});
-      throw new Error(`Vocoder download failed: HTTP ${result.statusCode}`);
-    }
-    return dest;
-  }
-
-  async deleteModels(): Promise<void> {
-    await this.unloadModels();
-    const bp = this.getBackbonePath();
-    const vp = this.getVocoderPath();
-    if (await RNFS.exists(bp)) await RNFS.unlink(bp);
-    if (await RNFS.exists(vp)) await RNFS.unlink(vp);
-  }
-
-  // ─── Model Lifecycle ─────────────────────────────────────────────────────
-
-  async loadModels(): Promise<void> {
-    if (this.context && this.isVocoderReady) return;
-
-    this.contextLoadPromise = this.contextLoadPromise.then(async () => {
-      if (this.context && this.isVocoderReady) return;
-
-      logger.log('[TTS] Loading backbone...');
-      this.context = await initLlama({
-        model: this.getBackbonePath(),
-        n_ctx: 8192,
-        n_threads: 4,
-      });
-
-      logger.log('[TTS] Loading vocoder...');
-      await this.context.initVocoder({
-        path: this.getVocoderPath(),
-        n_batch: 4096,
-      });
+iOS: `pod install` after.
 
-      this.isVocoderReady = await this.context.isVocoderEnabled();
-      if (!this.isVocoderReady) {
-        throw new Error('Vocoder failed to initialize — check model files.');
-      }
-
-      logger.log('[TTS] Ready.');
-    });
-
-    return this.contextLoadPromise;
-  }
-
-  async unloadModels(): Promise<void> {
-    this.stop();
-    if (this.context) {
-      await this.context.releaseVocoder().catch(() => {});
-      await this.context.release().catch(() => {});
-      this.context = null;
-    }
-    this.isVocoderReady = false;
-    this.audioCtx?.close().catch(() => {});
-    this.audioCtx = null;
-  }
-
-  isLoaded(): boolean {
-    return this.context !== null && this.isVocoderReady;
-  }
-
-  // ─── Audio Generation ────────────────────────────────────────────────────
-
-  /**
-   * Generate PCM audio for `text`. Does NOT play it.
-   * Returns samples + metadata needed for waveform rendering and playback.
-   */
-  async generate(text: string, options: TTSOptions = {}): Promise<GeneratedAudio> {
-    if (!this.context || !this.isVocoderReady) {
-      throw new Error('TTS models not loaded.');
-    }
-
-    const speakerId = options.voiceId ?? '0';
-    const { prompt, grammar } = await this.context.getFormattedAudioCompletion(
-      speakerId === '0' ? null : speakerId,
-      text,
-    );
-    const guideTokens = await this.context.getAudioCompletionGuideTokens(text);
-
-    const result = await this.context.completion({
-      prompt,
-      grammar,
-      guide_tokens: guideTokens,
-      n_predict: 4096,
-      temperature: 0.7,
-      top_p: 0.9,
-      stop: ['<|im_end|>'],
-    });
-
-    const pcmArray = await this.context.decodeAudioTokens(result.audio_tokens);
-    const samples = new Float32Array(pcmArray);
-    const sampleRate = TTS_BACKBONE_MODEL.sampleRate;
-    const durationSeconds = samples.length / sampleRate;
-    const waveformData = this.downsampleForWaveform(samples, 200);
-
-    return { samples, durationSeconds, sampleRate, waveformData };
-  }
-
-  /**
-   * Write PCM samples to a WAV file on disk.
-   * Used in Audio Mode to persist audio per message.
-   */
-  async saveToFile(audio: GeneratedAudio, conversationId: string, messageId: string): Promise<string> {
-    await this.ensureAudioCacheDirExists(conversationId);
-    const path = this.getAudioFilePath(conversationId, messageId);
-    const wavBuffer = this.encodeWAV(audio.samples, audio.sampleRate);
-    await RNFS.writeFile(path, wavBuffer, 'base64');
-    return path;
-  }
-
-  /**
-   * Generate + save in one step (Audio Mode convenience).
-   */
-  async generateAndSave(
-    text: string,
-    conversationId: string,
-    messageId: string,
-    options: TTSOptions = {},
-  ): Promise<{ path: string; audio: GeneratedAudio }> {
-    const audio = await this.generate(text, options);
-    const path = await this.saveToFile(audio, conversationId, messageId);
-    return { path, audio };
-  }
-
-  // ─── Playback ────────────────────────────────────────────────────────────
-
-  async playFromSamples(samples: Float32Array, speed: number = 1.0, startOffset: number = 0): Promise<void> {
-    const sampleRate = TTS_BACKBONE_MODEL.sampleRate;
-
-    this.audioCtx?.close().catch(() => {});
-    this.audioCtx = new AudioContext({ sampleRate });
-
-    const buffer = this.audioCtx.createBuffer(1, samples.length, sampleRate);
-    buffer.copyToChannel(samples, 0);
-
-    const source = this.audioCtx.createBufferSource();
-    source.buffer = buffer;
-    source.playbackRate.value = speed;
-    source.connect(this.audioCtx.destination);
-
-    this.currentSource = source;
-    this.isSpeakingFlag = true;
-
-    return new Promise((resolve) => {
-      source.onended = () => {
-        this.currentSource = null;
-        this.isSpeakingFlag = false;
-        resolve();
-      };
-      source.start(0, startOffset);
-    });
-  }
-
-  async playFromFile(filePath: string, speed: number = 1.0, startOffset: number = 0): Promise<void> {
-    const base64 = await RNFS.readFile(filePath, 'base64');
-    const samples = this.decodeWAV(base64);
-    return this.playFromSamples(samples, speed, startOffset);
-  }
-
-  /**
-   * Chat Mode convenience: generate + play + discard (no disk write).
-   */
-  async speak(text: string, options: TTSOptions = {}): Promise<void> {
-    if (this.isSpeakingFlag) this.stop();
-    const audio = await this.generate(text, options);
-    if (!this.isSpeakingFlag) { // may have been stopped during generation
-      await this.playFromSamples(audio.samples, options.speed ?? 1.0);
-    }
-  }
-
-  stop(): void {
-    this.isSpeakingFlag = false;
-    try {
-      this.currentSource?.stop();
-    } catch {
-      // already stopped
-    }
-    this.currentSource = null;
-  }
-
-  isSpeaking(): boolean {
-    return this.isSpeakingFlag;
-  }
-
-  // ─── Utilities ───────────────────────────────────────────────────────────
-
-  private downsampleForWaveform(samples: Float32Array, points: number): number[] {
-    const blockSize = Math.floor(samples.length / points);
-    const result: number[] = [];
-    for (let i = 0; i < points; i++) {
-      let sum = 0;
-      for (let j = 0; j < blockSize; j++) {
-        sum += Math.abs(samples[i * blockSize + j]);
-      }
-      result.push(sum / blockSize);
-    }
-    return result;
-  }
-
-  private encodeWAV(samples: Float32Array, sampleRate: number): string {
-    // Standard 16-bit PCM WAV encoding → base64
-    // Implementation: write RIFF header + PCM data
-    const buffer = new ArrayBuffer(44 + samples.length * 2);
-    const view = new DataView(buffer);
-    const writeString = (offset: number, s: string) => {
-      for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i));
-    };
-    writeString(0, 'RIFF');
-    view.setUint32(4, 36 + samples.length * 2, true);
-    writeString(8, 'WAVE');
-    writeString(12, 'fmt ');
-    view.setUint32(16, 16, true);
-    view.setUint16(20, 1, true);
-    view.setUint16(22, 1, true);
-    view.setUint32(24, sampleRate, true);
-    view.setUint32(28, sampleRate * 2, true);
-    view.setUint16(32, 2, true);
-    view.setUint16(34, 16, true);
-    writeString(36, 'data');
-    view.setUint32(40, samples.length * 2, true);
-    for (let i = 0; i < samples.length; i++) {
-      view.setInt16(44 + i * 2, Math.max(-32768, Math.min(32767, samples[i] * 32768)), true);
-    }
-    return Buffer.from(buffer).toString('base64');
-  }
-
-  private decodeWAV(base64: string): Float32Array {
-    const buffer = Buffer.from(base64, 'base64');
-    const view = new DataView(buffer.buffer);
-    const sampleCount = (buffer.length - 44) / 2;
-    const samples = new Float32Array(sampleCount);
-    for (let i = 0; i < sampleCount; i++) {
-      samples[i] = view.getInt16(44 + i * 2, true) / 32768;
-    }
-    return samples;
-  }
-}
-
-export const ttsService = new TTSService();
-```
+**Note:** `react-native-executorch-bare-resource-fetcher` requires its own RNFS fork (`@dr.pogodin/react-native-fs`) alongside the existing `react-native-fs`. Both can coexist.
 
 ---
 
-### 3. `src/stores/ttsStore.ts`
+## Architecture
 
-Mirror `whisperStore.ts` pattern, using Zustand with `persist`.
+### Initialization (`App.tsx`)
 
 ```typescript
-import { create } from 'zustand';
-import { persist, createJSONStorage } from 'zustand/middleware';
-import AsyncStorage from '@react-native-async-storage/async-storage';
-import { ttsService } from '../services/ttsService';
-import logger from '../utils/logger';
-
-export type InterfaceMode = 'chat' | 'audio';
-
-export interface TTSSettings {
-  interfaceMode: InterfaceMode;
-  enabled: boolean;
-  autoPlay: boolean;     // Chat Mode only
-  speed: number;         // 0.5–2.0
-  voiceId: string;       // OuteTTS speaker profile
-}
+import { initExecutorch } from 'react-native-executorch';
+import { BareResourceFetcher } from 'react-native-executorch-bare-resource-fetcher';
 
-export interface TTSState {
-  // Download state
-  isBackboneDownloaded: boolean;
-  isVocoderDownloaded: boolean;
-  isDownloadingBackbone: boolean;
-  isDownloadingVocoder: boolean;
-  backboneDownloadProgress: number;
-  vocoderDownloadProgress: number;
-
-  // Model lifecycle
-  isModelLoading: boolean;
-  isModelLoaded: boolean;
-
-  // Playback
-  isSpeaking: boolean;
-  currentMessageId: string | null;
-  playbackPosition: number;  // seconds, for scrubber
-
-  // Cache
-  audioCacheSizeMB: number;
-
-  // Settings (persisted)
-  settings: TTSSettings;
-
-  error: string | null;
-
-  // Actions
-  checkDownloadStatus: () => Promise<void>;
-  downloadModels: () => Promise<void>;
-  deleteModels: () => Promise<void>;
-  loadModels: () => Promise<void>;
-  unloadModels: () => Promise<void>;
-
-  // Chat Mode
-  speak: (text: string, messageId: string) => Promise<void>;
-  stop: () => void;
-
-  // Audio Mode
-  generateAndSave: (text: string, conversationId: string, messageId: string) => Promise<{ path: string; waveformData: number[]; durationSeconds: number }>;
-  playMessage: (messageId: string, filePath: string, startOffset?: number) => Promise<void>;
-  stopPlayback: () => void;
-
-  // Cache management
-  refreshCacheSize: () => Promise<void>;
-  clearAudioCache: () => Promise<void>;
-
-  updateSettings: (patch: Partial<TTSSettings>) => void;
-  clearError: () => void;
-}
-
-export const useTTSStore = create<TTSState>()(
-  persist(
-    (set, get) => ({
-      isBackboneDownloaded: false,
-      isVocoderDownloaded: false,
-      isDownloadingBackbone: false,
-      isDownloadingVocoder: false,
-      backboneDownloadProgress: 0,
-      vocoderDownloadProgress: 0,
-      isModelLoading: false,
-      isModelLoaded: false,
-      isSpeaking: false,
-      currentMessageId: null,
-      playbackPosition: 0,
-      audioCacheSizeMB: 0,
-      settings: {
-        interfaceMode: 'chat',
-        enabled: true,
-        autoPlay: false,
-        speed: 1.0,
-        voiceId: '0',
-      },
-      error: null,
-
-      checkDownloadStatus: async () => {
-        const [backbone, vocoder] = await Promise.all([
-          ttsService.isBackboneDownloaded(),
-          ttsService.isVocoderDownloaded(),
-        ]);
-        set({ isBackboneDownloaded: backbone, isVocoderDownloaded: vocoder });
-      },
-
-      downloadModels: async () => {
-        set({ error: null });
-        try {
-          set({ isDownloadingBackbone: true, backboneDownloadProgress: 0 });
-          await ttsService.downloadBackbone((p) => set({ backboneDownloadProgress: p }));
-          set({ isDownloadingBackbone: false, isBackboneDownloaded: true });
-
-          set({ isDownloadingVocoder: true, vocoderDownloadProgress: 0 });
-          await ttsService.downloadVocoder((p) => set({ vocoderDownloadProgress: p }));
-          set({ isDownloadingVocoder: false, isVocoderDownloaded: true });
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : 'Download failed';
-          logger.error('[TTS Store] Download error:', msg);
-          set({ isDownloadingBackbone: false, isDownloadingVocoder: false, error: msg });
-        }
-      },
-
-      deleteModels: async () => {
-        await ttsService.deleteModels();
-        set({ isBackboneDownloaded: false, isVocoderDownloaded: false, isModelLoaded: false });
-      },
-
-      loadModels: async () => {
-        if (get().isModelLoaded || get().isModelLoading) return;
-        set({ isModelLoading: true, error: null });
-        try {
-          await ttsService.loadModels();
-          set({ isModelLoaded: true });
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : 'Failed to load TTS models';
-          logger.error('[TTS Store] Load error:', msg);
-          set({ error: msg });
-        } finally {
-          set({ isModelLoading: false });
-        }
-      },
-
-      unloadModels: async () => {
-        await ttsService.unloadModels();
-        set({ isModelLoaded: false, isSpeaking: false, currentMessageId: null });
-      },
-
-      // ── Chat Mode ──────────────────────────────────────────────────────────
-
-      speak: async (text: string, messageId: string) => {
-        const { isModelLoaded, settings } = get();
-        if (!settings.enabled) return;
-        if (!isModelLoaded) return;
-
-        if (get().currentMessageId === messageId && get().isSpeaking) {
-          get().stop();
-          return;
-        }
-
-        ttsService.stop();
-        set({ isSpeaking: true, currentMessageId: messageId, error: null });
-
-        try {
-          await ttsService.speak(text, { speed: settings.speed, voiceId: settings.voiceId });
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : 'Speech failed';
-          logger.error('[TTS Store] Speak error:', msg);
-          set({ error: msg });
-        } finally {
-          set({ isSpeaking: false, currentMessageId: null });
-        }
-      },
-
-      stop: () => {
-        ttsService.stop();
-        set({ isSpeaking: false, currentMessageId: null });
-      },
-
-      // ── Audio Mode ─────────────────────────────────────────────────────────
-
-      generateAndSave: async (text: string, conversationId: string, messageId: string) => {
-        const { settings } = get();
-        const { path, audio } = await ttsService.generateAndSave(
-          text,
-          conversationId,
-          messageId,
-          { voiceId: settings.voiceId },
-        );
-        await get().refreshCacheSize();
-        return { path, waveformData: audio.waveformData, durationSeconds: audio.durationSeconds };
-      },
-
-      playMessage: async (messageId: string, filePath: string, startOffset: number = 0) => {
-        const { settings } = get();
-
-        if (get().currentMessageId === messageId && get().isSpeaking) {
-          get().stopPlayback();
-          return;
-        }
-
-        ttsService.stop();
-        set({ isSpeaking: true, currentMessageId: messageId, playbackPosition: startOffset });
-
-        try {
-          await ttsService.playFromFile(filePath, settings.speed, startOffset);
-        } catch (err) {
-          const msg = err instanceof Error ? err.message : 'Playback failed';
-          logger.error('[TTS Store] Playback error:', msg);
-          set({ error: msg });
-        } finally {
-          set({ isSpeaking: false, currentMessageId: null, playbackPosition: 0 });
-        }
-      },
-
-      stopPlayback: () => {
-        ttsService.stop();
-        set({ isSpeaking: false, currentMessageId: null, playbackPosition: 0 });
-      },
-
-      // ── Cache ──────────────────────────────────────────────────────────────
-
-      refreshCacheSize: async () => {
-        const mb = await ttsService.getAudioCacheSizeMB();
-        set({ audioCacheSizeMB: mb });
-      },
-
-      clearAudioCache: async () => {
-        await ttsService.clearAudioCache();
-        set({ audioCacheSizeMB: 0 });
-      },
-
-      updateSettings: (patch) => {
-        set((state) => ({ settings: { ...state.settings, ...patch } }));
-      },
-
-      clearError: () => set({ error: null }),
-    }),
-    {
-      name: 'tts-store',
-      storage: createJSONStorage(() => AsyncStorage),
-      partialize: (state) => ({ settings: state.settings }),
-    }
-  )
-);
+// Called once at startup, before any model hook is used
+initExecutorch({ resourceFetcher: BareResourceFetcher });
 ```
 
----
+### KokoroTTSManager component
 
-### 4. `src/hooks/useTTS.ts`
+`react-native-executorch`'s `useTextToSpeech` is a React hook — it must live in a component. A `KokoroTTSManager` component mounts near the root, holds the hook instance, and exposes its methods via a module-level ref (`kokoroRef`).
 
-```typescript
-import { useEffect, useCallback } from 'react';
-import { useTTSStore } from '../stores/ttsStore';
-import { hardwareService } from '../services/hardware';
-import { TTS_BLOCK_RAM_GB, TTS_WARN_RAM_GB } from '../constants/ttsModels';
-
-export function useTTS() {
-  const store = useTTSStore();
-
-  useEffect(() => {
-    store.checkDownloadStatus();
-  }, []);
-
-  const canRunOnDevice = useCallback(async (): Promise<{ allowed: boolean; warning: boolean }> => {
-    const ramGB = await hardwareService.getTotalMemoryGB();
-    return {
-      allowed: ramGB >= TTS_BLOCK_RAM_GB,
-      warning: ramGB < TTS_WARN_RAM_GB,
-    };
-  }, []);
-
-  const speakMessage = useCallback(
-    (text: string, messageId: string) => {
-      if (!store.isModelLoaded && store.isBackboneDownloaded && store.isVocoderDownloaded) {
-        store.loadModels().then(() => store.speak(text, messageId));
-        return;
-      }
-      store.speak(text, messageId);
-    },
-    [store]
-  );
-
-  return {
-    ...store,
-    speakMessage,
-    canRunOnDevice,
-    areBothDownloaded: store.isBackboneDownloaded && store.isVocoderDownloaded,
-    isDownloading: store.isDownloadingBackbone || store.isDownloadingVocoder,
-    overallDownloadProgress:
-      store.backboneDownloadProgress * 0.86 + store.vocoderDownloadProgress * 0.14,
-    isAudioMode: store.settings.interfaceMode === 'audio',
-    isChatMode: store.settings.interfaceMode === 'chat',
-  };
-}
 ```
-
----
-
-### 5. `src/components/AudioMessageBubble/index.tsx` *(Audio Mode only)*
-
-Replaces `ChatMessage` assistant bubble when `interfaceMode === 'audio'`.
-
-```typescript
-interface AudioMessageBubbleProps {
-  messageId: string;
-  conversationId: string;
-  audioPath: string;          // path to WAV on disk
-  waveformData: number[];     // 200-point amplitude array
-  durationSeconds: number;
-  isGenerating?: boolean;     // true while TTS is still running
-}
+App
+└── KokoroTTSManager          ← mounts useTextToSpeech, wires to kokoroRef
+    └── AppNavigator
+        └── ChatScreen
+            └── TTSButton     ← calls kokoroRef.stream(text, callbacks)
 ```
 
-**Layout:**
-- Static waveform bar (200 rect bars, amplitude-scaled, filled up to scrubber position)
-- Draggable scrubber thumb
-- `MM:SS` elapsed / total
-- Speed chip (cycles 0.5x → 1x → 1.5x → 2x, persists to store)
-- "Show transcript" collapse/expand
-- Long press → action sheet (Change voice, Regenerate, Copy text, Delete)
-
----
-
-### 6. `src/components/TTSButton/index.tsx` *(Chat Mode only)*
-
-Play/stop button that appears on each assistant message bubble. Unchanged from original plan — only rendered when `interfaceMode === 'chat'`.
+### Speak flow (Chat Mode — Kokoro, fast)
 
-```typescript
-// Don't render in Audio Mode or if TTS disabled/not downloaded
-if (settings.interfaceMode === 'audio' || !settings.enabled || !areBothDownloaded) return null;
 ```
-
----
-
-### 7. `src/screens/TTSSettingsScreen/index.tsx`
-
-Accessible from SettingsScreen → "Text to Speech" row.
-
-**Sections:**
-1. **Header** — back button + "Text to Speech" title
-2. **Interface Mode card** — segmented control: `Chat` / `Audio`
-   - If device RAM < `TTS_BLOCK_RAM_GB`: Audio option is greyed out with "Requires 6GB+ RAM"
-   - If RAM is between block and warn thresholds: yellow warning under the control
-3. **Master toggle card** — enable/disable TTS (Chat Mode only — in Audio Mode, TTS is always on)
-4. **Model download card** — download status for both files with separate progress bars; "Download (527 MB)" / "Remove" buttons
-5. **Voice card** (shown when downloaded) — voice picker from `TTS_SPEAKER_PROFILES`
-6. **Playback card** (shown when downloaded) — Speed slider (0.5–2.0x), Auto-play toggle (Chat Mode only)
-7. **Audio cache card** (Audio Mode only) — "Audio cache: X MB" + "Clear cache" button
-8. **Device compatibility card** — RAM check with status
-9. **Privacy card** — "All speech generated on your device. Nothing is sent to any server."
-
----
-
-### 8. `src/stores/index.ts`
-
-Add:
-```typescript
-export { useTTSStore } from './ttsStore';
+TTSButton tap
+  → kokoroRef.stream({ text, onNext: playChunk, onBegin, onEnd })
+  → AudioContext buffers played as Float32Array chunks arrive
+  → Streaming: audio starts < 1s after tap
 ```
 
-### 9. `src/services/index.ts`
+### Voice input flow (Audio Mode — user side)
 
-Add:
-```typescript
-export { ttsService } from './ttsService';
 ```
-
-### 10. `src/navigation/types.ts`
-
-Add `TTSSettings: undefined` to `RootStackParamList`.
-
-### 11. `src/navigation/AppNavigator.tsx`
-
-```tsx
-<RootStack.Screen name="TTSSettings" component={TTSSettingsScreen} options={{ headerShown: false }} />
+User taps mic button
+  → audioRecorderService.startRecording() — records WAV to disk
+  → User releases mic
+  → audioRecorderService.stopRecording() → { path, durationSeconds }
+  → whisperService.transcribeFile(path) — file-based STT
+  → onAutoSend(transcript, { uri: path, format: 'wav', durationSeconds })
+  → ChatInput builds MediaAttachment { type: 'audio', uri, durationSeconds }
+  → onSend(transcript, [audioAttachment]) — content = transcript, attachment = WAV
+  → MessageRenderer: user message with audio attachment → right-aligned AudioMessageBubble
+  → LLM receives transcript as text input (standard text generation)
 ```
 
-### 12. `src/screens/index.ts`
-
-Export `TTSSettingsScreen` and `AudioMessageBubble`.
+For models that natively support audio input (e.g. Qwen2-Audio): WAV is passed directly as `input_audio` to the model — Whisper is bypassed entirely.
 
-### 13. `src/screens/SettingsScreen.tsx`
+### Generate+Save flow (Audio Mode — AI side)
 
-Add nav row pointing to `TTSSettings` (after the Voice row):
-```tsx
-<TouchableOpacity onPress={() => navigation.navigate('TTSSettings')}>
-  <Icon name="volume-2" />
-  <Text>Text to Speech</Text>
-  <Icon name="chevron-right" />
-</TouchableOpacity>
 ```
-
-### 14. `src/components/ChatMessage/index.tsx`
-
-Mode-branch the assistant message render path:
-
-```tsx
-import { AudioMessageBubble } from '../AudioMessageBubble';
-import { TTSButton } from '../TTSButton';
-
-// In assistant message render:
-const { settings } = useTTSStore();
-
-if (settings.interfaceMode === 'audio' && message.audioPath) {
-  return (
-    <AudioMessageBubble
-      messageId={message.id}
-      conversationId={conversationId}
-      audioPath={message.audioPath}
-      waveformData={message.waveformData ?? []}
-      durationSeconds={message.audioDurationSeconds ?? 0}
-      isGenerating={message.isGeneratingAudio}
-    />
-  );
-}
-
-// Chat Mode: existing text bubble + TTSButton
+Streaming LLM response ends
+  → triggerAudioModeGeneration(conversationId, messageId, content)
+    (reads fresh message from useChatStore.getState() — not stale closure)
+  → ttsService.generateAndSave(text, ctx, options)
+  → OuteTTS runs inference → Float32Array + waveformData + duration
+  → Write PCM to disk → update message { audioPath, waveformData, audioDurationSeconds }
+  → MessageRenderer shows left-aligned AudioMessageBubble
 ```
 
-This requires adding `audioPath`, `waveformData`, `audioDurationSeconds`, and `isGeneratingAudio` fields to the message model.
+---
 
-### 15. Message model update (`src/types/` or wherever `Message` is defined)
+## ttsStore additions
 
 ```typescript
-export interface Message {
-  // ... existing fields ...
-  audioPath?: string;              // Audio Mode: path to WAV on disk
-  waveformData?: number[];         // Audio Mode: 200-point amplitude envelope
-  audioDurationSeconds?: number;   // Audio Mode: total duration
-  isGeneratingAudio?: boolean;     // true while TTS is running for this message
-}
+// Kokoro state
+kokoroReady: boolean;           // useTextToSpeech.isReady
+kokoroDownloadProgress: number; // 0–1, during initial model download
+kokoroVoiceId: KokoroVoiceId;  // persisted setting
+
+// Actions
+setKokoroReady: (ready: boolean, progress: number) => void;
+kokoroSpeak: (text: string, messageId: string) => void;  // delegates to kokoroRef
+kokoroStop: () => void;
 ```
 
-### 16. Chat completion flow
-
-**Chat Mode (autoPlay):** unchanged from original plan — call `speak()` after streaming completes when `autoPlay: true`.
-
-**Audio Mode:** after streaming completes, immediately trigger `generateAndSave()` and update the message record with the returned `audioPath`, `waveformData`, `durationSeconds`. Set `isGeneratingAudio: true` on the message while generation runs so the bubble shows a loading state.
-
+The existing `speak()` action becomes:
 ```typescript
-// After streaming completes, if Audio Mode:
-if (settings.interfaceMode === 'audio') {
-  updateMessage(lastMessage.id, { isGeneratingAudio: true });
-  const { path, waveformData, durationSeconds } = await ttsStore.generateAndSave(
-    stripControlTokens(lastMessage.content),
-    conversationId,
-    lastMessage.id,
-  );
-  updateMessage(lastMessage.id, {
-    audioPath: path,
-    waveformData,
-    audioDurationSeconds: durationSeconds,
-    isGeneratingAudio: false,
-  });
+speak: (text, messageId) => {
+  if (kokoroReady) {
+    kokoroSpeak(text, messageId);  // fast path
+  } else {
+    // OuteTTS fallback (slow, Android <13 or first launch before Kokoro loads)
+    outeTTSSpeak(text, messageId);
+  }
 }
 ```
 
 ---
 
-## Tests to Write
-
-### `__tests__/unit/services/ttsService.test.ts`
-- `generate` calls `getFormattedAudioCompletion`, `getAudioCompletionGuideTokens`, `completion`, `decodeAudioTokens` in order
-- `generate` returns correct `durationSeconds` and 200-point `waveformData`
-- `saveToFile` writes a valid WAV file to the correct path
-- `generateAndSave` calls both and returns path + audio
-- `playFromFile` reads WAV, decodes, and calls `playFromSamples`
-- `stop` sets `isSpeakingFlag` to false and calls `currentSource.stop()`
-- `encodeWAV` / `decodeWAV` round-trip preserves samples (within 16-bit quantization error)
-- `getAudioCacheSizeMB` returns correct value
-- `clearAudioCache` removes the cache directory
-
-### `__tests__/unit/stores/ttsStore.test.ts`
-- `generateAndSave` sets correct waveformData and calls `refreshCacheSize`
-- `playMessage` sets `isSpeaking: true`, then `false` after completion
-- `playMessage` on same messageId while playing → calls `stopPlayback`
-- `updateSettings` merges partial settings correctly
-- Settings persisted: `interfaceMode`, `speed`, `voiceId`, `enabled` survive re-hydration
-
-### `__tests__/integration/tts.test.ts`
-- **Chat Mode full flow:** download → load → speak → stop
-- **Audio Mode full flow:** download → load → generateAndSave → playMessage → stop
-- **Auto-play:** Chat Mode with `autoPlay: true`, streaming completes → `speak` called
-- **Audio Mode post-completion:** streaming completes → `generateAndSave` called → message updated with `audioPath`
-- **Mode switch:** switching `interfaceMode` from `'chat'` to `'audio'` takes effect for next message
+## Kokoro Voice IDs
 
----
-
-## Implementation Order
-
-1. `src/constants/ttsModels.ts`
-2. `src/services/ttsService.ts` (with WAV encode/decode + `generate`/`generateAndSave`/`playFromFile`)
-3. `src/stores/ttsStore.ts` (with Audio Mode actions)
-4. `src/hooks/useTTS.ts`
-5. `src/stores/index.ts` — add export
-6. `src/services/index.ts` — add export
-7. `src/navigation/types.ts` — add route
-8. Message model — add `audioPath`, `waveformData`, `audioDurationSeconds`, `isGeneratingAudio`
-9. `src/components/AudioMessageBubble/index.tsx`
-10. `src/components/TTSButton/index.tsx` (Chat Mode only, unchanged)
-11. `src/screens/TTSSettingsScreen/index.tsx` (with Interface Mode section)
-12. `src/screens/index.ts` — add exports
-13. `src/navigation/AppNavigator.tsx` — add screen
-14. `src/screens/SettingsScreen.tsx` — add nav row
-15. `src/components/ChatMessage/index.tsx` — mode-branch render
-16. Wire Audio Mode generation into chat completion flow
-17. Write all tests
-18. `npm install react-native-audio-api` + `pod install`
+| ID | Label | Accent | Gender |
+|---|---|---|---|
+| `af_heart` | Heart | US English | Female |
+| `af_river` | River | US English | Female |
+| `af_sarah` | Sarah | US English | Female |
+| `am_adam` | Adam | US English | Male |
+| `am_michael` | Michael | US English | Male |
+| `am_santa` | Santa | US English | Male |
+| `bf_emma` | Emma | British English | Female |
+| `bm_daniel` | Daniel | British English | Male |
 
 ---
 
-## Memory Safety
+## Files to Create / Modify
 
-Before calling `loadModels()`, check available memory:
+### New files
+- `src/components/KokoroTTSManager.tsx` — mounts the hook, exposes via ref
+- `src/constants/kokoroModels.ts` — voice/model constants mirroring executorch exports
 
-```typescript
-const available = await hardwareService.getAvailableMemoryGB();
-if (available < 1.0) {
-  throw new Error('Not enough free memory. Try closing image generation first.');
-}
-```
+### Modified files
+- `App.tsx` — add `initExecutorch()` call + mount `<KokoroTTSManager>`
+- `src/stores/ttsStore.ts` — add Kokoro state + `kokoroVoiceId` setting
+- `src/services/ttsService.ts` — no change to OuteTTS path
+- `src/components/TTSButton/index.tsx` — use Kokoro speak when available
+- `src/screens/TTSSettingsScreen/index.tsx` — add voice picker (8 Kokoro voices)
 
-This check belongs in `useTTSStore.loadModels()` before calling `ttsService.loadModels()`.
+### android/build.gradle
+- Bump `minSdkVersion` for executorch: **leave at 24**, guard Kokoro at runtime via `Platform.Version >= 33`
 
 ---
 
-## Future: Upgrade to OuteTTS 1.0
-
-When llama.cpp PR#12794 (DAC decoder) merges and llama.rn PR#300 (codec.cpp integration) ships:
-
-1. Add `TTS_BACKBONE_MODEL_V2` to `ttsModels.ts` (backbone + DAC vocoder GGUF)
-2. `ttsService.ts` API is unchanged — model-agnostic
-3. Store gets a `modelVersion` setting; 0.3 and 1.0 can coexist on disk
+## Status
+
+| Task | Status |
+|---|---|
+| OuteTTS speak (Chat Mode) | ✅ Implemented (slow, functional) |
+| OuteTTS generate+save (Audio Mode — AI side) | ✅ Implemented |
+| Stale-closure bug fix (reads fresh store state) | ✅ Fixed |
+| TTSButton + Speak long-press action | ✅ Implemented |
+| Generation vs playback state (spinner) | ✅ Implemented |
+| 300-char text truncation | ✅ Implemented |
+| checkDownloadStatus on app start | ✅ Implemented |
+| User voice recording → audio bubble (Audio Mode) | ✅ Implemented |
+| Auto-send on voice stop in Audio Mode | ✅ Implemented |
+| User audio bubble right-aligned | ✅ Implemented |
+| TTS section in Chat Settings modal | ✅ Implemented |
+| Chat Settings modal: TTS Settings deep link | ✅ Implemented |
+| Multimodal audio input (bypass Whisper for audio-capable models) | ✅ Implemented |
+| Kokoro via react-native-executorch | 🔲 Not started |
+| KokoroTTSManager component | 🔲 Not started |
+| Voice picker in TTSSettingsScreen | 🔲 Not started |
+| Kokoro → OuteTTS fallback for Android <13 | 🔲 Not started |
diff --git a/ios/Podfile.lock b/ios/Podfile.lock
index a076829d..3f58a70e 100644
--- a/ios/Podfile.lock
+++ b/ios/Podfile.lock
@@ -2797,6 +2797,121 @@ PODS:
     - React-perflogger (= 0.83.1)
     - React-utils (= 0.83.1)
     - SocketRocket
+  - RNAudioAPI (0.11.7):
+    - boost
+    - DoubleConversion
+    - fast_float
+    - fmt
+    - glog
+    - hermes-engine
+    - RCT-Folly
+    - RCT-Folly/Fabric
+    - RCTRequired
+    - RCTTypeSafety
+    - React-Core
+    - React-debug
+    - React-Fabric
+    - React-featureflags
+    - React-graphics
+    - React-ImageManager
+    - React-jsi
+    - React-NativeModulesApple
+    - React-RCTFabric
+    - React-renderercss
+    - React-rendererdebug
+    - React-utils
+    - ReactCodegen
+    - ReactCommon/turbomodule/bridging
+    - ReactCommon/turbomodule/core
+    - RNAudioAPI/audioapi (= 0.11.7)
+    - SocketRocket
+    - Yoga
+  - RNAudioAPI/audioapi (0.11.7):
+    - boost
+    - DoubleConversion
+    - fast_float
+    - fmt
+    - glog
+    - hermes-engine
+    - RCT-Folly
+    - RCT-Folly/Fabric
+    - RCTRequired
+    - RCTTypeSafety
+    - React-Core
+    - React-debug
+    - React-Fabric
+    - React-featureflags
+    - React-graphics
+    - React-ImageManager
+    - React-jsi
+    - React-NativeModulesApple
+    - React-RCTFabric
+    - React-renderercss
+    - React-rendererdebug
+    - React-utils
+    - ReactCodegen
+    - ReactCommon/turbomodule/bridging
+    - ReactCommon/turbomodule/core
+    - RNAudioAPI/audioapi/audioapi_dsp (= 0.11.7)
+    - RNAudioAPI/audioapi/ios (= 0.11.7)
+    - SocketRocket
+    - Yoga
+  - RNAudioAPI/audioapi/audioapi_dsp (0.11.7):
+    - boost
+    - DoubleConversion
+    - fast_float
+    - fmt
+    - glog
+    - hermes-engine
+    - RCT-Folly
+    - RCT-Folly/Fabric
+    - RCTRequired
+    - RCTTypeSafety
+    - React-Core
+    - React-debug
+    - React-Fabric
+    - React-featureflags
+    - React-graphics
+    - React-ImageManager
+    - React-jsi
+    - React-NativeModulesApple
+    - React-RCTFabric
+    - React-renderercss
+    - React-rendererdebug
+    - React-utils
+    - ReactCodegen
+    - ReactCommon/turbomodule/bridging
+    - ReactCommon/turbomodule/core
+    - SocketRocket
+    - Yoga
+  - RNAudioAPI/audioapi/ios (0.11.7):
+    - boost
+    - DoubleConversion
+    - fast_float
+    - fmt
+    - glog
+    - hermes-engine
+    - RCT-Folly
+    - RCT-Folly/Fabric
+    - RCTRequired
+    - RCTTypeSafety
+    - React-Core
+    - React-debug
+    - React-Fabric
+    - React-featureflags
+    - React-graphics
+    - React-ImageManager
+    - React-jsi
+    - React-NativeModulesApple
+    - React-RCTFabric
+    - React-renderercss
+    - React-rendererdebug
+    - React-utils
+    - ReactCodegen
+    - ReactCommon/turbomodule/bridging
+    - ReactCommon/turbomodule/core
+    - SocketRocket
+    - Yoga
   - RNCAsyncStorage (2.2.0):
     - boost
     - DoubleConversion
@@ -3368,6 +3483,7 @@ DEPENDENCIES:
   - ReactAppDependencyProvider (from `build/generated/ios/ReactAppDependencyProvider`)
   - ReactCodegen (from `build/generated/ios/ReactCodegen`)
   - ReactCommon/turbomodule/core (from `../node_modules/react-native/ReactCommon`)
+  - RNAudioAPI (from `../node_modules/react-native-audio-api`)
   - "RNCAsyncStorage (from `../node_modules/@react-native-async-storage/async-storage`)"
   - RNDeviceInfo (from `../node_modules/react-native-device-info`)
   - RNFS (from `../node_modules/react-native-fs`)
@@ -3566,6 +3682,8 @@ EXTERNAL SOURCES:
     :path: build/generated/ios/ReactCodegen
   ReactCommon:
     :path: "../node_modules/react-native/ReactCommon"
+  RNAudioAPI:
+    :path: "../node_modules/react-native-audio-api"
   RNCAsyncStorage:
     :path: "../node_modules/@react-native-async-storage/async-storage"
   RNDeviceInfo:
@@ -3684,6 +3802,7 @@ SPEC CHECKSUMS:
   ReactAppDependencyProvider: 0eb286cc274abb059ee601b862ebddac2e681d01
   ReactCodegen: 3d48510bcef445f6403c0004047d4d9cbb915435
   ReactCommon: ac934cb340aee91282ecd6f273a26d24d4c55cae
+  RNAudioAPI: 106257d5f3713bb667d6d74ebb3105c9cf5d60db
   RNCAsyncStorage: 29f0230e1a25f36c20b05f65e2eb8958d6526e82
   RNDeviceInfo: 36d7f232bfe7c9b5c494cb7793230424ed32c388
   RNFS: 89de7d7f4c0f6bafa05343c578f61118c8282ed8
diff --git a/jest.setup.ts b/jest.setup.ts
index 15d0f8cb..af694a3d 100644
--- a/jest.setup.ts
+++ b/jest.setup.ts
@@ -149,6 +149,61 @@ jest.mock('whisper.rn', () => ({
   },
 }), { virtual: true });
 
+// react-native-audio-api mock
+jest.mock('react-native-audio-api', () => ({
+  AudioContext: jest.fn().mockImplementation(() => ({
+    createBuffer: jest.fn().mockReturnValue({ copyToChannel: jest.fn() }),
+    createBufferSource: jest.fn().mockReturnValue({
+      connect: jest.fn(),
+      start: jest.fn(),
+      stop: jest.fn(),
+      playbackRate: { value: 1.0 },
+      onEnded: null,
+      buffer: null,
+    }),
+    destination: {},
+    close: jest.fn(),
+  })),
+  AudioRecorder: jest.fn().mockImplementation(() => ({
+    enableFileOutput: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav' }),
+    start: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav' }),
+    stop: jest.fn().mockReturnValue({ status: 'success', path: '/mock/audio/input.wav', size: 1024, duration: 1.0 }),
+    pause: jest.fn(),
+    resume: jest.fn(),
+    isRecording: jest.fn().mockReturnValue(false),
+    isPaused: jest.fn().mockReturnValue(false),
+  })),
+  FileFormat: { Wav: 0, Caf: 1, M4A: 2, Flac: 3 },
+  FileDirectory: { Document: 0, Cache: 1 },
+}), { virtual: true });
+
+// @react-native-community/slider mock
+jest.mock('@react-native-community/slider', () => {
+  const { View } = require('react-native');
+  return { __esModule: true, default: View };
+});
+
+// react-native-executorch mock
+const mockVoiceConfig = { id: 'mock_voice' };
+jest.mock('react-native-executorch', () => ({
+  useTextToSpeech: jest.fn(() => ({
+    isReady: true,
+    downloadProgress: 1,
+    error: null,
+    stream: jest.fn(() => Promise.resolve()),
+    streamStop: jest.fn(),
+  })),
+  KOKORO_MEDIUM: 'kokoro-medium',
+  KOKORO_VOICE_AF_HEART: mockVoiceConfig,
+  KOKORO_VOICE_AF_RIVER: mockVoiceConfig,
+  KOKORO_VOICE_AF_SARAH: mockVoiceConfig,
+  KOKORO_VOICE_AM_ADAM: mockVoiceConfig,
+  KOKORO_VOICE_AM_MICHAEL: mockVoiceConfig,
+  KOKORO_VOICE_AM_SANTA: mockVoiceConfig,
+  KOKORO_VOICE_BF_EMMA: mockVoiceConfig,
+  KOKORO_VOICE_BM_DANIEL: mockVoiceConfig,
+}));
+
 // react-native-fs mock
 jest.mock('react-native-fs', () => ({
   DocumentDirectoryPath: '/mock/documents',
diff --git a/package-lock.json b/package-lock.json
index 9353548f..1d6a7f40 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,7 +9,9 @@
       "version": "0.0.86",
       "hasInstallScript": true,
       "dependencies": {
+        "@dr.pogodin/react-native-fs": "^2.38.1",
         "@gorhom/bottom-sheet": "^5.2.8",
+        "@kesha-antonov/react-native-background-downloader": "^4.5.4",
         "@op-engineering/op-sqlite": "^15.2.5",
         "@react-native-async-storage/async-storage": "^2.2.0",
         "@react-native-community/blur": "^4.4.1",
@@ -31,7 +33,10 @@
         "patch-package": "^8.0.1",
         "react": "19.2.0",
         "react-native": "0.83.1",
+        "react-native-audio-api": "^0.11.7",
         "react-native-device-info": "^15.0.1",
+        "react-native-executorch": "^0.8.1",
+        "react-native-executorch-bare-resource-fetcher": "^0.8.0",
         "react-native-fs": "^2.20.0",
         "react-native-gesture-handler": "^2.30.0",
         "react-native-haptic-feedback": "^2.3.3",
@@ -2113,6 +2118,51 @@
       "devOptional": true,
       "license": "MIT"
     },
+    "node_modules/@dr.pogodin/react-native-fs": {
+      "version": "2.38.1",
+      "resolved": "https://registry.npmjs.org/@dr.pogodin/react-native-fs/-/react-native-fs-2.38.1.tgz",
+      "integrity": "sha512-H5uxbEy61as7m5p4dNhv4a/huO8g9r4weu0FM/UjlgRd1PSYqpZaJBi2nhDGums/N+MrK8IZFOHVV5ukHWX8UQ==",
+      "license": "MIT",
+      "workspaces": [
+        "example"
+      ],
+      "dependencies": {
+        "buffer": "^6.0.3",
+        "http-status-codes": "^2.3.0"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/birdofpreyru"
+      },
+      "peerDependencies": {
+        "react": "*",
+        "react-native": "*"
+      }
+    },
+    "node_modules/@dr.pogodin/react-native-fs/node_modules/buffer": {
+      "version": "6.0.3",
+      "resolved": "https://registry.npmjs.org/buffer/-/buffer-6.0.3.tgz",
+      "integrity": "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/feross"
+        },
+        {
+          "type": "patreon",
+          "url": "https://www.patreon.com/feross"
+        },
+        {
+          "type": "consulting",
+          "url": "https://feross.org/support"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "base64-js": "^1.3.1",
+        "ieee754": "^1.2.1"
+      }
+    },
     "node_modules/@egjs/hammerjs": {
       "version": "2.0.17",
       "resolved": "https://registry.npmjs.org/@egjs/hammerjs/-/hammerjs-2.0.17.tgz",
@@ -2559,6 +2609,15 @@
         "@hapi/hoek": "^9.0.0"
       }
     },
+    "node_modules/@huggingface/jinja": {
+      "version": "0.5.6",
+      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.6.tgz",
+      "integrity": "sha512-MyMWyLnjqo+KRJYSH7oWNbsOn5onuIvfXYPcc0WOGxU0eHUV7oAYUoQTl2BMdu7ml+ea/bu11UM+EshbeHwtIA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=18"
+      }
+    },
     "node_modules/@humanwhocodes/config-array": {
       "version": "0.13.0",
       "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.13.0.tgz",
@@ -3110,6 +3169,15 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@kesha-antonov/react-native-background-downloader": {
+      "version": "4.5.4",
+      "resolved": "https://registry.npmjs.org/@kesha-antonov/react-native-background-downloader/-/react-native-background-downloader-4.5.4.tgz",
+      "integrity": "sha512-WH9n7Sy8MebWiVZqZYpvP4q2sJeOIiNLrbHB64ue/YYsXnWtdJ3iMQowv/QEmU2Cw9biI1d2k8LFHKV9oACLsw==",
+      "license": "Apache-2.0",
+      "peerDependencies": {
+        "react-native": ">=0.57.0"
+      }
+    },
     "node_modules/@motionone/animation": {
       "version": "10.18.0",
       "resolved": "https://registry.npmjs.org/@motionone/animation/-/animation-10.18.0.tgz",
@@ -8090,6 +8158,12 @@
         "node": ">= 0.8"
       }
     },
+    "node_modules/http-status-codes": {
+      "version": "2.3.0",
+      "resolved": "https://registry.npmjs.org/http-status-codes/-/http-status-codes-2.3.0.tgz",
+      "integrity": "sha512-RJ8XvFvpPM/Dmc5SV+dC4y5PCeOhT3x1Hq0NU3rjGeg5a/CqlhZ7uudknPwZFz4aeAXDcbAyaeP7GAo9lvngtA==",
+      "license": "MIT"
+    },
     "node_modules/https-proxy-agent": {
       "version": "7.0.6",
       "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
@@ -8146,7 +8220,6 @@
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
       "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
-      "devOptional": true,
       "funding": [
         {
           "type": "github",
@@ -9609,6 +9682,24 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/jsonrepair": {
+      "version": "3.13.3",
+      "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.3.tgz",
+      "integrity": "sha512-BTznj0owIt2CBAH/LTo7+1I5pMvl1e1033LRl/HUowlZmJOIhzC0zbX5bxMngLkfT4WnzPP26QnW5wMr2g9tsQ==",
+      "license": "ISC",
+      "bin": {
+        "jsonrepair": "bin/cli.js"
+      }
+    },
+    "node_modules/jsonschema": {
+      "version": "1.5.0",
+      "resolved": "https://registry.npmjs.org/jsonschema/-/jsonschema-1.5.0.tgz",
+      "integrity": "sha512-K+A9hhqbn0f3pJX17Q/7H6yQfD/5OXgdrR5UE12gMXCiN9D5Xq2o5mddV2QEcX/bjla99ASsAAQUyMCCRWAEhw==",
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      }
+    },
     "node_modules/jsx-ast-utils": {
       "version": "3.3.5",
       "resolved": "https://registry.npmjs.org/jsx-ast-utils/-/jsx-ast-utils-3.3.5.tgz",
@@ -11862,6 +11953,15 @@
         "node": ">=8.0"
       }
     },
+    "node_modules/pngjs": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/pngjs/-/pngjs-7.0.0.tgz",
+      "integrity": "sha512-LKWqWJRhstyYo9pGvgor/ivk2w94eSjE3RGVuzLGlr3NmD8bf7RcYGze1mNdEHRP6TRP6rMuDHk5t44hnTRyow==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=14.19.0"
+      }
+    },
     "node_modules/popmotion": {
       "version": "11.0.3",
       "resolved": "https://registry.npmjs.org/popmotion/-/popmotion-11.0.3.tgz",
@@ -12220,6 +12320,34 @@
         }
       }
     },
+    "node_modules/react-native-audio-api": {
+      "version": "0.11.7",
+      "resolved": "https://registry.npmjs.org/react-native-audio-api/-/react-native-audio-api-0.11.7.tgz",
+      "integrity": "sha512-2oIoP77Tn2nlouRVfEC3bAsuSyKU6xhGNkSnVXTLLQQZslEDoYX2cN9pVRZoWOqhFrLT8q4IZI9HaFgYL13L1A==",
+      "license": "MIT",
+      "dependencies": {
+        "semver": "^7.7.3"
+      },
+      "bin": {
+        "setup-rn-audio-api-web": "scripts/setup-rn-audio-api-web.js"
+      },
+      "peerDependencies": {
+        "react": "*",
+        "react-native": "*"
+      }
+    },
+    "node_modules/react-native-audio-api/node_modules/semver": {
+      "version": "7.7.4",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz",
+      "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==",
+      "license": "ISC",
+      "bin": {
+        "semver": "bin/semver.js"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
     "node_modules/react-native-device-info": {
       "version": "15.0.1",
       "resolved": "https://registry.npmjs.org/react-native-device-info/-/react-native-device-info-15.0.1.tgz",
@@ -12229,6 +12357,38 @@
         "react-native": "*"
       }
     },
+    "node_modules/react-native-executorch": {
+      "version": "0.8.1",
+      "resolved": "https://registry.npmjs.org/react-native-executorch/-/react-native-executorch-0.8.1.tgz",
+      "integrity": "sha512-DEVWs+Ki7p1C8mEgsHiabZizO/kDM0zELlJ+JFCfNCb2RrraMUXBTZIARWHPUbxpG17nqFswIZmwjUoNK5V36g==",
+      "license": "MIT",
+      "workspaces": [
+        "example"
+      ],
+      "dependencies": {
+        "@huggingface/jinja": "^0.5.0",
+        "jsonrepair": "^3.12.0",
+        "jsonschema": "^1.5.0",
+        "pngjs": "^7.0.0",
+        "zod": "^4.3.6"
+      },
+      "peerDependencies": {
+        "react": "*",
+        "react-native": "*"
+      }
+    },
+    "node_modules/react-native-executorch-bare-resource-fetcher": {
+      "version": "0.8.0",
+      "resolved": "https://registry.npmjs.org/react-native-executorch-bare-resource-fetcher/-/react-native-executorch-bare-resource-fetcher-0.8.0.tgz",
+      "integrity": "sha512-PzSzK31qnKmwW06+JCbpQML24u3XiqYcWKQG0Y1cwPmkOqz0VppI0ZOeCZh03/03SMyuvwwEgteJtgO0uSP8sg==",
+      "license": "MIT",
+      "peerDependencies": {
+        "@dr.pogodin/react-native-fs": "^2.0.0",
+        "@kesha-antonov/react-native-background-downloader": "^4.0.0",
+        "react-native": "*",
+        "react-native-executorch": "*"
+      }
+    },
     "node_modules/react-native-fit-image": {
       "version": "1.5.5",
       "resolved": "https://registry.npmjs.org/react-native-fit-image/-/react-native-fit-image-1.5.5.tgz",
@@ -14716,7 +14876,6 @@
       "version": "4.3.6",
       "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz",
       "integrity": "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg==",
-      "dev": true,
       "license": "MIT",
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
diff --git a/package.json b/package.json
index 0650d784..54ceb6a2 100644
--- a/package.json
+++ b/package.json
@@ -20,7 +20,9 @@
     "postinstall": "patch-package"
   },
   "dependencies": {
+    "@dr.pogodin/react-native-fs": "^2.38.1",
     "@gorhom/bottom-sheet": "^5.2.8",
+    "@kesha-antonov/react-native-background-downloader": "^4.5.4",
     "@op-engineering/op-sqlite": "^15.2.5",
     "@react-native-async-storage/async-storage": "^2.2.0",
     "@react-native-community/blur": "^4.4.1",
@@ -42,7 +44,10 @@
     "patch-package": "^8.0.1",
     "react": "19.2.0",
     "react-native": "0.83.1",
+    "react-native-audio-api": "^0.11.7",
     "react-native-device-info": "^15.0.1",
+    "react-native-executorch": "^0.8.1",
+    "react-native-executorch-bare-resource-fetcher": "^0.8.0",
     "react-native-fs": "^2.20.0",
     "react-native-gesture-handler": "^2.30.0",
     "react-native-haptic-feedback": "^2.3.3",
diff --git a/src/components/AudioMessageBubble/PlaybackControls.tsx b/src/components/AudioMessageBubble/PlaybackControls.tsx
new file mode 100644
index 00000000..1972a263
--- /dev/null
+++ b/src/components/AudioMessageBubble/PlaybackControls.tsx
@@ -0,0 +1,264 @@
+import React, { useState, useCallback, useEffect, useRef } from 'react';
+import {
+  View,
+  Text,
+  TouchableOpacity,
+  ActivityIndicator,
+} from 'react-native';
+import { ScrollView } from 'react-native-gesture-handler';
+import Slider from '@react-native-community/slider';
+import { stripMarkdownForSpeech } from '../../utils/messageContent';
+import { MarkdownText } from '../MarkdownText';
+import Icon from 'react-native-vector-icons/Feather';
+import { useTTSStore } from '../../stores/ttsStore';
+import type { ThemeColors } from '../../theme';
+
+const SPEED_STEPS: number[] = [0.5, 0.8, 0.9, 1.0, 1.1, 1.2, 1.5, 2.0];
+
+function formatDuration(seconds: number): string {
+  const m = Math.floor(seconds / 60);
+  const s = Math.floor(seconds % 60);
+  return `${m}:${s.toString().padStart(2, '0')}`;
+}
+
+interface PlaybackState {
+  isThisPlaying: boolean;
+  isThisPaused: boolean;
+  isThisAudible: boolean;
+  isThisLoading: boolean;
+}
+
+/** Derives playback state for a given messageId from TTS store selectors */
+export function usePlaybackState(messageId: string): PlaybackState {
+  const isSpeaking = useTTSStore((s) => s.isSpeaking);
+  const isPaused = useTTSStore((s) => s.isPaused);
+  const isAudioPlaying = useTTSStore((s) => s.isAudioPlaying);
+  const currentMessageId = useTTSStore((s) => s.currentMessageId);
+
+  const isThisPlaying = isSpeaking && currentMessageId === messageId && !isPaused;
+  const isThisPaused = isSpeaking && currentMessageId === messageId && isPaused;
+  const isThisAudible = isAudioPlaying && currentMessageId === messageId && !isPaused;
+  const isThisLoading = isThisPlaying && !isThisAudible;
+
+  return { isThisPlaying, isThisPaused, isThisAudible, isThisLoading };
+}
+
+/** Hook for wall-clock elapsed timer */
+export function useElapsedTimer(
+  playback: { isThisAudible: boolean; isThisPaused: boolean },
+  seekOffsetRef: React.MutableRefObject<number>,
+) {
+  const { isThisAudible, isThisPaused } = playback;
+  // playSessionId is a monotonic counter that increments on every new play —
+  // guarantees the effect re-runs even if boolean deps appear unchanged.
+  const playSessionId = useTTSStore((s) => s.playSessionId);
+  const [localElapsed, setLocalElapsed] = useState(0);
+  const startTimeRef = useRef<number>(0);
+  const pausedAtRef = useRef<number>(0);
+
+  useEffect(() => {
+    console.log('[Timer] effect: isThisAudible=', isThisAudible, 'isThisPaused=', isThisPaused, 'playSessionId=', playSessionId);
+    if (!isThisAudible && !isThisPaused) {
+      if (seekOffsetRef.current === 0) {
+        setLocalElapsed(0);
+        pausedAtRef.current = 0;
+      }
+      console.log('[Timer] not audible, not paused — resetting');
+      return;
+    }
+    if (isThisPaused) {
+      pausedAtRef.current = localElapsed;
+      console.log('[Timer] paused at', localElapsed);
+      return;
+    }
+    const offset = seekOffsetRef.current || pausedAtRef.current;
+    seekOffsetRef.current = 0;
+    startTimeRef.current = Date.now() - offset * 1000;
+    console.log('[Timer] STARTING interval, offset=', offset);
+    const id = setInterval(() => {
+      setLocalElapsed((Date.now() - startTimeRef.current) / 1000);
+    }, 50);
+    return () => { console.log('[Timer] CLEARING interval'); clearInterval(id); };
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [isThisAudible, isThisPaused, playSessionId]);
+
+  return { localElapsed, setLocalElapsed };
+}
+
+/** Play/pause button with loading states */
+export const PlayButton: React.FC<{
+  isLoading: boolean;
+  isThisLoading: boolean;
+  isThisPlaying: boolean;
+  onPlayPause: () => void;
+  colors: ThemeColors;
+  styles: any;
+}> = ({ isLoading, isThisLoading, isThisPlaying, onPlayPause, colors, styles }) => {
+  if (isLoading) {
+    return (
+      <View style={[styles.playButton, styles.playButtonDisabled]}>
+        <Icon name="play" size={16} color={colors.primary} />
+      </View>
+    );
+  }
+  if (isThisLoading) {
+    return (
+      <View style={styles.playButton}>
+        <ActivityIndicator size="small" color={colors.primary} />
+      </View>
+    );
+  }
+  return (
+    <TouchableOpacity
+      onPress={onPlayPause}
+      style={styles.playButton}
+      hitSlop={{ top: 8, left: 8, right: 8 }}
+    >
+      <Icon
+        name={isThisPlaying ? 'pause' : 'play'}
+        size={16}
+        color={colors.primary}
+      />
+    </TouchableOpacity>
+  );
+};
+
+/** Speed cycle chip */
+export const SpeedChip: React.FC<{
+  styles: any;
+}> = ({ styles }) => {
+  const speed = useTTSStore((s) => s.settings.speed);
+  const updateSettings = useTTSStore((s) => s.updateSettings);
+
+  const handleSpeedCycle = useCallback(() => {
+    let idx = SPEED_STEPS.indexOf(speed);
+    if (idx < 0) {
+      idx = SPEED_STEPS.findIndex((s) => s > speed) - 1;
+      if (idx < 0) idx = 0;
+    }
+    const next = (idx + 1) % SPEED_STEPS.length;
+    updateSettings({ speed: SPEED_STEPS[next] });
+  }, [speed, updateSettings]);
+
+  return (
+    <TouchableOpacity
+      onPress={handleSpeedCycle}
+      style={styles.speedChip}
+      hitSlop={{ top: 8, left: 8, right: 8 }}
+    >
+      <Text style={styles.speedText}>{speed}x</Text>
+    </TouchableOpacity>
+  );
+};
+
+/** Duration display */
+export const DurationText: React.FC<{
+  isLoading: boolean;
+  totalDuration: number;
+  styles: any;
+}> = ({ isLoading, totalDuration, styles }) => (
+  <Text style={styles.duration}>
+    {isLoading ? '—' : formatDuration(totalDuration)}
+  </Text>
+);
+
+/** Seekable progress bar using native Slider component */
+export const SeekBar: React.FC<{
+  displayProgress: number;
+  colors: ThemeColors;
+  styles: any;
+  onSeek: (fraction: number) => void;
+}> = ({ displayProgress, colors, styles, onSeek }) => {
+  const [isSeeking, setIsSeeking] = useState(false);
+  const [seekValue, setSeekValue] = useState(0);
+
+  return (
+    <Slider
+      style={styles.seekSlider}
+      value={isSeeking ? seekValue : displayProgress}
+      minimumValue={0}
+      maximumValue={1}
+      minimumTrackTintColor="transparent"
+      maximumTrackTintColor="transparent"
+      thumbTintColor={colors.primary}
+      onSlidingStart={(val) => { setIsSeeking(true); setSeekValue(val); }}
+      onValueChange={(val) => { if (isSeeking) setSeekValue(val); }}
+      onSlidingComplete={(val) => { setIsSeeking(false); onSeek(val); }}
+    />
+  );
+};
+
+/** Transcript toggle and content */
+export const TranscriptToggle: React.FC<{
+  transcript?: string;
+  colors: ThemeColors;
+  styles: any;
+  isOpen: boolean;
+  onToggle: (v: boolean) => void;
+}> = ({ transcript, colors, styles, isOpen, onToggle }) => {
+  if (!transcript) return null;
+
+  return (
+    <TouchableOpacity
+      onPress={() => onToggle(!isOpen)}
+      style={styles.transcriptToggle}
+    >
+      <Text style={styles.transcriptToggleText}>
+        {isOpen ? 'Hide transcript' : 'Show transcript'}
+      </Text>
+      <Icon
+        name={isOpen ? 'chevron-up' : 'chevron-down'}
+        size={11}
+        color={colors.textMuted}
+      />
+    </TouchableOpacity>
+  );
+};
+
+export const TranscriptContent: React.FC<{
+  transcript: string;
+  styles: any;
+}> = ({ transcript, styles }) => (
+  <ScrollView style={styles.transcriptScroll} nestedScrollEnabled>
+    <View style={styles.transcriptContent}>
+      <MarkdownText>{transcript}</MarkdownText>
+    </View>
+  </ScrollView>
+);
+
+/** Hook for seek logic */
+interface SeekHandlerParams {
+  transcript: string | undefined;
+  audioPath: string;
+  messageId: string;
+  totalDurationRef: React.MutableRefObject<number>;
+  seekOffsetRef: React.MutableRefObject<number>;
+  setLocalElapsed: (v: number) => void;
+  setIsSeeking: (v: boolean) => void;
+}
+
+export function useSeekHandler({
+  transcript, audioPath, messageId,
+  totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking,
+}: SeekHandlerParams) {
+  const stop = useTTSStore((s) => s.stop);
+  const speak = useTTSStore((s) => s.speak);
+
+  return useCallback((fraction: number) => {
+    if (!transcript || audioPath) return;
+    const text = stripMarkdownForSpeech(transcript);
+    const charOffset = Math.floor(fraction * text.length);
+    const seekPoint = text.lastIndexOf('. ', charOffset) + 2 || charOffset;
+    const remaining = text.slice(seekPoint).trim();
+    console.log(`[AudioBubble] seeking to ${Math.round(fraction * 100)}%`, 'charOffset:', charOffset, 'remaining:', remaining.length, 'chars');
+    if (!remaining) return;
+    const seekSeconds = Math.floor(fraction * totalDurationRef.current);
+    seekOffsetRef.current = seekSeconds;
+    setLocalElapsed(seekSeconds);
+    setIsSeeking(true);
+    stop();
+    setTimeout(() => {
+      speak(remaining, messageId).finally(() => setIsSeeking(false));
+    }, 200);
+  }, [transcript, audioPath, stop, speak, messageId, totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking]);
+}
diff --git a/src/components/AudioMessageBubble/index.tsx b/src/components/AudioMessageBubble/index.tsx
new file mode 100644
index 00000000..c18cfa6c
--- /dev/null
+++ b/src/components/AudioMessageBubble/index.tsx
@@ -0,0 +1,390 @@
+import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react';
+import {
+  View,
+  Text,
+  TouchableOpacity,
+  StyleSheet,
+  Animated,
+} from 'react-native';
+import { stripMarkdownForSpeech } from '../../utils/messageContent';
+import { useTheme, useThemedStyles } from '../../theme';
+import { useTTSStore } from '../../stores/ttsStore';
+import { triggerHaptic } from '../../utils/haptics';
+import { TYPOGRAPHY, SPACING } from '../../constants';
+import type { ThemeColors, ThemeShadows } from '../../theme';
+import { ActionMenuSheet } from '../ChatMessage/components/ActionMenuSheet';
+import { createStyles as createChatStyles } from '../ChatMessage/styles';
+import {
+  usePlaybackState,
+  useElapsedTimer,
+  useSeekHandler,
+  PlayButton,
+  SpeedChip,
+  DurationText,
+  SeekBar,
+  TranscriptToggle,
+  TranscriptContent,
+} from './PlaybackControls';
+
+const WAVEFORM_BARS = 48;
+
+interface AudioMessageBubbleProps {
+  messageId: string;
+  audioPath: string;
+  waveformData: number[];
+  durationSeconds: number;
+  transcript?: string;
+  isUser?: boolean;
+  isLoading?: boolean;
+  _reasoningContent?: string;
+  onCopy?: (content: string) => void;
+  onRetry?: () => void;
+  onEdit?: (newContent: string) => void;
+}
+
+function subsample(data: number[], count: number): number[] {
+  if (data.length === 0) {
+    return Array.from({ length: count }, (_, i) => 0.25 + 0.25 * Math.sin((i / count) * Math.PI * 4));
+  }
+  const step = data.length / count;
+  const result: number[] = [];
+  for (let i = 0; i < count; i++) {
+    result.push(data[Math.floor(i * step)] ?? 0.1);
+  }
+  return result;
+}
+
+function normalize(data: number[]): number[] {
+  const max = Math.max(...data, 0.001);
+  return data.map((v) => v / max);
+}
+
+/** WhatsApp-style waveform — bars tint as the playhead passes over them.
+ *  Played bars are full color, unplayed bars are muted. */
+const WaveformBars: React.FC<{
+  data: number[];
+  colors: ThemeColors;
+  /** 0–1 playback progress — bars behind the playhead are tinted */
+  progress?: number;
+}> = ({ data, colors, progress = 0 }) => {
+  const bars = useMemo(() => normalize(subsample(data, WAVEFORM_BARS)), [data]);
+
+  return (
+    <View style={barStyles.container}>
+      {bars.map((shape, i) => {
+        const played = progress > 0 && (i / bars.length) < progress;
+        return (
+          <View
+            key={i}
+            style={[
+              barStyles.bar,
+              {
+                height: Math.max(6, Math.round(shape * 32)),
+                backgroundColor: colors.primary,
+                opacity: played ? (0.7 + shape * 0.3) : (0.2 + shape * 0.25),
+              },
+            ]}
+          />
+        );
+      })}
+    </View>
+  );
+};
+
+const barStyles = StyleSheet.create({
+  container: {
+    flex: 1,
+    flexDirection: 'row',
+    alignItems: 'center',
+    gap: 1.5,
+    height: 40,
+    overflow: 'hidden',
+  },
+  bar: {
+    flex: 1,
+    borderRadius: 2,
+  },
+});
+
+/** Three pulsing dots shown while the LLM is generating */
+const ThinkingDots: React.FC<{ colors: ThemeColors }> = ({ colors }) => {
+  const dots = useRef([new Animated.Value(0.3), new Animated.Value(0.3), new Animated.Value(0.3)]).current;
+
+  useEffect(() => {
+    const anims = dots.map((v, i) =>
+      Animated.loop(
+        Animated.sequence([
+          Animated.delay(i * 150),
+          Animated.timing(v, { toValue: 1, duration: 300, useNativeDriver: false }),
+          Animated.timing(v, { toValue: 0.3, duration: 300, useNativeDriver: false }),
+        ]),
+      ),
+    );
+    anims.forEach((a) => a.start());
+    return () => anims.forEach((a) => a.stop());
+  }, [dots]);
+
+  return (
+    <View style={dotStyles.container}>
+      {dots.map((v, i) => (
+        <Animated.View key={i} style={[dotStyles.dot, { backgroundColor: colors.primary, opacity: v }]} />
+      ))}
+    </View>
+  );
+};
+
+const dotStyles = StyleSheet.create({
+  container: {
+    flex: 1,
+    flexDirection: 'row',
+    alignItems: 'center',
+    gap: 6,
+    paddingHorizontal: 4,
+    height: 32,
+  },
+  dot: {
+    width: 7,
+    height: 7,
+    borderRadius: 4,
+  },
+});
+
+export const AudioMessageBubble: React.FC<AudioMessageBubbleProps> = ({
+  messageId,
+  audioPath,
+  waveformData,
+  durationSeconds,
+  transcript,
+  isUser = false,
+  isLoading = false,
+  _reasoningContent,
+  onCopy,
+  onRetry,
+  onEdit,
+}) => {
+  const { colors } = useTheme();
+  const styles = useThemedStyles(createStyles);
+  const chatStyles = useThemedStyles(createChatStyles);
+  const [showActionMenu, setShowActionMenu] = useState(false);
+  const speed = useTTSStore((s) => s.settings.speed);
+  const playMessage = useTTSStore((s) => s.playMessage);
+  const speak = useTTSStore((s) => s.speak);
+
+  const { isThisPlaying, isThisPaused, isThisAudible, isThisLoading } = usePlaybackState(messageId);
+  const currentMessageId = useTTSStore((s) => s.currentMessageId);
+
+  useEffect(() => {
+    console.log('[AudioBubble] state: messageId=', messageId, 'currentMessageId=', currentMessageId, 'isThisAudible=', isThisAudible, 'isThisPlaying=', isThisPlaying);
+  }, [messageId, currentMessageId, isThisAudible, isThisPlaying]);
+  const [showTranscript, setShowTranscript] = useState(false);
+  const [isSeeking, setIsSeeking] = useState(false);
+  const seekOffsetRef = useRef<number>(0);
+  const { localElapsed, setLocalElapsed } = useElapsedTimer({ isThisAudible, isThisPaused }, seekOffsetRef);
+
+  const handlePlayPause = useCallback(() => {
+    const { pause, resume } = useTTSStore.getState();
+    if (isThisPaused) { resume(); return; }
+    if (isThisPlaying) { pause(); return; }
+    if (audioPath) {
+      playMessage(messageId, audioPath);
+    } else {
+      const text = stripMarkdownForSpeech(transcript ?? '');
+      speak(text, messageId);
+    }
+  }, [isThisPlaying, isThisPaused, playMessage, speak, messageId, audioPath, transcript]);
+
+  const totalDurationRef = useRef(0);
+  const totalDuration = useMemo(() => {
+    if (!audioPath && transcript) {
+      const wordCount = transcript.trim().split(/\s+/).filter(Boolean).length;
+      return Math.max(1, wordCount / (2.5 * speed));
+    }
+    return durationSeconds;
+  }, [audioPath, transcript, speed, durationSeconds]);
+  totalDurationRef.current = totalDuration;
+
+  const handleSeek = useSeekHandler({
+    transcript, audioPath, messageId,
+    totalDurationRef, seekOffsetRef, setLocalElapsed, setIsSeeking,
+  });
+
+  const isThisActive = ((isThisPlaying || isThisPaused) && currentMessageId === messageId) || isSeeking;
+  const progress = isThisActive ? Math.min(1, localElapsed / Math.max(1, totalDuration)) : 0;
+
+  // Waveform + seekbar overlay — seekbar sits on top of the waveform, centered vertically
+  const waveformWithSeek = (
+    <View style={styles.waveformSeekContainer}>
+      {isLoading && !isUser
+        ? <ThinkingDots colors={colors} />
+        : <WaveformBars data={waveformData} colors={colors} progress={progress} />}
+      {!isLoading && (
+        <View style={styles.seekOverlay}>
+          <SeekBar displayProgress={progress} colors={colors} styles={styles} onSeek={handleSeek} />
+        </View>
+      )}
+    </View>
+  );
+
+  const handleLongPress = useCallback(() => {
+    if (isLoading) return;
+    triggerHaptic('impactMedium');
+    setShowActionMenu(true);
+  }, [isLoading]);
+
+  const showActions = !!(onCopy || onRetry || onEdit);
+
+  return (
+    <View style={[styles.bubble, isUser && styles.bubbleUser]} testID={`audio-bubble-${messageId}`}>
+      <TouchableOpacity
+        activeOpacity={0.9}
+        onLongPress={handleLongPress}
+        delayLongPress={300}
+        disabled={!showActions}
+      >
+        <View style={styles.playRow}>
+          <PlayButton isLoading={isLoading} isThisLoading={isThisLoading} isThisPlaying={isThisPlaying} onPlayPause={handlePlayPause} colors={colors} styles={styles} />
+          {waveformWithSeek}
+        </View>
+
+        <View style={styles.metaRow}>
+          <TranscriptToggle transcript={transcript} colors={colors} styles={styles} onToggle={setShowTranscript} isOpen={showTranscript} />
+          <View style={styles.metaRight}>
+            <DurationText isLoading={isLoading} totalDuration={totalDuration} styles={styles} />
+            <SpeedChip styles={styles} />
+            {showActions && !isLoading && (
+              <TouchableOpacity style={styles.actionHint} onPress={() => { triggerHaptic('impactLight'); setShowActionMenu(true); }}>
+                <Text style={styles.actionHintText}>•••</Text>
+              </TouchableOpacity>
+            )}
+          </View>
+        </View>
+      </TouchableOpacity>
+
+      {showTranscript && transcript ? (
+        <TranscriptContent transcript={transcript} styles={styles} />
+      ) : null}
+
+      <ActionMenuSheet
+        visible={showActionMenu}
+        onClose={() => setShowActionMenu(false)}
+        isUser={isUser}
+        canEdit={isUser && !!onEdit}
+        canRetry={!!onRetry}
+        canGenerateImage={false}
+        canSpeak={false}
+        styles={chatStyles}
+        onCopy={() => { onCopy?.(transcript ?? ''); setShowActionMenu(false); }}
+        onEdit={() => setShowActionMenu(false)}
+        onRetry={() => { onRetry?.(); setShowActionMenu(false); }}
+        onGenerateImage={() => setShowActionMenu(false)}
+        onSpeak={() => setShowActionMenu(false)}
+      />
+    </View>
+  );
+};
+
+const createStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({
+  bubble: {
+    backgroundColor: colors.surface,
+    borderRadius: 12,
+    borderWidth: 1,
+    borderColor: colors.border,
+    padding: SPACING.md,
+    width: '88%' as const,
+    alignSelf: 'flex-start' as const,
+    gap: SPACING.sm,
+    overflow: 'hidden' as const,
+  },
+  bubbleUser: {
+    alignSelf: 'flex-end' as const,
+    backgroundColor: `${colors.primary}18`,
+    borderColor: `${colors.primary}40`,
+  },
+  playRow: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    gap: SPACING.xs,
+  },
+  metaRow: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    justifyContent: 'space-between' as const,
+  },
+  metaRight: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    gap: SPACING.sm,
+  },
+  playButton: {
+    width: 28,
+    height: 28,
+    borderRadius: 14,
+    backgroundColor: `${colors.primary}20`,
+    alignItems: 'center' as const,
+    justifyContent: 'center' as const,
+  },
+  playButtonDisabled: {
+    opacity: 0.35,
+  },
+  duration: {
+    ...TYPOGRAPHY.meta,
+    color: colors.textMuted,
+    minWidth: 32,
+    textAlign: 'right' as const,
+  },
+  speedChip: {
+    backgroundColor: colors.surfaceLight,
+    borderRadius: 10,
+    paddingHorizontal: SPACING.sm,
+    paddingVertical: SPACING.xs,
+    borderWidth: 1,
+    borderColor: colors.border,
+  },
+  speedText: {
+    ...TYPOGRAPHY.metaSmall,
+    color: colors.textSecondary,
+  },
+  waveformSeekContainer: {
+    flex: 1,
+    position: 'relative' as const,
+    marginLeft: SPACING.sm,
+  },
+  seekOverlay: {
+    position: 'absolute' as const,
+    top: 0,
+    left: -16,
+    right: -16,
+    bottom: 0,
+    justifyContent: 'center' as const,
+  },
+  seekSlider: {
+    height: 40,
+  },
+  transcriptToggle: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    gap: SPACING.xs,
+  },
+  transcriptToggleText: {
+    ...TYPOGRAPHY.meta,
+    color: colors.textMuted,
+  },
+  transcriptContent: {
+    paddingTop: SPACING.xs,
+  },
+  transcriptScroll: {
+    maxHeight: 120,
+  },
+  transcriptText: {
+    ...TYPOGRAPHY.bodySmall,
+    lineHeight: 20,
+  },
+  actionHint: {
+    padding: 4,
+  },
+  actionHintText: {
+    ...TYPOGRAPHY.bodySmall,
+    color: colors.textMuted,
+    letterSpacing: 1,
+  },
+});
diff --git a/src/components/ChatInput/Attachments.tsx b/src/components/ChatInput/Attachments.tsx
index bdf90cdf..b96e3b53 100644
--- a/src/components/ChatInput/Attachments.tsx
+++ b/src/components/ChatInput/Attachments.tsx
@@ -101,9 +101,21 @@ export function useAttachments(setAlertState: (state: AlertState) => void) {
     }
   };
 
+  const addAudioAttachment = (uri: string, audioFormat: 'wav' | 'mp3', audioDurationSeconds?: number) => {
+    const attachment: MediaAttachment = {
+      id: nextAttachmentId(),
+      type: 'audio',
+      uri,
+      audioFormat,
+      audioDurationSeconds,
+      fileName: uri.split('/').pop(),
+    };
+    setAttachments(prev => [...prev, attachment]);
+  };
+
   const clearAttachments = () => setAttachments([]);
 
-  return { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument };
+  return { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument, addAudioAttachment };
 }
 
 // ─── AttachmentPreview component ─────────────────────────────────────────────
@@ -135,6 +147,11 @@ export const AttachmentPreview: React.FC<AttachmentPreviewProps> = ({ attachment
               source={{ uri: attachment.uri }}
               style={styles.attachmentImage}
             />
+          ) : attachment.type === 'audio' ? (
+            <View testID={`audio-preview-${attachment.id}`} style={styles.documentPreview}>
+              <Icon name="mic" size={24} color={colors.primary} />
+              <Text style={styles.documentName} numberOfLines={2}>Voice</Text>
+            </View>
           ) : (
             <View testID={`document-preview-${attachment.id}`} style={styles.documentPreview}>
               <Icon name="file-text" size={24} color={colors.primary} />
diff --git a/src/components/ChatInput/AudioModeLayout.tsx b/src/components/ChatInput/AudioModeLayout.tsx
new file mode 100644
index 00000000..ee2a1a4c
--- /dev/null
+++ b/src/components/ChatInput/AudioModeLayout.tsx
@@ -0,0 +1,239 @@
+import React from 'react';
+import { View, TouchableOpacity, Text, ActivityIndicator } from 'react-native';
+import Icon from 'react-native-vector-icons/Feather';
+import { useTheme } from '../../theme';
+import { ImageModeState, MediaAttachment } from '../../types';
+import { VoiceRecordButton } from '../VoiceRecordButton';
+import { triggerHaptic } from '../../utils/haptics';
+import { CustomAlert, hideAlert, AlertState } from '../CustomAlert';
+import { QueueRow } from './Toolbar';
+import { AttachmentPreview } from './Attachments';
+import { AttachPickerPopover, VoicePickerPopover, QuickSettingsPopover } from './Popovers';
+import { useTTSStore } from '../../stores/ttsStore';
+import type { KOKORO_VOICES } from '../../constants/kokoroModels';
+
+interface AudioModeLayoutProps {
+  styles: any;
+  disabled?: boolean;
+  isGenerating?: boolean;
+  imageMode: ImageModeState;
+  imageModelLoaded: boolean;
+  supportsThinking: boolean;
+  supportsToolCalling: boolean;
+  enabledToolCount: number;
+  thinkingEnabled: boolean;
+  currentVoice: typeof KOKORO_VOICES[number];
+  // Attachments
+  attachments: MediaAttachment[];
+  onRemoveAttachment: (id: string) => void;
+  // Queue
+  queueCount: number;
+  queuedTexts: string[];
+  onClearQueue?: () => void;
+  // Voice recording
+  isRecording: boolean;
+  voiceAvailable: boolean;
+  isModelLoading: boolean;
+  isTranscribing: boolean;
+  partialResult: string;
+  error: string | null;
+  onStartRecording: () => void;
+  onStopRecording: () => void;
+  onCancelRecording: () => void;
+  // Handlers
+  onStop?: () => void;
+  onImageModeToggle: () => void;
+  onThinkingToggle: () => void;
+  onToolsPress?: () => void;
+  onVisionPress: () => void;
+  onPickDocument: () => void;
+  // Popovers
+  attachPicker: any;
+  voicePicker: any;
+  quickSettings: any;
+  supportsVision: boolean;
+  // Alert
+  alertState: AlertState;
+  setAlertState: (s: AlertState) => void;
+}
+
+export const AudioModeLayout: React.FC<AudioModeLayoutProps> = ({
+  styles,
+  disabled,
+  isGenerating,
+  imageMode,
+  imageModelLoaded,
+  supportsThinking,
+  supportsToolCalling,
+  enabledToolCount,
+  thinkingEnabled,
+  currentVoice,
+  attachments,
+  onRemoveAttachment,
+  queueCount,
+  queuedTexts,
+  onClearQueue,
+  isRecording,
+  voiceAvailable,
+  isModelLoading,
+  isTranscribing,
+  partialResult,
+  error,
+  onStartRecording,
+  onStopRecording,
+  onCancelRecording,
+  onStop,
+  onImageModeToggle,
+  onThinkingToggle,
+  onToolsPress,
+  onVisionPress,
+  onPickDocument,
+  attachPicker,
+  voicePicker,
+  quickSettings,
+  supportsVision,
+  alertState,
+  setAlertState,
+}) => {
+  const { colors } = useTheme();
+  const isChangingVoice = useTTSStore((s) => s.settings.kokoroVoiceId !== s.kokoroActiveVoiceId);
+
+  const handleStop = () => {
+    if (onStop && isGenerating) {
+      triggerHaptic('impactLight');
+      onStop();
+    }
+  };
+
+  const audioStopButton = isGenerating && onStop ? (
+    <TouchableOpacity
+      testID="stop-button"
+      style={styles.circleButton}
+      onPress={handleStop}
+    >
+      <Icon name="square" size={18} color={colors.background} />
+    </TouchableOpacity>
+  ) : null;
+
+  return (
+    <View style={styles.container}>
+      <AttachmentPreview attachments={attachments} onRemove={onRemoveAttachment} />
+      <QueueRow
+        queueCount={queueCount}
+        queuedTexts={queuedTexts}
+        onClearQueue={onClearQueue}
+      />
+      <View style={styles.audioModeRow}>
+        <TouchableOpacity
+          ref={attachPicker.triggerRef}
+          style={styles.pillIconButton}
+          onPress={() => attachPicker.show()}
+          disabled={disabled}
+          hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+        >
+          <Icon name="plus" size={20} color={disabled ? colors.textMuted : colors.textSecondary} />
+        </TouchableOpacity>
+        <TouchableOpacity
+          style={styles.pillIconButton}
+          onPress={() => {
+            triggerHaptic('impactLight');
+            useTTSStore.getState().updateSettings({ interfaceMode: 'chat' });
+          }}
+          hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+        >
+          <Icon name="message-square" size={18} color={colors.textSecondary} />
+        </TouchableOpacity>
+        <TouchableOpacity
+          style={styles.pillIconButton}
+          onPress={onImageModeToggle}
+          disabled={disabled || !imageModelLoaded}
+          hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+        >
+          <Icon name="image" size={18} color={imageMode === 'force' ? colors.primary : !imageModelLoaded ? colors.textMuted : colors.textSecondary} />
+        </TouchableOpacity>
+        {supportsThinking && (
+          <TouchableOpacity
+            style={styles.pillIconButton}
+            onPress={onThinkingToggle}
+            disabled={disabled}
+            hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+          >
+            <Icon name="zap" size={18} color={thinkingEnabled ? colors.primary : (disabled ? colors.textMuted : colors.textSecondary)} />
+          </TouchableOpacity>
+        )}
+        <TouchableOpacity
+          style={styles.pillIconButton}
+          onPress={() => { triggerHaptic('impactLight'); onToolsPress?.(); }}
+          disabled={disabled || !supportsToolCalling}
+          hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+        >
+          <Icon name="tool" size={18} color={enabledToolCount > 0 ? colors.primary : !supportsToolCalling ? colors.textMuted : colors.textSecondary} />
+        </TouchableOpacity>
+        <TouchableOpacity
+          ref={voicePicker.triggerRef}
+          style={styles.audioVoiceButton}
+          onPress={() => voicePicker.show()}
+          hitSlop={{ top: 4, bottom: 4, left: 8, right: 8 }}
+        >
+          {isChangingVoice
+            ? <ActivityIndicator size="small" color={colors.textMuted} />
+            : <Icon name="user" size={14} color={colors.textSecondary} />}
+          <Text style={styles.audioVoiceLabel}>{currentVoice.label}</Text>
+        </TouchableOpacity>
+
+        {isGenerating && onStop ? (
+          audioStopButton
+        ) : (
+          <VoiceRecordButton
+            isRecording={isRecording}
+            isAvailable={voiceAvailable}
+            isModelLoading={isModelLoading}
+            isTranscribing={isTranscribing}
+            partialResult={partialResult}
+            error={error}
+            disabled={disabled}
+            onStartRecording={onStartRecording}
+            onStopRecording={onStopRecording}
+            onCancelRecording={onCancelRecording}
+          />
+        )}
+      </View>
+
+      <AttachPickerPopover
+        visible={attachPicker.visible}
+        onClose={attachPicker.hide}
+        anchorY={attachPicker.anchor.y}
+        anchorX={attachPicker.anchor.x}
+        supportsVision={supportsVision}
+        onPhoto={onVisionPress}
+        onDocument={onPickDocument}
+      />
+      <VoicePickerPopover
+        visible={voicePicker.visible}
+        onClose={voicePicker.hide}
+        anchorY={voicePicker.anchor.y}
+        anchorX={voicePicker.anchor.x}
+      />
+      <QuickSettingsPopover
+        visible={quickSettings.visible}
+        onClose={quickSettings.hide}
+        anchorY={quickSettings.anchor.y}
+        anchorX={quickSettings.anchor.x}
+        imageMode={imageMode}
+        onImageModeToggle={onImageModeToggle}
+        imageModelLoaded={imageModelLoaded}
+        supportsThinking={supportsThinking}
+        supportsToolCalling={supportsToolCalling}
+        enabledToolCount={enabledToolCount}
+        onToolsPress={onToolsPress}
+      />
+      <CustomAlert
+        visible={alertState.visible}
+        title={alertState.title}
+        message={alertState.message}
+        buttons={alertState.buttons}
+        onClose={() => setAlertState(hideAlert())}
+      />
+    </View>
+  );
+};
diff --git a/src/components/ChatInput/Popovers.tsx b/src/components/ChatInput/Popovers.tsx
index 52a61b69..aaa27521 100644
--- a/src/components/ChatInput/Popovers.tsx
+++ b/src/components/ChatInput/Popovers.tsx
@@ -1,11 +1,16 @@
 import React from 'react';
-import { View, TouchableOpacity, Text, StyleSheet, Modal, TouchableWithoutFeedback } from 'react-native';
+import { View, TouchableOpacity, Text, StyleSheet, Modal, TouchableWithoutFeedback, ActivityIndicator } from 'react-native';
 import Icon from 'react-native-vector-icons/Feather';
+import { useNavigation } from '@react-navigation/native';
 import { useTheme } from '../../theme';
 import { ImageModeState } from '../../types';
-import { useAppStore } from '../../stores';
+import { useAppStore, useTTSStore } from '../../stores';
 import { triggerHaptic } from '../../utils/haptics';
-import { FONTS } from '../../constants';
+import { FONTS, TYPOGRAPHY } from '../../constants';
+import { KOKORO_VOICES } from '../../constants/kokoroModels';
+import type { KokoroVoiceId } from '../../constants/kokoroModels';
+import type { NativeStackNavigationProp } from '@react-navigation/native-stack';
+import type { RootStackParamList } from '../../navigation/types';
 
 // ─── Shared Styles ──────────────────────────────────────────────────────────
 
@@ -100,11 +105,30 @@ export const QuickSettingsPopover: React.FC<QuickSettingsPopoverProps> = ({
 }) => {
   const { colors } = useTheme();
   const { settings, updateSettings } = useAppStore();
+  const { settings: ttsSettings, isBackboneDownloaded, isVocoderDownloaded, isModelLoaded, loadModels, unloadModels, updateSettings: updateTTSSettings } = useTTSStore();
+  const navigation = useNavigation<NativeStackNavigationProp<RootStackParamList>>();
 
   if (!visible) return null;
 
   const imgBadge = getImageModeBadge(imageMode, colors);
   const tools = getToolsStyle(supportsToolCalling, enabledToolCount, colors);
+  const ttsAvailable = isBackboneDownloaded && isVocoderDownloaded;
+  const ttsMode = ttsSettings.interfaceMode;
+  const ttsBadge = !ttsAvailable
+    ? { label: 'N/A', bg: colors.textMuted }
+    : ttsMode === 'audio'
+      ? { label: 'Audio', bg: colors.primary }
+      : { label: 'Chat', bg: `${colors.textMuted}80` };
+
+  const handleTTSToggle = () => {
+    triggerHaptic('impactLight');
+    if (!ttsAvailable) { onClose(); navigation.navigate('TTSSettings'); return; }
+    onClose();
+    const next = ttsMode === 'audio' ? 'chat' : 'audio';
+    updateTTSSettings({ interfaceMode: next });
+    if (next === 'audio' && !isModelLoaded) { loadModels(); }
+    if (next === 'chat' && isModelLoaded) { unloadModels(); }
+  };
 
   return (
     <Modal transparent visible={visible} animationType="fade" onRequestClose={onClose}>
@@ -150,6 +174,18 @@ export const QuickSettingsPopover: React.FC<QuickSettingsPopoverProps> = ({
                 </TouchableOpacity>
               )}
 
+              <TouchableOpacity
+                testID="quick-tts-mode"
+                style={popoverStyles.row}
+                onPress={handleTTSToggle}
+              >
+                <Icon name={ttsMode === 'audio' ? 'volume-2' : 'volume-1'} size={16} color={ttsAvailable ? colors.text : colors.textMuted} />
+                <Text style={[popoverStyles.rowLabel, { color: ttsAvailable ? colors.text : colors.textMuted }]}>Voice</Text>
+                <View style={[popoverStyles.badge, { backgroundColor: ttsBadge.bg }]}>
+                  <Text style={[popoverStyles.badgeText, { color: colors.background }]}>{ttsBadge.label}</Text>
+                </View>
+              </TouchableOpacity>
+
               <TouchableOpacity
                 testID="quick-tools"
                 style={popoverStyles.row}
@@ -227,3 +263,92 @@ export const AttachPickerPopover: React.FC<AttachPickerPopoverProps> = ({
     </Modal>
   );
 };
+
+// ─── Voice Picker Popover ──────────────────────────────────────────────────
+
+interface VoicePickerPopoverProps {
+  visible: boolean;
+  onClose: () => void;
+  anchorY: number;
+  anchorX: number;
+}
+
+export const VoicePickerPopover: React.FC<VoicePickerPopoverProps> = ({
+  visible, onClose, anchorY, anchorX,
+}) => {
+  const { colors } = useTheme();
+  const kokoroVoiceId = useTTSStore((s) => s.settings.kokoroVoiceId);
+  const isChangingVoice = useTTSStore((s) => s.settings.kokoroVoiceId !== s.kokoroActiveVoiceId);
+  const { isSpeaking, stop, updateSettings } = useTTSStore();
+
+  if (!visible) return null;
+
+  const handleSelect = (voice: typeof KOKORO_VOICES[number]) => {
+    triggerHaptic('impactLight');
+    // Stop playback first — KokoroTTSManager defers voice config changes
+    // until isSpeaking is false, so no native crash
+    if (isSpeaking) { stop(); }
+    updateSettings({ kokoroVoiceId: voice.id as KokoroVoiceId, speed: voice.defaultSpeed });
+    onClose();
+  };
+
+  return (
+    <Modal transparent visible={visible} animationType="fade" onRequestClose={onClose}>
+      <TouchableWithoutFeedback onPress={onClose}>
+        <View style={popoverStyles.overlay}>
+          <TouchableWithoutFeedback>
+            <View style={[popoverStyles.popover, voicePickerStyles.popover, {
+              backgroundColor: colors.surface,
+              borderColor: colors.border,
+              bottom: anchorY + 8,
+              right: anchorX,
+            }]}>
+              {KOKORO_VOICES.map((voice) => {
+                const isActive = voice.id === kokoroVoiceId;
+                return (
+                  <TouchableOpacity
+                    key={voice.id}
+                    style={popoverStyles.row}
+                    onPress={() => handleSelect(voice)}
+                  >
+                    <Icon
+                      name="user"
+                      size={14}
+                      color={isActive ? colors.primary : colors.textMuted}
+                    />
+                    <View style={voicePickerStyles.labelCol}>
+                      <Text style={[popoverStyles.rowLabel, { color: isActive ? colors.primary : colors.text }]}>
+                        {voice.label}
+                      </Text>
+                      <Text style={[voicePickerStyles.accent, { color: colors.textMuted }]}>
+                        {voice.persona}
+                      </Text>
+                    </View>
+                    {isActive && (
+                      isChangingVoice
+                        ? <ActivityIndicator size="small" color={colors.primary} />
+                        : <Icon name="check" size={14} color={colors.primary} />
+                    )}
+                  </TouchableOpacity>
+                );
+              })}
+            </View>
+          </TouchableWithoutFeedback>
+        </View>
+      </TouchableWithoutFeedback>
+    </Modal>
+  );
+};
+
+const voicePickerStyles = StyleSheet.create({
+  popover: {
+    minWidth: 200,
+  },
+  labelCol: {
+    flex: 1,
+  },
+  accent: {
+    ...TYPOGRAPHY.meta,
+    marginTop: 1,
+  },
+});
diff --git a/src/components/ChatInput/Voice.ts b/src/components/ChatInput/Voice.ts
index 1cc66a19..616b6bca 100644
--- a/src/components/ChatInput/Voice.ts
+++ b/src/components/ChatInput/Voice.ts
@@ -1,35 +1,195 @@
-import { useEffect, useRef } from 'react';
+import { useEffect, useRef, useState } from 'react';
 import { useWhisperTranscription } from '../../hooks/useWhisperTranscription';
-import { useWhisperStore } from '../../stores';
+import { useWhisperStore, useChatStore } from '../../stores';
+import { useTTSStore } from '../../stores/ttsStore';
+import { llmService } from '../../services/llm';
+import { audioRecorderService } from '../../services/audioRecorderService';
+import { whisperService } from '../../services/whisperService';
+import logger from '../../utils/logger';
 
 interface UseVoiceInputParams {
   conversationId?: string | null;
   onTranscript: (text: string) => void;
+  onAudioAttachment?: (uri: string, format: 'wav' | 'mp3', durationSeconds?: number) => void;
+  /** Called in Audio Mode to auto-send. Includes audio info so caller can build attachment atomically. */
+  onAutoSend?: (text: string, audio: { uri: string; format: 'wav' | 'mp3'; durationSeconds: number }) => void;
 }
 
-export function useVoiceInput({ conversationId, onTranscript }: UseVoiceInputParams) {
+export function useVoiceInput({ conversationId, onTranscript, onAudioAttachment, onAutoSend }: UseVoiceInputParams) {
   const recordingConversationIdRef = useRef<string | null>(null);
   const onTranscriptRef = useRef(onTranscript);
   onTranscriptRef.current = onTranscript;
+  const onAudioAttachmentRef = useRef(onAudioAttachment);
+  onAudioAttachmentRef.current = onAudioAttachment;
+  const onAutoSendRef = useRef(onAutoSend);
+  onAutoSendRef.current = onAutoSend;
   const { downloadedModelId } = useWhisperStore();
+  const [isDirectRecording, setIsDirectRecording] = useState(false);
+  const [isAudioModeRecording, setIsAudioModeRecording] = useState(false);
+  const [isTranscribingFile, setIsTranscribingFile] = useState(false);
+  const [directError, setDirectError] = useState<string | null>(null);
 
   const {
-    isRecording,
+    isRecording: isWhisperRecording,
     isModelLoading,
-    isTranscribing,
+    isTranscribing: isWhisperTranscribing,
     partialResult,
     finalResult,
-    error,
-    startRecording: startRecordingBase,
-    stopRecording,
+    error: whisperError,
+    startRecording: startWhisperRecording,
+    stopRecording: stopWhisperRecording,
     clearResult,
   } = useWhisperTranscription();
 
-  const voiceAvailable = !!downloadedModelId;
+  const supportsDirectAudio = (): boolean => {
+    const support = llmService.getMultimodalSupport();
+    return Boolean(support?.audio) && audioRecorderService.supportsDirectAudioInput();
+  };
+
+  const isInAudioInterfaceMode = (): boolean =>
+    useTTSStore.getState().settings.interfaceMode === 'audio';
+
+  // Use file-based transcription path when: Audio Mode + Whisper available + not direct audio model
+  const shouldUseFilePath = (): boolean =>
+    isInAudioInterfaceMode() && !!downloadedModelId && !supportsDirectAudio();
+
+  const isTranscribing = isWhisperTranscribing || isTranscribingFile;
+  const isRecording = isDirectRecording || isAudioModeRecording || isWhisperRecording;
+  const error = directError ?? whisperError;
+
+  // voiceAvailable: direct audio OR whisper downloaded
+  const voiceAvailable = supportsDirectAudio() || !!downloadedModelId;
 
   const startRecording = async () => {
     recordingConversationIdRef.current = conversationId || null;
-    await startRecordingBase();
+    setDirectError(null);
+    // Stop any TTS playback before recording — mic and speaker shouldn't overlap
+    const tts = useTTSStore.getState();
+    if (tts.isSpeaking) { tts.stop(); }
+
+    if (supportsDirectAudio()) {
+      try {
+        setIsDirectRecording(true);
+        await audioRecorderService.startRecording();
+      } catch (err) {
+        setIsDirectRecording(false);
+        const msg = err instanceof Error ? err.message : 'Recording failed';
+        logger.error('[Voice] Direct audio recording error:', err);
+        setDirectError(msg);
+      }
+      return;
+    }
+
+    if (shouldUseFilePath()) {
+      try {
+        setIsAudioModeRecording(true);
+        await audioRecorderService.startRecording();
+      } catch (err) {
+        setIsAudioModeRecording(false);
+        const msg = err instanceof Error ? err.message : 'Recording failed';
+        logger.error('[Voice] Audio mode recording error:', err);
+        setDirectError(msg);
+      }
+      return;
+    }
+
+    await startWhisperRecording();
+  };
+
+  const stopRecording = async () => {
+    if (isDirectRecording) {
+      try {
+        const { path, durationSeconds } = await audioRecorderService.stopRecording();
+        setIsDirectRecording(false);
+        if (!recordingConversationIdRef.current || recordingConversationIdRef.current === conversationId) {
+          const format = audioRecorderService.getFormat();
+          // In Audio Mode, auto-send directly — no transcription needed for multimodal models
+          if (onAutoSendRef.current && isInAudioInterfaceMode()) {
+            onAutoSendRef.current('', { uri: path, format, durationSeconds });
+
+            // Parallel transcription: send audio to model immediately, transcribe in background
+            // so the voice bubble gets a transcript for display/playback review
+            if (downloadedModelId) {
+              const convId = conversationId;
+              whisperService.transcribeFile(path).then(text => {
+                if (!text?.trim() || !convId) return;
+                const conv = useChatStore.getState().conversations.find(c => c.id === convId);
+                const msg = conv?.messages.find(m =>
+                  m.role === 'user' && m.attachments?.some(a => a.uri === path),
+                );
+                if (msg) {
+                  useChatStore.getState().updateMessageContent(convId, msg.id, text.trim());
+                }
+              }).catch(err => logger.error('[Voice] Background transcription error:', err));
+            }
+          } else {
+            onAudioAttachmentRef.current?.(path, format, durationSeconds);
+          }
+        }
+        recordingConversationIdRef.current = null;
+      } catch (err) {
+        setIsDirectRecording(false);
+        logger.error('[Voice] Failed to stop direct recording:', err);
+      }
+      return;
+    }
+
+    if (isAudioModeRecording) {
+      try {
+        const { path, durationSeconds } = await audioRecorderService.stopRecording();
+        setIsAudioModeRecording(false);
+        if (recordingConversationIdRef.current && recordingConversationIdRef.current !== conversationId) {
+          recordingConversationIdRef.current = null;
+          return;
+        }
+        setIsTranscribingFile(true);
+        let text = '';
+        try {
+          text = await whisperService.transcribeFile(path);
+        } catch (transcribeErr) {
+          logger.error('[Voice] File transcription error:', transcribeErr);
+        }
+        setIsTranscribingFile(false);
+        recordingConversationIdRef.current = null;
+        if (text.trim()) {
+          if (onAutoSendRef.current) {
+            onAutoSendRef.current(text.trim(), { uri: path, format: 'wav', durationSeconds });
+          } else {
+            onAudioAttachmentRef.current?.(path, 'wav', durationSeconds);
+            onTranscriptRef.current(text.trim());
+          }
+        } else {
+          // Transcription returned nothing — clip too short or too quiet
+          setDirectError("Couldn't hear that — try again");
+          setTimeout(() => setDirectError(null), 3000);
+        }
+      } catch (err) {
+        setIsAudioModeRecording(false);
+        setIsTranscribingFile(false);
+        logger.error('[Voice] Failed to stop audio mode recording:', err);
+      }
+      return;
+    }
+
+    await stopWhisperRecording();
+  };
+
+  const cancelRecording = () => {
+    if (isDirectRecording) {
+      audioRecorderService.cancelRecording();
+      setIsDirectRecording(false);
+      recordingConversationIdRef.current = null;
+      return;
+    }
+    if (isAudioModeRecording) {
+      audioRecorderService.cancelRecording();
+      setIsAudioModeRecording(false);
+      recordingConversationIdRef.current = null;
+      return;
+    }
+    stopWhisperRecording();
+    clearResult();
+    recordingConversationIdRef.current = null;
   };
 
   useEffect(() => {
@@ -49,5 +209,20 @@ export function useVoiceInput({ conversationId, onTranscript }: UseVoiceInputPar
     }
   }, [finalResult, clearResult, conversationId]);
 
-  return { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, clearResult };
+  return {
+    isRecording,
+    isModelLoading,
+    isTranscribing,
+    partialResult,
+    error,
+    voiceAvailable,
+    startRecording,
+    stopRecording,
+    cancelRecording,
+    clearResult,
+    /** True when model accepts audio directly (no Whisper needed) */
+    isDirectAudioMode: supportsDirectAudio(),
+    /** True when recording in Audio Mode for file-based transcription */
+    isAudioModeRecording,
+  };
 }
diff --git a/src/components/ChatInput/index.tsx b/src/components/ChatInput/index.tsx
index 1ebbb496..0f2a97a1 100644
--- a/src/components/ChatInput/index.tsx
+++ b/src/components/ChatInput/index.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useRef, useEffect } from 'react';
+import React, { useState, useRef, useEffect, useMemo } from 'react';
 import { View, TextInput, TouchableOpacity, Animated, StyleSheet } from 'react-native';
 import Icon from 'react-native-vector-icons/Feather';
 import { useTheme, useThemedStyles } from '../../theme';
@@ -13,6 +13,10 @@ import { AttachmentPreview, useAttachments } from './Attachments';
 import { useVoiceInput } from './Voice';
 import { QuickSettingsPopover, AttachPickerPopover } from './Popovers';
 import { useKeyboardAwarePopover } from './useKeyboardAwarePopover';
+import { useTTSStore } from '../../stores/ttsStore';
+import { useAppStore } from '../../stores';
+import { KOKORO_VOICES } from '../../constants/kokoroModels';
+import { AudioModeLayout } from './AudioModeLayout';
 
 interface ChatInputProps {
   onSend: (message: string, attachments?: MediaAttachment[], imageMode?: ImageModeState) => void;
@@ -33,7 +37,6 @@ interface ChatInputProps {
   supportsToolCalling?: boolean;
   supportsThinking?: boolean;
   onRepairVision?: () => void;
-  /** When set, mounts a single AttachStep for that index. Only one at a time to avoid waypoint dots. */
   activeSpotlight?: number | null;
 }
 
@@ -69,7 +72,9 @@ export const ChatInput: React.FC<ChatInputProps> = ({
   const [alertState, setAlertState] = useState<AlertState>(initialAlertState);
   const quickSettings = useKeyboardAwarePopover();
   const attachPicker = useKeyboardAwarePopover();
+  const voicePicker = useKeyboardAwarePopover();
   const inputRef = useRef<TextInput>(null);
+  const attachmentsRef = useRef<MediaAttachment[]>([]);
   const hasText = message.length > 0;
   const iconsAnim = useRef(new Animated.Value(0)).current;
 
@@ -81,9 +86,17 @@ export const ChatInput: React.FC<ChatInputProps> = ({
     }).start();
   }, [hasText, iconsAnim]);
 
-  const { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument } = useAttachments(setAlertState);
+  const { attachments, removeAttachment, clearAttachments, handlePickImage, handlePickDocument, addAudioAttachment } = useAttachments(setAlertState);
+  attachmentsRef.current = attachments;
+  const ttsInterfaceMode = useTTSStore((s) => s.settings.interfaceMode);
+  const kokoroVoiceId = useTTSStore((s) => s.settings.kokoroVoiceId);
+  const isAudioMode = ttsInterfaceMode === 'audio';
+  const currentVoice = useMemo(
+    () => KOKORO_VOICES.find((v) => v.id === kokoroVoiceId) ?? KOKORO_VOICES[0],
+    [kokoroVoiceId],
+  );
 
-  const { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, clearResult } = useVoiceInput({
+  const { isRecording, isModelLoading, isTranscribing, partialResult, error, voiceAvailable, startRecording, stopRecording, cancelRecording } = useVoiceInput({
     conversationId,
     onTranscript: (text) => {
       setMessage(prev => {
@@ -91,8 +104,33 @@ export const ChatInput: React.FC<ChatInputProps> = ({
         return prefix + text;
       });
     },
+    onAudioAttachment: (uri, format, durationSeconds) => {
+      addAudioAttachment(uri, format, durationSeconds);
+    },
+    onAutoSend: isAudioMode ? (text, audio) => {
+      const audioAttachment: MediaAttachment = {
+        id: `audio-${Date.now()}`,
+        type: 'audio',
+        uri: audio.uri,
+        audioFormat: audio.format,
+        audioDurationSeconds: audio.durationSeconds,
+        fileName: audio.uri.split('/').pop(),
+      };
+      triggerHaptic('impactMedium');
+      const all = [...attachmentsRef.current, audioAttachment];
+      onSend(text, all, imageMode);
+      clearAttachments();
+    } : undefined,
   });
 
+  const { settings: appSettings, updateSettings: updateAppSettings } = useAppStore();
+  const thinkingEnabled = appSettings.thinkingEnabled;
+
+  const handleThinkingToggle = () => {
+    triggerHaptic('impactLight');
+    updateAppSettings({ thinkingEnabled: !thinkingEnabled });
+  };
+
   const canSend = (message.trim().length > 0 || attachments.length > 0) && !disabled;
 
   const handleSend = () => {
@@ -137,9 +175,49 @@ export const ChatInput: React.FC<ChatInputProps> = ({
     }
   };
 
-  const handleQuickSettingsPress = () => quickSettings.show();
-
-  const handleAttachPress = () => attachPicker.show();
+  // ─── Audio mode: simplified mic-only layout ─────────────────────────────────
+  if (isAudioMode) {
+    return (
+      <AudioModeLayout
+        styles={styles}
+        disabled={disabled}
+        isGenerating={isGenerating}
+        imageMode={imageMode}
+        imageModelLoaded={imageModelLoaded}
+        supportsThinking={supportsThinking}
+        supportsToolCalling={supportsToolCalling}
+        enabledToolCount={enabledToolCount}
+        thinkingEnabled={thinkingEnabled}
+        currentVoice={currentVoice}
+        attachments={attachments}
+        onRemoveAttachment={removeAttachment}
+        queueCount={queueCount}
+        queuedTexts={queuedTexts}
+        onClearQueue={onClearQueue}
+        isRecording={isRecording}
+        voiceAvailable={voiceAvailable}
+        isModelLoading={isModelLoading}
+        isTranscribing={isTranscribing}
+        partialResult={partialResult}
+        error={error}
+        onStartRecording={startRecording}
+        onStopRecording={stopRecording}
+        onCancelRecording={cancelRecording}
+        onStop={onStop}
+        onImageModeToggle={handleImageModeToggle}
+        onThinkingToggle={handleThinkingToggle}
+        onToolsPress={onToolsPress}
+        onVisionPress={handleVisionPress}
+        onPickDocument={handlePickDocument}
+        attachPicker={attachPicker}
+        voicePicker={voicePicker}
+        quickSettings={quickSettings}
+        supportsVision={supportsVision}
+        alertState={alertState}
+        setAlertState={setAlertState}
+      />
+    );
+  }
 
   const actionButton = canSend ? (
     <TouchableOpacity
@@ -168,12 +246,12 @@ export const ChatInput: React.FC<ChatInputProps> = ({
       disabled={disabled}
       onStartRecording={startRecording}
       onStopRecording={stopRecording}
-      onCancelRecording={() => { stopRecording(); clearResult(); }}
+      onCancelRecording={cancelRecording}
       asSendButton
     />
   );
 
-  const content = (
+  return (
     <View style={styles.container}>
       <AttachmentPreview attachments={attachments} onRemove={removeAttachment} />
       <QueueRow
@@ -182,7 +260,6 @@ export const ChatInput: React.FC<ChatInputProps> = ({
         onClearQueue={onClearQueue}
       />
       <View style={styles.mainRow}>
-        {/* Pill: text input + right icons */}
         <View style={styles.pill}>
           <TextInput
             ref={inputRef}
@@ -198,7 +275,6 @@ export const ChatInput: React.FC<ChatInputProps> = ({
             blurOnSubmit={false}
             returnKeyType="default"
           />
-          {/* Icons collapse when user starts typing, reappear when input is empty */}
           <Animated.View
             pointerEvents={hasText ? 'none' : 'auto'}
             style={[styles.pillIcons, {
@@ -207,38 +283,40 @@ export const ChatInput: React.FC<ChatInputProps> = ({
               overflow: 'hidden' as const,
             }]}
           >
-            {/* Attach button — opens picker for image or document */}
             <TouchableOpacity
               ref={attachPicker.triggerRef}
               testID="attach-button"
               style={styles.pillIconButton}
-              onPress={handleAttachPress}
+              onPress={() => attachPicker.show()}
               disabled={disabled}
               hitSlop={{ top: 4, bottom: 4, left: 4, right: 4 }}
             >
-              <Icon
-                name="plus"
-                size={20}
-                color={disabled ? colors.textMuted : colors.textSecondary}
-              />
+              <Icon name="plus" size={20} color={disabled ? colors.textMuted : colors.textSecondary} />
             </TouchableOpacity>
-
-            {/* Quick settings button */}
+            {supportsThinking && (
+              <TouchableOpacity
+                testID="thinking-toggle-button"
+                style={styles.pillIconButton}
+                onPress={handleThinkingToggle}
+                disabled={disabled}
+                hitSlop={{ top: 4, bottom: 4, left: 4, right: 4 }}
+              >
+                <Icon name="zap" size={18} color={thinkingEnabled ? colors.primary : (disabled ? colors.textMuted : colors.textSecondary)} />
+              </TouchableOpacity>
+            )}
             <TouchableOpacity
               ref={quickSettings.triggerRef}
               testID="quick-settings-button"
               style={styles.pillIconButton}
-              onPress={handleQuickSettingsPress}
+              onPress={() => quickSettings.show()}
               disabled={disabled}
               hitSlop={{ top: 4, bottom: 4, left: 4, right: 4 }}
             >
               <Icon name="settings" size={18} color={disabled ? colors.textMuted : colors.textSecondary} />
             </TouchableOpacity>
-
           </Animated.View>
         </View>
 
-        {/* Circular action button — conditionally wrapped with AttachStep */}
         {activeSpotlight === 12 ? (
           <AttachStep index={12} style={spotlightStyles.centered}>{actionButton}</AttachStep>
         ) : actionButton}
@@ -253,7 +331,6 @@ export const ChatInput: React.FC<ChatInputProps> = ({
         onPhoto={handleVisionPress}
         onDocument={handlePickDocument}
       />
-
       <QuickSettingsPopover
         visible={quickSettings.visible}
         onClose={quickSettings.hide}
@@ -267,7 +344,6 @@ export const ChatInput: React.FC<ChatInputProps> = ({
         enabledToolCount={enabledToolCount}
         onToolsPress={onToolsPress}
       />
-
       <CustomAlert
         visible={alertState.visible}
         title={alertState.title}
@@ -277,11 +353,8 @@ export const ChatInput: React.FC<ChatInputProps> = ({
       />
     </View>
   );
-
-  return content;
 };
 
 const spotlightStyles = StyleSheet.create({
   centered: { alignSelf: 'center' },
 });
-
diff --git a/src/components/ChatInput/styles.ts b/src/components/ChatInput/styles.ts
index a9f8df69..7aab9a88 100644
--- a/src/components/ChatInput/styles.ts
+++ b/src/components/ChatInput/styles.ts
@@ -1,5 +1,5 @@
 import type { ThemeColors, ThemeShadows } from '../../theme';
-import { FONTS } from '../../constants';
+import { FONTS, TYPOGRAPHY, SPACING } from '../../constants';
 import { Platform } from 'react-native';
 
 export const PILL_ICON_SIZE = 32;
@@ -208,4 +208,25 @@ export const createStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({
     fontWeight: '500' as const,
     color: colors.primary,
   },
+  // Audio mode layout
+  audioModeRow: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    justifyContent: 'center' as const,
+    gap: SPACING.md,
+    paddingVertical: SPACING.xs,
+  },
+  // Voice cycle button — shows icon + voice name
+  audioVoiceButton: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    gap: 4,
+    paddingHorizontal: SPACING.sm,
+    height: 32,
+    borderRadius: 16,
+  },
+  audioVoiceLabel: {
+    ...TYPOGRAPHY.meta,
+    color: colors.textSecondary,
+  },
 });
diff --git a/src/components/ChatInput/useKeyboardAwarePopover.ts b/src/components/ChatInput/useKeyboardAwarePopover.ts
index 13cdfaa4..dc4f0b7b 100644
--- a/src/components/ChatInput/useKeyboardAwarePopover.ts
+++ b/src/components/ChatInput/useKeyboardAwarePopover.ts
@@ -1,13 +1,15 @@
 import { useRef, useEffect, useState, useCallback } from 'react';
 import { Keyboard, Dimensions, Platform, StatusBar, TouchableOpacity } from 'react-native';
-import { SPACING } from '../../constants';
 
 /**
  * Hook that manages keyboard-aware popover positioning.
  * When the keyboard is visible, dismisses it and waits for `keyboardDidHide`
  * before measuring position to ensure correct coordinates.
+ *
+ * anchorY → distance from screen bottom to trigger top (popover sits above trigger)
+ * anchorX → distance from screen right to trigger right edge (popover right-aligns with trigger)
  */
-export function useKeyboardAwarePopover(offsetX: number = SPACING.md) {
+export function useKeyboardAwarePopover() {
     const [anchor, setAnchor] = useState({ y: 0, x: 0 });
     const [visible, setVisible] = useState(false);
     const triggerRef = useRef<React.ElementRef<typeof TouchableOpacity>>(null);
@@ -27,13 +29,15 @@ export function useKeyboardAwarePopover(offsetX: number = SPACING.md) {
 
     const show = useCallback(() => {
         const measureAndShow = () => {
-            triggerRef.current?.measureInWindow?.((...args: number[]) => {
-                const screenH = Dimensions.get('window').height;
-                // On Android, measureInWindow Y includes the status bar but
-                // Dimensions.get('window').height may not — subtract the offset
-                // so the popover sits snugly above the trigger button.
+            triggerRef.current?.measureInWindow?.((btnX: number, btnY: number, btnW: number) => {
+                const { height: screenH, width: screenW } = Dimensions.get('window');
+                // On Android, measureInWindow Y includes the status bar height.
                 const statusBarOffset = Platform.OS === 'android' ? (StatusBar.currentHeight ?? 0) : 0;
-                setAnchor({ y: screenH - (args[1] ?? 0) - statusBarOffset, x: offsetX });
+                // bottom: how far the popover bottom sits above the screen bottom (= above the trigger)
+                const y = screenH - (btnY ?? 0) - statusBarOffset;
+                // right: align popover's right edge with the trigger button's right edge
+                const x = screenW - ((btnX ?? 0) + (btnW ?? 0));
+                setAnchor({ y, x });
             });
             setVisible(true);
         };
@@ -54,7 +58,7 @@ export function useKeyboardAwarePopover(offsetX: number = SPACING.md) {
         } else {
             measureAndShow();
         }
-    }, [offsetX]);
+    }, []);
 
     const hide = useCallback(() => setVisible(false), []);
 
diff --git a/src/components/ChatMessage/components/ActionMenuSheet.tsx b/src/components/ChatMessage/components/ActionMenuSheet.tsx
index 1f380fe2..802bc5db 100644
--- a/src/components/ChatMessage/components/ActionMenuSheet.tsx
+++ b/src/components/ChatMessage/components/ActionMenuSheet.tsx
@@ -12,11 +12,13 @@ interface ActionMenuSheetProps {
   canEdit: boolean;
   canRetry: boolean;
   canGenerateImage: boolean;
+  canSpeak: boolean;
   styles: any;
   onCopy: () => void;
   onEdit: () => void;
   onRetry: () => void;
   onGenerateImage: () => void;
+  onSpeak: () => void;
 }
 
 export function ActionMenuSheet({
@@ -26,11 +28,13 @@ export function ActionMenuSheet({
   canEdit,
   canRetry,
   canGenerateImage,
+  canSpeak,
   styles,
   onCopy,
   onEdit,
   onRetry,
   onGenerateImage,
+  onSpeak,
 }: ActionMenuSheetProps) {
   const { colors } = useTheme();
 
@@ -89,6 +93,18 @@ export function ActionMenuSheet({
             <Text style={styles.actionSheetText}>Generate Image</Text>
           </AnimatedPressable>
         )}
+
+        {!isUser && canSpeak && (
+          <AnimatedPressable
+            testID="action-speak"
+            hapticType="selection"
+            style={styles.actionSheetItem}
+            onPress={onSpeak}
+          >
+            <Icon name="volume-2" size={18} color={colors.textSecondary} />
+            <Text style={styles.actionSheetText}>Speak</Text>
+          </AnimatedPressable>
+        )}
       </View>
     </AppSheet>
   );
diff --git a/src/components/ChatMessage/components/MessageAttachments.tsx b/src/components/ChatMessage/components/MessageAttachments.tsx
index adead2c9..b798a2fc 100644
--- a/src/components/ChatMessage/components/MessageAttachments.tsx
+++ b/src/components/ChatMessage/components/MessageAttachments.tsx
@@ -78,7 +78,22 @@ export function MessageAttachments({
   return (
     <View testID="message-attachments" style={styles.attachmentsContainer}>
       {attachments.map((attachment, index) =>
-        attachment.type === 'document' ? (
+        attachment.type === 'audio' ? (
+          <View
+            key={attachment.id}
+            style={[
+              styles.documentBadge,
+              isUser ? styles.documentBadgeUser : styles.documentBadgeAssistant,
+            ]}
+          >
+            <Icon name="mic" size={14} color={isUser ? colors.background : colors.textSecondary} />
+            <Text
+              style={[styles.documentBadgeText, isUser ? styles.documentBadgeTextUser : styles.documentBadgeTextAssistant]}
+            >
+              Voice message
+            </Text>
+          </View>
+        ) : attachment.type === 'document' ? (
           <TouchableOpacity
             key={attachment.id}
             testID={`document-badge-${index}`}
diff --git a/src/components/ChatMessage/components/MessageContent.tsx b/src/components/ChatMessage/components/MessageContent.tsx
index e2fa7afc..cbaefaac 100644
--- a/src/components/ChatMessage/components/MessageContent.tsx
+++ b/src/components/ChatMessage/components/MessageContent.tsx
@@ -43,6 +43,17 @@ export function MessageContent({
         </Text>
       );
     }
+    // No content but may have thinking — render ThinkingBlock alone (audio mode above-bubble use case)
+    if (parsedContent.thinking) {
+      return (
+        <ThinkingBlock
+          parsedContent={parsedContent}
+          showThinking={showThinking}
+          onToggle={onToggleThinking}
+          styles={styles}
+        />
+      );
+    }
     return null;
   }
 
diff --git a/src/components/ChatMessage/index.tsx b/src/components/ChatMessage/index.tsx
index d80310b7..6a6a20e4 100644
--- a/src/components/ChatMessage/index.tsx
+++ b/src/components/ChatMessage/index.tsx
@@ -1,6 +1,7 @@
 import React, { useState } from 'react';
 import { View, Text, TouchableOpacity, Clipboard } from 'react-native';
 import { useTheme, useThemedStyles } from '../../theme';
+import { useTTSStore } from '../../stores/ttsStore';
 import Icon from 'react-native-vector-icons/Feather';
 import { stripControlTokens } from '../../utils/messageContent';
 import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../CustomAlert';
@@ -133,14 +134,16 @@ type MetaRowProps = {
   isStreaming?: boolean;
   showActions: boolean;
   onMenuOpen: () => void;
+  metaExtra?: React.ReactNode;
 };
 
-const MessageMetaRow: React.FC<MetaRowProps> = ({ message, styles, isStreaming, showActions, onMenuOpen }) => (
+const MessageMetaRow: React.FC<MetaRowProps> = ({ message, styles, isStreaming, showActions, onMenuOpen, metaExtra }) => (
   <View style={styles.metaRow}>
     <Text style={styles.timestamp}>{formatTime(message.timestamp)}</Text>
     {message.generationTimeMs != null && message.role === 'assistant' && (
       <Text style={styles.generationTime}>{formatDuration(message.generationTimeMs)}</Text>
     )}
+    {metaExtra}
     {showActions && !isStreaming && (
       <TouchableOpacity style={styles.actionHint} onPress={onMenuOpen}>
         <Text style={styles.actionHintText}>•••</Text>
@@ -157,7 +160,9 @@ const ToolCallWithThinking: React.FC<{
   return (
     <View style={styles.systemInfoContainer}>
       {!!tc?.thinking && (
-        <ThinkingBlock parsedContent={tc} showThinking={showThinking} onToggle={onToggle} styles={styles} />
+        <View style={styles.thinkingBlockWrapper}>
+          <ThinkingBlock parsedContent={tc} showThinking={showThinking} onToggle={onToggle} styles={styles} />
+        </View>
       )}
       {hasText && (
         <View testID="tool-call-pre-text" style={styles.toolCallPreText}>
@@ -179,11 +184,17 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
   onGenerateImage,
   showActions = true,
   canGenerateImage = false,
+  canSpeak: canSpeakProp = false,
+  onSpeak: onSpeakProp,
   showGenerationDetails = false,
   animateEntry = false,
+  metaExtra,
 }) => {
   const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
+  const ttsCanSpeak = useTTSStore(
+    s => s.settings.enabled && s.isBackboneDownloaded && s.isVocoderDownloaded,
+  );
   const [showActionMenu, setShowActionMenu] = useState(false);
   const [isEditing, setIsEditing] = useState(false);
   const [editedContent, setEditedContent] = useState(message.content);
@@ -242,6 +253,22 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
     setShowActionMenu(false);
   };
 
+  const canSpeak = !isUser && !isStreaming && (canSpeakProp || ttsCanSpeak);
+
+  const handleSpeak = () => {
+    setShowActionMenu(false);
+    if (onSpeakProp) {
+      onSpeakProp();
+      return;
+    }
+    const tts = useTTSStore.getState();
+    if (!tts.isModelLoaded) {
+      tts.loadModels().then(() => useTTSStore.getState().speak(displayContent, message.id));
+    } else {
+      tts.speak(displayContent, message.id);
+    }
+  };
+
   if (message.isSystemInfo) {
     return <SystemInfoMessage content={displayContent} styles={styles}
       alertState={alertState} onCloseAlert={() => setAlertState(hideAlert())} />;
@@ -291,6 +318,7 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
         isStreaming={isStreaming}
         showActions={showActions}
         onMenuOpen={() => setShowActionMenu(true)}
+        metaExtra={metaExtra}
       />
 
       {showGenerationDetails && !isUser && message.generationMeta && (
@@ -310,11 +338,13 @@ export const ChatMessage: React.FC<ChatMessageProps> = ({
         canEdit={!!onEdit}
         canRetry={!!onRetry}
         canGenerateImage={canGenerateImage && !!onGenerateImage}
+        canSpeak={canSpeak}
         styles={styles}
         onCopy={handleCopy}
         onEdit={handleEdit}
         onRetry={handleRetry}
         onGenerateImage={handleGenerateImage}
+        onSpeak={handleSpeak}
       />
       <EditSheet
         visible={isEditing}
diff --git a/src/components/ChatMessage/styles.ts b/src/components/ChatMessage/styles.ts
index 83c79a22..3b331281 100644
--- a/src/components/ChatMessage/styles.ts
+++ b/src/components/ChatMessage/styles.ts
@@ -174,6 +174,11 @@ const createThinkingStyles = (colors: ThemeColors) => ({
     overflow: 'hidden' as const,
     width: '100%' as const,
   },
+  /** Constrains the ThinkingBlock when rendered outside a message bubble (e.g. ToolCallWithThinking) */
+  thinkingBlockWrapper: {
+    width: '88%' as const,
+    alignSelf: 'flex-start' as const,
+  },
   thinkingHeader: {
     flexDirection: 'row' as const,
     alignItems: 'flex-start' as const,
diff --git a/src/components/ChatMessage/types.ts b/src/components/ChatMessage/types.ts
index f93ef8ec..becd367a 100644
--- a/src/components/ChatMessage/types.ts
+++ b/src/components/ChatMessage/types.ts
@@ -10,8 +10,12 @@ export interface ChatMessageProps {
   onGenerateImage?: (prompt: string) => void;
   showActions?: boolean;
   canGenerateImage?: boolean;
+  canSpeak?: boolean;
+  onSpeak?: () => void;
   showGenerationDetails?: boolean;
   animateEntry?: boolean;
+  /** Extra element rendered at the end of the meta row (e.g. TTSButton) */
+  metaExtra?: React.ReactNode;
 }
 
 export interface ParsedContent {
diff --git a/src/components/GenerationSettingsModal/ImageQualitySliders.tsx b/src/components/GenerationSettingsModal/ImageQualitySliders.tsx
index f1e0544d..2feac93a 100644
--- a/src/components/GenerationSettingsModal/ImageQualitySliders.tsx
+++ b/src/components/GenerationSettingsModal/ImageQualitySliders.tsx
@@ -1,6 +1,6 @@
 import React from 'react';
 import { View, Text, Switch, Platform, TouchableOpacity } from 'react-native';
-import Slider from '@react-native-community/slider';
+import { NumericStepper } from '../NumericStepper';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
 import { useClearGpuCache } from '../../hooks/useImageGenerationSettings';
@@ -24,70 +24,38 @@ const ClearGPUCacheButton: React.FC = () => {
   );
 };
 
-/** Basic sliders: Image Steps + Image Size */
+/** Basic controls: Image Steps + Image Size */
 export const ImageQualityBasicSliders: React.FC = () => {
-  const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const { settings, updateSettings } = useAppStore();
 
   return (
     <>
       <View style={styles.settingGroup}>
-        <View style={styles.settingHeader}>
-          <Text style={styles.settingLabel}>Image Steps</Text>
-          <Text style={styles.settingValue}>{settings.imageSteps || 8}</Text>
-        </View>
-        <Text style={styles.settingDescription}>
-          4-8 steps for speed, 20-50 for quality
-        </Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={4}
-          maximumValue={50}
-          step={1}
+        <Text style={styles.settingLabel}>Image Steps</Text>
+        <Text style={styles.settingDescription}>4-8 steps for speed, 20-50 for quality</Text>
+        <NumericStepper
           value={settings.imageSteps || 8}
-          onSlidingComplete={(value) => updateSettings({ imageSteps: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surfaceLight}
-          thumbTintColor={colors.primary}
+          min={4} max={50} step={1}
+          onChange={(value) => updateSettings({ imageSteps: value })}
         />
-        <View style={styles.sliderLabels}>
-          <Text style={styles.sliderMinMax}>4</Text>
-          <Text style={styles.sliderMinMax}>50</Text>
-        </View>
       </View>
 
       <View style={styles.settingGroup}>
-        <View style={styles.settingHeader}>
-          <Text style={styles.settingLabel}>Image Size</Text>
-          <Text style={styles.settingValue}>
-            {settings.imageWidth ?? 256}x{settings.imageHeight ?? 256}
-          </Text>
-        </View>
-        <Text style={styles.settingDescription}>
-          Output resolution (smaller = faster, larger = more detail)
-        </Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={128}
-          maximumValue={512}
-          step={64}
+        <Text style={styles.settingLabel}>Image Size</Text>
+        <Text style={styles.settingDescription}>Output resolution (smaller = faster, larger = more detail)</Text>
+        <NumericStepper
           value={settings.imageWidth ?? 256}
-          onSlidingComplete={(value) => updateSettings({ imageWidth: value, imageHeight: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surfaceLight}
-          thumbTintColor={colors.primary}
+          min={128} max={512} step={64}
+          formatValue={(v) => `${v}x${v}`}
+          onChange={(value) => updateSettings({ imageWidth: value, imageHeight: value })}
         />
-        <View style={styles.sliderLabels}>
-          <Text style={styles.sliderMinMax}>128</Text>
-          <Text style={styles.sliderMinMax}>512</Text>
-        </View>
       </View>
     </>
   );
 };
 
-/** Advanced sliders: Guidance Scale, Image Threads, GPU Acceleration */
+/** Advanced controls: Guidance Scale, Image Threads, GPU Acceleration */
 export const ImageQualityAdvancedSliders: React.FC = () => {
   const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
@@ -96,53 +64,23 @@ export const ImageQualityAdvancedSliders: React.FC = () => {
   return (
     <>
       <View style={styles.settingGroup}>
-        <View style={styles.settingHeader}>
-          <Text style={styles.settingLabel}>Guidance Scale</Text>
-          <Text style={styles.settingValue}>{(settings.imageGuidanceScale || 7.5).toFixed(1)}</Text>
-        </View>
-        <Text style={styles.settingDescription}>
-          Higher = follows prompt more strictly (5-15 range)
-        </Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1}
-          maximumValue={20}
-          step={0.5}
+        <Text style={styles.settingLabel}>Guidance Scale</Text>
+        <Text style={styles.settingDescription}>Higher = follows prompt more strictly (5-15 range)</Text>
+        <NumericStepper
           value={settings.imageGuidanceScale || 7.5}
-          onSlidingComplete={(value) => updateSettings({ imageGuidanceScale: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surfaceLight}
-          thumbTintColor={colors.primary}
+          min={1} max={20} step={0.5} decimals={1}
+          onChange={(value) => updateSettings({ imageGuidanceScale: value })}
         />
-        <View style={styles.sliderLabels}>
-          <Text style={styles.sliderMinMax}>1</Text>
-          <Text style={styles.sliderMinMax}>20</Text>
-        </View>
       </View>
 
       <View style={styles.settingGroup}>
-        <View style={styles.settingHeader}>
-          <Text style={styles.settingLabel}>Image Threads</Text>
-          <Text style={styles.settingValue}>{settings.imageThreads ?? 4}</Text>
-        </View>
-        <Text style={styles.settingDescription}>
-          CPU threads used for image generation. Takes effect next time the image model loads.
-        </Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1}
-          maximumValue={8}
-          step={1}
+        <Text style={styles.settingLabel}>Image Threads</Text>
+        <Text style={styles.settingDescription}>CPU threads used for image generation. Takes effect next time the image model loads.</Text>
+        <NumericStepper
           value={settings.imageThreads ?? 4}
-          onSlidingComplete={(value) => updateSettings({ imageThreads: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surfaceLight}
-          thumbTintColor={colors.primary}
+          min={1} max={8} step={1}
+          onChange={(value) => updateSettings({ imageThreads: value })}
         />
-        <View style={styles.sliderLabels}>
-          <Text style={styles.sliderMinMax}>1</Text>
-          <Text style={styles.sliderMinMax}>8</Text>
-        </View>
       </View>
 
       {Platform.OS === 'android' && (
@@ -157,7 +95,7 @@ export const ImageQualityAdvancedSliders: React.FC = () => {
             />
           </View>
           <Text style={styles.settingDescription}>
-            Use GPU for faster image generation. First run may be slower while optimizing for your device. For best performance, use NPU models on supported Snapdragon devices.
+            Use GPU for faster image generation. First run may be slower while optimizing for your device.
           </Text>
           {(settings.imageUseOpenCL ?? true) && <ClearGPUCacheButton />}
         </View>
diff --git a/src/components/GenerationSettingsModal/TTSSection.tsx b/src/components/GenerationSettingsModal/TTSSection.tsx
new file mode 100644
index 00000000..a4a7af8d
--- /dev/null
+++ b/src/components/GenerationSettingsModal/TTSSection.tsx
@@ -0,0 +1,250 @@
+import React from 'react';
+import { View, Text, Switch, TouchableOpacity, ActivityIndicator } from 'react-native';
+import Icon from 'react-native-vector-icons/Feather';
+import { NumericStepper } from '../NumericStepper';
+import { useTheme, useThemedStyles } from '../../theme';
+import type { ThemeColors, ThemeShadows } from '../../theme';
+import { SPACING } from '../../constants';
+import { useTTSStore } from '../../stores/ttsStore';
+import { KOKORO_VOICES, isExecutorchSupported } from '../../constants/kokoroModels';
+import type { KokoroVoiceId } from '../../constants/kokoroModels';
+import { createStyles as createModalStyles } from './styles';
+
+const createLocalStyles = (colors: ThemeColors, _shadows: ThemeShadows) => ({
+  modeChipDisabled: { opacity: 0.4 as const },
+  linkButton: {
+    alignSelf: 'flex-start' as const,
+    paddingHorizontal: SPACING.md,
+    paddingVertical: SPACING.sm,
+    borderRadius: 8,
+    borderWidth: 1,
+    borderColor: colors.border,
+    marginTop: SPACING.sm,
+  },
+  linkButtonRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.xs },
+  flex1: { flex: 1 },
+  toggleRow: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    justifyContent: 'space-between' as const,
+    marginBottom: SPACING.lg,
+  },
+  toggleInfo: { flex: 1 },
+  noBottomMargin: { marginBottom: 0 },
+  divider: { height: 1, backgroundColor: colors.border, marginBottom: SPACING.lg },
+  voiceRow: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    justifyContent: 'space-between' as const,
+    paddingVertical: SPACING.sm,
+  },
+  voiceRowBorder: { borderTopWidth: 1, borderTopColor: colors.border },
+  voiceInfo: { flex: 1 },
+  voiceName: { fontSize: 13, color: colors.text },
+  voiceMeta: { fontSize: 11, color: colors.textMuted, marginTop: 2 },
+  voiceSectionHeader: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    justifyContent: 'space-between' as const,
+    marginBottom: SPACING.sm,
+  },
+  voiceSectionLabel: { fontSize: 11, color: colors.textMuted, textTransform: 'uppercase' as const, letterSpacing: 0.3 },
+  downloadRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.sm, marginBottom: SPACING.md },
+  downloadText: { fontSize: 12, color: colors.textSecondary, flex: 1 },
+});
+
+// ─── Mode Picker ──────────────────────────────────────────────────────────────
+
+const ModePicker: React.FC<{ areBothDownloaded: boolean }> = ({ areBothDownloaded }) => {
+  const modal = useThemedStyles(createModalStyles);
+  const local = useThemedStyles(createLocalStyles);
+  const {
+    settings, updateSettings,
+    isModelLoaded, loadModels, unloadModels,
+    kokoroReady,
+  } = useTTSStore();
+  const mode = settings.interfaceMode;
+  // Audio mode needs OuteTTS (waveform generation)
+  const audioEnabled = areBothDownloaded;
+
+  const handleModeChange = (next: 'chat' | 'audio') => {
+    if (next === 'audio' && !audioEnabled) { return; }
+    updateSettings({ interfaceMode: next });
+    if (next === 'audio' && !isModelLoaded && areBothDownloaded) { loadModels(); }
+    if (next === 'chat' && isModelLoaded && !kokoroReady) { unloadModels(); }
+  };
+
+  return (
+    <View style={modal.modeToggleContainer}>
+      <View style={modal.modeToggleInfo}>
+        <Text style={modal.modeToggleLabel}>Interface Mode</Text>
+        <Text style={modal.modeToggleDesc}>
+          {mode === 'audio'
+            ? 'Audio Mode — responses rendered as voice notes'
+            : 'Chat Mode — play button added to text messages'}
+        </Text>
+      </View>
+      <View style={modal.modeToggleButtons}>
+        {(['chat', 'audio'] as const).map((m) => {
+          const active = mode === m;
+          const disabled = m === 'audio' && !audioEnabled;
+          return (
+            <TouchableOpacity
+              key={m}
+              style={[modal.modeButton, active && modal.modeButtonActive, disabled && local.modeChipDisabled]}
+              onPress={() => handleModeChange(m)}
+              disabled={disabled}
+            >
+              <Text style={[modal.modeButtonText, active && modal.modeButtonTextActive]}>
+                {m === 'chat' ? 'Chat' : 'Audio'}
+              </Text>
+            </TouchableOpacity>
+          );
+        })}
+      </View>
+    </View>
+  );
+};
+
+// ─── Voice Picker ─────────────────────────────────────────────────────────────
+
+const VoicePicker: React.FC = () => {
+  const { colors } = useTheme();
+  const local = useThemedStyles(createLocalStyles);
+  const { settings, updateSettings, kokoroReady, kokoroDownloadProgress, kokoroActiveVoiceId } = useTTSStore();
+  const isChangingVoice = settings.kokoroVoiceId !== kokoroActiveVoiceId;
+  const supported = isExecutorchSupported();
+
+  return (
+    <View>
+      <View style={local.voiceSectionHeader}>
+        <Text style={local.voiceSectionLabel}>Voice</Text>
+        {supported && !kokoroReady && (
+          kokoroDownloadProgress > 0
+            ? <Text style={local.voiceSectionLabel}>{Math.round(kokoroDownloadProgress * 100)}%</Text>
+            : <ActivityIndicator size="small" color={colors.textMuted} />
+        )}
+        {supported && kokoroReady && (
+          <Icon name="check-circle" size={12} color={colors.primary} />
+        )}
+        {!supported && (
+          <Text style={local.voiceSectionLabel}>Android 13+ only</Text>
+        )}
+      </View>
+
+      {KOKORO_VOICES.map((voice, i) => {
+        const active = settings.kokoroVoiceId === voice.id;
+        return (
+          <TouchableOpacity
+            key={voice.id}
+            style={[local.voiceRow, i > 0 && local.voiceRowBorder]}
+            onPress={() => updateSettings({ kokoroVoiceId: voice.id as KokoroVoiceId })}
+            disabled={!supported}
+          >
+            <View style={local.voiceInfo}>
+              <Text style={[local.voiceName, { color: supported ? colors.text : colors.textMuted }]}>
+                {voice.label}
+              </Text>
+              <Text style={local.voiceMeta}>{voice.accent} · {voice.gender}</Text>
+            </View>
+            {active && (
+              isChangingVoice
+                ? <ActivityIndicator size="small" color={colors.primary} />
+                : <Icon name="check" size={13} color={colors.primary} />
+            )}
+          </TouchableOpacity>
+        );
+      })}
+
+      <View style={[local.divider, { marginTop: SPACING.md }]} />
+    </View>
+  );
+};
+
+// ─── Main TTS Section ─────────────────────────────────────────────────────────
+
+interface TTSSectionProps {
+  onNavigateToTTSSettings?: () => void;
+}
+
+export const TTSSection: React.FC<TTSSectionProps> = ({ onNavigateToTTSSettings }) => {
+  const { colors } = useTheme();
+  const modal = useThemedStyles(createModalStyles);
+  const local = useThemedStyles(createLocalStyles);
+  const {
+    settings, updateSettings,
+    isBackboneDownloaded, isVocoderDownloaded,
+    kokoroReady,
+  } = useTTSStore();
+
+  const areBothDownloaded = isBackboneDownloaded && isVocoderDownloaded;
+  const hasAnySpeech = kokoroReady || areBothDownloaded;
+  const trackColor = { false: colors.surfaceLight, true: `${colors.primary}80` };
+  const isChatMode = settings.interfaceMode === 'chat';
+
+  if (!hasAnySpeech) {
+    return (
+      <View style={modal.sectionCard}>
+        <Text style={modal.settingDescription}>
+          No voice models downloaded. Go to TTS Settings to download them.
+        </Text>
+        {onNavigateToTTSSettings && (
+          <TouchableOpacity style={local.linkButton} onPress={onNavigateToTTSSettings}>
+            <View style={local.linkButtonRow}>
+              <Icon name="external-link" size={13} color={colors.textSecondary} />
+              <Text style={modal.modeButtonText}>TTS Settings</Text>
+            </View>
+          </TouchableOpacity>
+        )}
+      </View>
+    );
+  }
+
+  return (
+    <View style={modal.sectionCard}>
+      <ModePicker areBothDownloaded={areBothDownloaded} />
+
+      {isChatMode && (
+        <View style={local.toggleRow}>
+          <View style={local.toggleInfo}>
+            <Text style={modal.modeToggleLabel}>Enable TTS</Text>
+            <Text style={modal.modeToggleDesc}>Show play buttons on assistant messages</Text>
+          </View>
+          <Switch
+            value={settings.enabled}
+            onValueChange={(v) => updateSettings({ enabled: v })}
+            trackColor={trackColor}
+            thumbColor={settings.enabled ? colors.primary : colors.textMuted}
+          />
+        </View>
+      )}
+
+      <VoicePicker />
+
+      <View style={modal.settingGroup}>
+        <Text style={modal.settingLabel}>Speed</Text>
+        <NumericStepper
+          value={settings.speed}
+          min={0.5} max={2.0} step={0.1} decimals={1}
+          formatValue={(v) => `${v.toFixed(1)}x`}
+          onChange={(v) => updateSettings({ speed: v })}
+        />
+      </View>
+
+      {isChatMode && (
+        <View style={[local.toggleRow, local.noBottomMargin]}>
+          <View style={local.toggleInfo}>
+            <Text style={modal.modeToggleLabel}>Auto-play</Text>
+            <Text style={modal.modeToggleDesc}>Speak AI responses automatically</Text>
+          </View>
+          <Switch
+            value={settings.autoPlay}
+            onValueChange={(v) => updateSettings({ autoPlay: v })}
+            trackColor={trackColor}
+            thumbColor={settings.autoPlay ? colors.primary : colors.textMuted}
+          />
+        </View>
+      )}
+    </View>
+  );
+};
diff --git a/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx b/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx
index 0b017e57..3d44a999 100644
--- a/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx
+++ b/src/components/GenerationSettingsModal/TextGenerationAdvanced.tsx
@@ -1,7 +1,7 @@
 import React from 'react';
 import { View, Text, TouchableOpacity } from 'react-native';
-import Slider from '@react-native-community/slider';
-import { useTheme, useThemedStyles } from '../../theme';
+import { NumericStepper } from '../NumericStepper';
+import { useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
 import { CacheType } from '../../types';
 import {
@@ -15,7 +15,6 @@ import { createStyles } from './styles';
 // ─── GPU Acceleration ─────────────────────────────────────────────────────────
 
 export const GpuAccelerationToggle: React.FC = () => {
-  const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const { settings, updateSettings } = useAppStore();
   const { gpuLayersEffective, handleGpuToggle } = useTextGenerationAdvanced();
@@ -51,24 +50,15 @@ export const GpuAccelerationToggle: React.FC = () => {
 
       {settings.enableGpu && (
         <View style={styles.gpuLayersInline}>
-          <View style={styles.settingHeader}>
-            <Text style={styles.settingLabel}>GPU Layers</Text>
-            <Text style={styles.settingValue}>{gpuLayersEffective}</Text>
-          </View>
+          <Text style={styles.settingLabel}>GPU Layers</Text>
           <Text style={styles.settingDescription}>
             Layers offloaded to GPU. Higher = faster but may crash on low-VRAM devices. Requires model reload.
           </Text>
-          <Slider
-            testID="gpu-layers-slider"
-            style={styles.slider}
-            minimumValue={1}
-            maximumValue={GPU_LAYERS_MAX}
-            step={1}
+          <NumericStepper
+            testID="gpu-layers-stepper"
             value={gpuLayersEffective}
-            onSlidingComplete={(value: number) => updateSettings({ gpuLayers: value })}
-            minimumTrackTintColor={colors.primary}
-            maximumTrackTintColor={colors.surfaceLight}
-            thumbTintColor={colors.primary}
+            min={1} max={GPU_LAYERS_MAX} step={1}
+            onChange={(value) => updateSettings({ gpuLayers: value })}
           />
         </View>
       )}
@@ -199,56 +189,34 @@ export const ModelLoadingStrategyToggle: React.FC = () => {
 // ─── CPU Threads & Batch Size ────────────────────────────────────────────────
 
 export const CpuThreadsSlider: React.FC = () => {
-  const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const { settings, updateSettings } = useAppStore();
-  const value = settings.nThreads ?? 6;
 
   return (
     <View style={styles.modeToggleContainer}>
-      <View style={styles.settingHeader}>
-        <Text style={styles.settingLabel}>CPU Threads</Text>
-        <Text style={styles.settingValue}>{value}</Text>
-      </View>
+      <Text style={styles.settingLabel}>CPU Threads</Text>
       <Text style={styles.settingDescription}>Parallel threads for inference</Text>
-      <Slider
-        style={styles.slider}
-        minimumValue={1}
-        maximumValue={12}
-        step={1}
-        value={value}
-        onSlidingComplete={(v: number) => updateSettings({ nThreads: v })}
-        minimumTrackTintColor={colors.primary}
-        maximumTrackTintColor={colors.surfaceLight}
-        thumbTintColor={colors.primary}
+      <NumericStepper
+        value={settings.nThreads ?? 6}
+        min={1} max={12} step={1}
+        onChange={(v) => updateSettings({ nThreads: v })}
       />
     </View>
   );
 };
 
 export const BatchSizeSlider: React.FC = () => {
-  const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const { settings, updateSettings } = useAppStore();
-  const value = settings.nBatch ?? 512;
 
   return (
     <View style={styles.modeToggleContainer}>
-      <View style={styles.settingHeader}>
-        <Text style={styles.settingLabel}>Batch Size</Text>
-        <Text style={styles.settingValue}>{value}</Text>
-      </View>
+      <Text style={styles.settingLabel}>Batch Size</Text>
       <Text style={styles.settingDescription}>Tokens processed per batch</Text>
-      <Slider
-        style={styles.slider}
-        minimumValue={32}
-        maximumValue={512}
-        step={32}
-        value={value}
-        onSlidingComplete={(v: number) => updateSettings({ nBatch: v })}
-        minimumTrackTintColor={colors.primary}
-        maximumTrackTintColor={colors.surfaceLight}
-        thumbTintColor={colors.primary}
+      <NumericStepper
+        value={settings.nBatch ?? 512}
+        min={32} max={512} step={32}
+        onChange={(v) => updateSettings({ nBatch: v })}
       />
     </View>
   );
diff --git a/src/components/GenerationSettingsModal/TextGenerationSection.tsx b/src/components/GenerationSettingsModal/TextGenerationSection.tsx
index 18ed0c03..9ef8070d 100644
--- a/src/components/GenerationSettingsModal/TextGenerationSection.tsx
+++ b/src/components/GenerationSettingsModal/TextGenerationSection.tsx
@@ -1,6 +1,6 @@
 import React, { useState } from 'react';
 import { View, Text, TouchableOpacity, Platform } from 'react-native';
-import Slider from '@react-native-community/slider';
+import { NumericStepper } from '../NumericStepper';
 import { AdvancedToggle } from '../AdvancedToggle';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
@@ -103,35 +103,23 @@ const SettingSlider: React.FC<SettingSliderProps> = ({ config }) => {
   const rawValue = (settings as Record<string, unknown>)[config.key];
   const value = (rawValue ?? DEFAULT_SETTINGS[config.key]) as number;
   const warningText = config.warning?.(value) ?? null;
+  const decimals = config.step < 1 ? 2 : 0;
 
   return (
     <View style={styles.settingGroup}>
-      <View style={styles.settingHeader}>
-        <Text style={styles.settingLabel}>{config.label}</Text>
-        <Text style={styles.settingValue}>{config.format(value)}</Text>
-      </View>
+      <Text style={styles.settingLabel}>{config.label}</Text>
       {config.description && (
         <Text style={styles.settingDescription}>{config.description}</Text>
       )}
       {warningText && (
         <Text style={[styles.settingDescription, { color: colors.error }]}>{warningText}</Text>
       )}
-      <Slider
-        style={styles.slider}
-        minimumValue={config.min}
-        maximumValue={config.max}
-        step={config.step}
+      <NumericStepper
         value={value}
-        onValueChange={(v) => updateSettings({ [config.key]: v })}
-        onSlidingComplete={() => {}}
-        minimumTrackTintColor={colors.primary}
-        maximumTrackTintColor={colors.surfaceLight}
-        thumbTintColor={colors.primary}
+        min={config.min} max={config.max} step={config.step} decimals={decimals}
+        formatValue={config.format}
+        onChange={(v) => updateSettings({ [config.key]: v })}
       />
-      <View style={styles.sliderLabels}>
-        <Text style={styles.sliderMinMax}>{config.format(config.min)}</Text>
-        <Text style={styles.sliderMinMax}>{config.format(config.max)}</Text>
-      </View>
     </View>
   );
 };
diff --git a/src/components/GenerationSettingsModal/index.tsx b/src/components/GenerationSettingsModal/index.tsx
index b23a3b74..fa54ea96 100644
--- a/src/components/GenerationSettingsModal/index.tsx
+++ b/src/components/GenerationSettingsModal/index.tsx
@@ -9,6 +9,7 @@ import { createStyles } from './styles';
 import { ConversationActionsSection } from './ConversationActionsSection';
 import { ImageGenerationSection } from './ImageGenerationSection';
 import { TextGenerationSection } from './TextGenerationSection';
+import { TTSSection } from './TTSSection';
 
 const DEFAULT_SETTINGS = {
   temperature: 0.7,
@@ -26,6 +27,7 @@ interface GenerationSettingsModalProps {
   onOpenProject?: () => void;
   onOpenGallery?: () => void;
   onDeleteConversation?: () => void;
+  onOpenTTSSettings?: () => void;
   conversationImageCount?: number;
   activeProjectName?: string | null;
   isRemote?: boolean;
@@ -37,6 +39,7 @@ export const GenerationSettingsModal: React.FC<GenerationSettingsModalProps> = (
   onOpenProject,
   onOpenGallery,
   onDeleteConversation,
+  onOpenTTSSettings,
   conversationImageCount = 0,
   activeProjectName,
   isRemote,
@@ -48,6 +51,7 @@ export const GenerationSettingsModal: React.FC<GenerationSettingsModalProps> = (
   const [performanceStats, setPerformanceStats] = useState(llmService.getPerformanceStats());
   const [imageSettingsOpen, setImageSettingsOpen] = useState(false);
   const [textSettingsOpen, setTextSettingsOpen] = useState(false);
+  const [ttsSettingsOpen, setTtsSettingsOpen] = useState(false);
 
   useEffect(() => {
     if (visible) {
@@ -144,6 +148,23 @@ export const GenerationSettingsModal: React.FC<GenerationSettingsModalProps> = (
           </>
         )}
 
+        {/* TTS SETTINGS */}
+        <TouchableOpacity
+          style={styles.accordionHeader}
+          onPress={() => setTtsSettingsOpen(!ttsSettingsOpen)}
+          activeOpacity={0.7}
+        >
+          <Text style={styles.accordionTitle}>TEXT TO SPEECH</Text>
+          <Icon
+            name={ttsSettingsOpen ? 'chevron-up' : 'chevron-down'}
+            size={16}
+            color={colors.textMuted}
+          />
+        </TouchableOpacity>
+        {ttsSettingsOpen && (
+          <TTSSection onNavigateToTTSSettings={onOpenTTSSettings} />
+        )}
+
         <TouchableOpacity style={styles.resetButton} onPress={handleResetDefaults}>
           <Text style={styles.resetButtonText}>Reset to Defaults</Text>
         </TouchableOpacity>
diff --git a/src/components/KokoroTTSManager.tsx b/src/components/KokoroTTSManager.tsx
new file mode 100644
index 00000000..77799daf
--- /dev/null
+++ b/src/components/KokoroTTSManager.tsx
@@ -0,0 +1,171 @@
+/**
+ * KokoroTTSManager
+ *
+ * Mounts the react-native-executorch useTextToSpeech hook and exposes its
+ * speak/stop methods via module-level refs so they can be called from the
+ * ttsStore without a React context dependency.
+ *
+ * Mount exactly once, near the root (App.tsx), only on supported platforms.
+ * On Android <26 / iOS <17 this component should not be rendered at all.
+ *
+ * Voice changes use a key-based remount strategy: the outer component manages
+ * voice switching with a cooldown, then remounts the inner component with a new
+ * key so executorch gets a clean teardown/init cycle (avoids native SIGSEGV).
+ */
+import React, { useEffect, useRef } from 'react';
+import { useTextToSpeech } from 'react-native-executorch';
+import { AudioContext } from 'react-native-audio-api';
+import { useTTSStore } from '../stores/ttsStore';
+import { KOKORO_MEDIUM, getKokoroVoiceConfig } from '../constants/kokoroModels';
+import type { KokoroVoiceId } from '../constants/kokoroModels';
+import logger from '../utils/logger';
+
+// ─── Module-level refs (callable from ttsStore without React context) ─────────
+
+let _streamFn: ((text: string, speed: number) => Promise<void>) | null = null;
+let _stopFn: ((instant?: boolean) => void) | null = null;
+let _audioCtxRef: { current: AudioContext | null } = { current: null };
+// Pending onNext resolvers — force-resolved on stop so isSpeaking is always cleared
+const _pendingResolvers: Set<() => void> = new Set();
+// When true, onEnd skips ctx.suspend() so the next chunk can start cleanly
+let _skipSuspendOnEnd = false;
+/** Timestamp of the last stream completion/stop — used by voice change cooldown */
+let _lastStreamEndTime = 0;
+
+export const kokoroRef = {
+  speak: (text: string, speed = 1.0): Promise<void> =>
+    _streamFn ? _streamFn(text, speed) : Promise.resolve(),
+  /** Call before sequential chunks to prevent AudioContext suspension between them */
+  setKeepAlive: (keepAlive: boolean) => { _skipSuspendOnEnd = keepAlive; },
+  stop: (instant = true) => {
+    _pendingResolvers.forEach((resolve) => resolve());
+    _pendingResolvers.clear();
+    _stopFn?.(instant);
+    _lastStreamEndTime = Date.now();
+  },
+  /** Pause playback — suspends AudioContext, Kokoro waits for onNext to resolve */
+  pause: () => { _audioCtxRef.current?.suspend().catch(() => {}); },
+  /** Resume playback — AudioContext resumes, current chunk finishes, Kokoro continues */
+  resume: () => { _audioCtxRef.current?.resume().catch(() => {}); },
+};
+
+// ─── Inner component — holds the useTextToSpeech hook for a single voice ─────
+
+const KokoroTTSInner: React.FC<{ voiceId: KokoroVoiceId }> = ({ voiceId }) => {
+  const audioCtxRef = useRef<AudioContext | null>(null);
+  _audioCtxRef = audioCtxRef;
+
+  const tts = useTextToSpeech({
+    model: KOKORO_MEDIUM,
+    voice: getKokoroVoiceConfig(voiceId),
+  });
+
+  // Sync isReady + downloadProgress into ttsStore
+  useEffect(() => {
+    logger.log('[Kokoro] isReady=', tts.isReady, 'downloadProgress=', tts.downloadProgress, 'voiceId=', voiceId);
+    useTTSStore.getState().setKokoroState(tts.isReady, tts.downloadProgress);
+    if (tts.isReady) {
+      logger.log('[Kokoro] Setting kokoroActiveVoiceId to', voiceId);
+      useTTSStore.getState().setKokoroActiveVoiceId(voiceId);
+    }
+  }, [tts.isReady, tts.downloadProgress, voiceId]);
+
+  useEffect(() => {
+    if (tts.error) {
+      logger.warn('[Kokoro] Runtime error — falling back to OuteTTS:', tts.error);
+      useTTSStore.getState().setKokoroState(false, 0);
+    }
+  }, [tts.error]);
+
+  // Keep module refs pointing to the latest hook functions on every render
+  _streamFn = async (text: string, speed: number) => {
+    if (!audioCtxRef.current || audioCtxRef.current.state === 'closed') {
+      audioCtxRef.current = new AudioContext({ sampleRate: 24000 });
+    } else if (audioCtxRef.current.state === 'suspended') {
+      await audioCtxRef.current.resume().catch(() => {});
+    }
+    const ctx = audioCtxRef.current;
+
+    try {
+      await tts.stream({
+        text,
+        speed,
+        onNext: (chunk: Float32Array) =>
+          new Promise<void>((resolve) => {
+            _pendingResolvers.add(resolve);
+            const done = () => { _pendingResolvers.delete(resolve); resolve(); };
+            useTTSStore.getState().setAudioPlaying(true);
+            const currentSpeed = useTTSStore.getState().settings.speed;
+            const buffer = ctx.createBuffer(1, chunk.length, 24000);
+            buffer.copyToChannel(chunk, 0);
+            const source = ctx.createBufferSource();
+            source.buffer = buffer;
+            source.playbackRate.value = currentSpeed;
+            source.connect(ctx.destination);
+            source.onEnded = done;
+            source.start();
+          }),
+        onEnd: async () => {
+          if (!_skipSuspendOnEnd) {
+            await ctx.suspend().catch(() => {});
+          }
+        },
+      });
+    } catch (err) {
+      logger.error('[Kokoro] stream error:', err);
+      throw err;
+    }
+  };
+
+  _stopFn = (instant = true) => {
+    tts.streamStop(instant);
+    audioCtxRef.current?.close().catch(() => {});
+    audioCtxRef.current = null;
+  };
+
+  // Clear refs on unmount so stale closures don't fire during voice switch
+  useEffect(() => {
+    return () => {
+      logger.log('[Kokoro] Inner unmounting, clearing refs');
+      _streamFn = null;
+      _stopFn = null;
+    };
+  }, []);
+
+  return null;
+};
+
+// ─── Outer component — manages voice switching via key-based remount ─────────
+
+export const KokoroTTSManager: React.FC = () => {
+  const kokoroVoiceId = useTTSStore(s => s.settings.kokoroVoiceId) as KokoroVoiceId;
+  const isSpeaking = useTTSStore(s => s.isSpeaking);
+
+  // activeVoiceId controls which voice the inner component is mounted with.
+  // Changed only after a cooldown to give executorch time to clean up.
+  const [activeVoiceId, setActiveVoiceId] = React.useState(kokoroVoiceId);
+  const cooldownRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  React.useEffect(() => {
+    logger.log('[Kokoro] Voice effect: kokoroVoiceId=', kokoroVoiceId, 'activeVoiceId=', activeVoiceId, 'isSpeaking=', isSpeaking);
+    if (isSpeaking || kokoroVoiceId === activeVoiceId) {
+      if (cooldownRef.current) { clearTimeout(cooldownRef.current); cooldownRef.current = null; }
+      return;
+    }
+    const elapsed = Date.now() - _lastStreamEndTime;
+    const waitMs = Math.max(100, 2000 - elapsed);
+    logger.log('[Kokoro] Starting voice change cooldown:', waitMs, 'ms');
+    // Mark Kokoro as not ready during the switch so UI shows loader
+    useTTSStore.getState().setKokoroState(false, 0);
+    cooldownRef.current = setTimeout(() => {
+      logger.log('[Kokoro] Cooldown done, remounting with voice', kokoroVoiceId);
+      setActiveVoiceId(kokoroVoiceId);
+      cooldownRef.current = null;
+    }, waitMs);
+    return () => { if (cooldownRef.current) { clearTimeout(cooldownRef.current); cooldownRef.current = null; } };
+  }, [kokoroVoiceId, isSpeaking, activeVoiceId]);
+
+  // Key-based remount: when activeVoiceId changes, the inner component
+  // fully unmounts (executorch teardown) then remounts (fresh init).
+  return <KokoroTTSInner key={activeVoiceId} voiceId={activeVoiceId} />;
+};
diff --git a/src/components/MarkdownText.tsx b/src/components/MarkdownText.tsx
index 78d6c9ae..233a606a 100644
--- a/src/components/MarkdownText.tsx
+++ b/src/components/MarkdownText.tsx
@@ -1,5 +1,5 @@
 import React, { useCallback, useMemo } from 'react';
-import { Linking, Pressable, Text, StyleSheet } from 'react-native';
+import { Linking, Text } from 'react-native';
 import Markdown from '@ronradtke/react-native-markdown-display';
 import { useTheme } from '../theme';
 import type { ThemeColors } from '../theme';
@@ -14,21 +14,17 @@ export function preprocessMarkdown(text: string): string {
   return text.replaceAll(/(\d)\*(?=\d)/g, String.raw`$1\*`);
 }
 
-const linkWrapperStyles = StyleSheet.create({
-  pressable: { flexShrink: 1, paddingBottom: 6 },
-});
-
-/** Custom link rule that constrains the Pressable wrapper width */
+/** Custom link rule — renders as inline Text so it wraps correctly inside list items */
 function createLinkRule(onPress: (url: string) => void) {
-  return (node: any, renderChildren: any, _parent: any) => (
-    <Pressable
+  return (node: any, children: any, ...[, styles]: any[]) => (
+    <Text
       key={node.key}
       accessibilityRole="link"
-      style={linkWrapperStyles.pressable}
+      style={styles.link}
       onPress={() => onPress(node.attributes?.href ?? '')}
     >
-      <Text>{renderChildren}</Text>
-    </Pressable>
+      {children}
+    </Text>
   );
 }
 
diff --git a/src/components/NumericStepper.tsx b/src/components/NumericStepper.tsx
new file mode 100644
index 00000000..342cc669
--- /dev/null
+++ b/src/components/NumericStepper.tsx
@@ -0,0 +1,105 @@
+import React from 'react';
+import { View, Text, TouchableOpacity, StyleSheet } from 'react-native';
+import Icon from 'react-native-vector-icons/Feather';
+import { useTheme } from '../theme';
+import { TYPOGRAPHY, SPACING } from '../constants';
+
+interface NumericStepperProps {
+  value: number;
+  min: number;
+  max: number;
+  step: number;
+  decimals?: number;
+  onChange: (value: number) => void;
+  formatValue?: (value: number) => string;
+  testID?: string;
+}
+
+export const NumericStepper: React.FC<NumericStepperProps> = ({
+  value,
+  min,
+  max,
+  step,
+  decimals = 0,
+  onChange,
+  formatValue,
+  testID,
+}) => {
+  const { colors } = useTheme();
+
+  const round = (v: number) => Math.round(v / step) * step;
+
+  const decrement = () => {
+    const next = round(value - step);
+    if (next >= min) onChange(parseFloat(next.toFixed(decimals)));
+  };
+
+  const increment = () => {
+    const next = round(value + step);
+    if (next <= max) onChange(parseFloat(next.toFixed(decimals)));
+  };
+
+  const display = formatValue ? formatValue(value) : value.toFixed(decimals);
+  const canDecrement = value > min;
+  const canIncrement = value < max;
+
+  return (
+    <View style={styles.row}>
+      <TouchableOpacity
+        testID={testID ? `${testID}-decrement` : undefined}
+        onPress={decrement}
+        disabled={!canDecrement}
+        hitSlop={{ top: 8, bottom: 8, left: 8, right: 8 }}
+        style={[styles.button, { borderColor: colors.border, backgroundColor: colors.surface }, !canDecrement && styles.buttonDisabled]}
+      >
+        <Icon name="minus" size={14} color={canDecrement ? colors.text : colors.textMuted} />
+      </TouchableOpacity>
+
+      <Text testID={testID ? `${testID}-value` : undefined} style={[styles.value, { color: colors.primary, borderColor: colors.border, backgroundColor: colors.surfaceLight }]}>
+        {display}
+      </Text>
+
+      <TouchableOpacity
+        testID={testID ? `${testID}-increment` : undefined}
+        onPress={increment}
+        disabled={!canIncrement}
+        hitSlop={{ top: 8, bottom: 8, left: 8, right: 8 }}
+        style={[styles.button, { borderColor: colors.border, backgroundColor: colors.surface }, !canIncrement && styles.buttonDisabled]}
+      >
+        <Icon name="plus" size={14} color={canIncrement ? colors.text : colors.textMuted} />
+      </TouchableOpacity>
+    </View>
+  );
+};
+
+const styles = StyleSheet.create({
+  row: {
+    flexDirection: 'row',
+    alignItems: 'center',
+    justifyContent: 'center',
+    gap: SPACING.sm,
+    marginTop: SPACING.sm,
+  },
+  button: {
+    width: 32,
+    height: 32,
+    borderRadius: 8,
+    borderWidth: 1,
+    alignItems: 'center',
+    justifyContent: 'center',
+  },
+  buttonDisabled: {
+    opacity: 0.35,
+  },
+  value: {
+    ...TYPOGRAPHY.body,
+    fontWeight: '400',
+    minWidth: 72,
+    textAlign: 'center',
+    paddingHorizontal: SPACING.sm,
+    paddingVertical: SPACING.xs,
+    borderRadius: 8,
+    borderWidth: 1,
+    overflow: 'hidden',
+  },
+});
diff --git a/src/components/TTSButton/index.tsx b/src/components/TTSButton/index.tsx
new file mode 100644
index 00000000..c33a18b7
--- /dev/null
+++ b/src/components/TTSButton/index.tsx
@@ -0,0 +1,117 @@
+import React, { useEffect } from 'react';
+import { TouchableOpacity, ActivityIndicator, StyleSheet } from 'react-native';
+import Animated, {
+  useSharedValue,
+  useAnimatedStyle,
+  withRepeat,
+  withSequence,
+  withTiming,
+} from 'react-native-reanimated';
+import Icon from 'react-native-vector-icons/Feather';
+import { useTheme } from '../../theme';
+import { useTTSStore } from '../../stores/ttsStore';
+import { SPACING } from '../../constants';
+
+interface TTSButtonProps {
+  text: string;
+  messageId: string;
+}
+
+export const TTSButton: React.FC<TTSButtonProps> = ({ text, messageId }) => {
+  const { colors } = useTheme();
+  const {
+    speak,
+    stop,
+    isSpeaking,
+    isGeneratingAudio,
+    isModelLoading,
+    isModelLoaded,
+    currentMessageId,
+    settings,
+    isBackboneDownloaded,
+    isVocoderDownloaded,
+    kokoroReady,
+    loadModels,
+  } = useTTSStore();
+
+  const areBothDownloaded = isBackboneDownloaded && isVocoderDownloaded;
+  const isThisMessage = currentMessageId === messageId;
+  // Kokoro streams so no separate generation phase — only OuteTTS sets isGeneratingAudio
+  const isThisMessageGenerating = isGeneratingAudio && isThisMessage;
+  const isThisMessageSpeaking = isSpeaking && !isGeneratingAudio && isThisMessage;
+
+  // Button is usable if Kokoro is ready (fast path) OR OuteTTS is downloaded (slow path)
+  const canSpeak = kokoroReady || areBothDownloaded;
+
+  const opacity = useSharedValue(1);
+  useEffect(() => {
+    if (isThisMessageSpeaking) {
+      opacity.value = withRepeat(
+        withSequence(
+          withTiming(0.4, { duration: 600 }),
+          withTiming(1, { duration: 600 }),
+        ),
+        -1,
+        false,
+      );
+    } else {
+      opacity.value = withTiming(1, { duration: 200 });
+    }
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [isThisMessageSpeaking]);
+
+  const animatedStyle = useAnimatedStyle(() => ({ opacity: opacity.value }));
+
+  // Don't render if TTS disabled or no model is usable (Kokoro or OuteTTS)
+  if (!settings.enabled || !canSpeak) {
+    return null;
+  }
+
+  // Show spinner while model is loading for this message, or while generating audio tokens
+  if ((isModelLoading && isThisMessage) || isThisMessageGenerating) {
+    return <ActivityIndicator size="small" color={colors.textMuted} style={styles.button} />;
+  }
+
+  const handlePress = () => {
+    if (isThisMessageSpeaking || isThisMessageGenerating) {
+      stop();
+      return;
+    }
+    // Kokoro: ready immediately, no model loading step needed
+    if (kokoroReady) {
+      speak(text, messageId);
+      return;
+    }
+    // OuteTTS fallback: load models on first press if needed
+    if (!isModelLoaded) {
+      loadModels().then(() => {
+        useTTSStore.getState().speak(text, messageId);
+      });
+      return;
+    }
+    speak(text, messageId);
+  };
+
+  return (
+    <TouchableOpacity
+      onPress={handlePress}
+      style={styles.button}
+      hitSlop={{ top: 8, bottom: 8, left: 8, right: 8 }}
+      testID={`tts-button-${messageId}`}
+    >
+      <Animated.View style={isThisMessageSpeaking ? animatedStyle : undefined}>
+        <Icon
+          name={isThisMessageSpeaking ? 'volume-2' : 'volume-1'}
+          size={14}
+          color={isThisMessageSpeaking ? colors.primary : colors.textMuted}
+        />
+      </Animated.View>
+    </TouchableOpacity>
+  );
+};
+
+const styles = StyleSheet.create({
+  button: {
+    padding: SPACING.xs,
+  },
+});
diff --git a/src/components/VoiceRecordButton/index.tsx b/src/components/VoiceRecordButton/index.tsx
index bd1cca73..6844c05f 100644
--- a/src/components/VoiceRecordButton/index.tsx
+++ b/src/components/VoiceRecordButton/index.tsx
@@ -9,6 +9,7 @@ import {
   PanResponderGestureState,
   Vibration,
 } from 'react-native';
+import Icon from 'react-native-vector-icons/Feather';
 import ReanimatedAnimated, {
   useSharedValue,
   useAnimatedStyle,
@@ -16,15 +17,16 @@ import ReanimatedAnimated, {
   withTiming,
   Easing,
 } from 'react-native-reanimated';
-import { useNavigation } from '@react-navigation/native';
-import { NativeStackNavigationProp } from '@react-navigation/native-stack';
 import { useThemedStyles } from '../../theme';
 import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../CustomAlert';
 import { createStyles } from './styles';
 import { LoadingState, TranscribingState, UnavailableButton, ButtonIcon } from './states';
-import { RootStackParamList } from '../../navigation/types';
+import { useWhisperStore } from '../../stores';
 import logger from '../../utils/logger';
 
+const DOWNLOAD_MODEL_ID = 'small.en';
+const DOWNLOAD_MODEL_SIZE_MB = 466;
+
 interface VoiceRecordButtonProps {
   isRecording: boolean;
   isAvailable: boolean;
@@ -95,7 +97,7 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
   isModelLoading,
   isTranscribing,
   partialResult,
-  error,
+  error: _error,
   disabled,
   onStartRecording,
   onStopRecording,
@@ -103,7 +105,7 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
   asSendButton = false,
 }) => {
   const styles = useThemedStyles(createStyles);
-  const navigation = useNavigation<NativeStackNavigationProp<RootStackParamList>>();
+  const { downloadModel, isDownloading, downloadProgress } = useWhisperStore();
 
   const pulseAnim = useRef(new Animated.Value(1)).current;
   const loadingAnim = useRef(new Animated.Value(0)).current;
@@ -125,6 +127,7 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
       rippleOpacity.value = 0;
     }
 
+  // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [isRecording]);
 
   const rippleStyle = useAnimatedStyle(() => ({
@@ -161,15 +164,20 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
   const panResponder = useRef(buildPanResponder({ isDraggingToCancel, cancelOffsetX, callbacksRef })).current;
 
   const handleUnavailableTap = () => {
-    const errorDetail = error || 'No transcription model downloaded';
+    if (isDownloading) { return; }
     setAlertState(showAlert(
-      'Voice Input Unavailable',
-      `${errorDetail}\n\nDownload a Whisper model to enable on-device voice input.`,
+      'Download Voice Model',
+      `Download Whisper Small to enable voice input? (${DOWNLOAD_MODEL_SIZE_MB} MB)`,
       [
-        { text: 'Cancel' },
+        { text: 'Cancel', style: 'cancel' },
         {
-          text: 'Go to Voice Settings',
-          onPress: () => navigation.navigate('VoiceSettings'),
+          text: 'Download',
+          onPress: () => {
+            setAlertState(hideAlert());
+            downloadModel(DOWNLOAD_MODEL_ID).catch((err) => {
+              logger.error('[VoiceRecordButton] Download failed:', err);
+            });
+          },
         },
       ],
     ));
@@ -206,8 +214,8 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
   if (!isAvailable) {
     return (
       <View style={styles.container}>
-        <TouchableOpacity style={styles.buttonWrapper} onPress={handleUnavailableTap}>
-          <UnavailableButton asSendButton={asSendButton} />
+        <TouchableOpacity style={styles.buttonWrapper} onPress={handleUnavailableTap} disabled={isDownloading}>
+          <UnavailableButton asSendButton={asSendButton} downloadProgress={isDownloading ? downloadProgress : undefined} />
         </TouchableOpacity>
         {alert}
       </View>
@@ -221,6 +229,42 @@ export const VoiceRecordButton: React.FC<VoiceRecordButtonProps> = ({
     disabled && styles.buttonDisabled,
   ];
 
+  // ── Audio mode: tap-to-toggle (tap to start, tap to stop & send) ───────────
+  if (!asSendButton) {
+    const handleToggle = () => {
+      if (disabled) return;
+      Vibration.vibrate(50);
+      if (isRecording) {
+        onStopRecording();
+      } else {
+        onStartRecording();
+      }
+    };
+
+    return (
+      <View style={styles.container}>
+        {isRecording && <ReanimatedAnimated.View style={[styles.rippleRing, rippleStyle]} />}
+        <Animated.View
+          style={[styles.buttonWrapper, { transform: [{ scale: isRecording ? pulseAnim : 1 }] }]}
+        >
+          <TouchableOpacity
+            onPress={handleToggle}
+            disabled={disabled}
+            activeOpacity={0.7}
+          >
+            <View style={buttonStyle}>
+              {isRecording
+                ? <Icon name="square" size={16} color="#fff" />
+                : <ButtonIcon asSendButton={false} isRecording={false} />}
+            </View>
+          </TouchableOpacity>
+        </Animated.View>
+        {alert}
+      </View>
+    );
+  }
+
+  // ── Chat mode: hold-to-record with slide-to-cancel ─────────────────────────
   return (
     <View style={styles.container}>
       {isRecording && (
diff --git a/src/components/VoiceRecordButton/states.tsx b/src/components/VoiceRecordButton/states.tsx
index d0ba1ab2..889a820c 100644
--- a/src/components/VoiceRecordButton/states.tsx
+++ b/src/components/VoiceRecordButton/states.tsx
@@ -43,7 +43,6 @@ export const TranscribingState: React.FC<TranscribingStateProps> = ({ asSendButt
       <Animated.View style={[styles.button, asSendButton ? styles.buttonAsSendLoading : styles.buttonTranscribing, { transform: [{ rotate: spin }] }]}>
         {asSendButton ? <Icon name="mic" size={18} color={colors.info} /> : <View style={styles.loadingIndicator} />}
       </Animated.View>
-      {!asSendButton && <Text style={styles.transcribingText}>Transcribing...</Text>}
     </View>
   );
 };
@@ -52,16 +51,30 @@ export const TranscribingState: React.FC<TranscribingStateProps> = ({ asSendButt
 
 interface UnavailableButtonProps {
   asSendButton: boolean;
+  /** 0–1 while downloading, undefined when idle */
+  downloadProgress?: number;
 }
 
-export const UnavailableButton: React.FC<UnavailableButtonProps> = ({ asSendButton }) => {
+export const UnavailableButton: React.FC<UnavailableButtonProps> = ({ asSendButton, downloadProgress }) => {
   const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
+  const isDownloading = downloadProgress !== undefined;
+
+  if (asSendButton) {
+    return (
+      <View style={[styles.button, styles.buttonAsSendUnavailable]}>
+        <Icon name={isDownloading ? 'download' : 'mic-off'} size={18} color={colors.textMuted} />
+      </View>
+    );
+  }
 
   return (
-    <View style={[styles.button, asSendButton ? styles.buttonAsSendUnavailable : styles.buttonUnavailable]}>
-      {asSendButton ? (
-        <Icon name="mic-off" size={18} color={colors.textMuted} />
+    <View style={[styles.button, styles.buttonUnavailable]}>
+      {isDownloading ? (
+        <>
+          <Icon name="download" size={14} color={colors.textMuted} />
+          <Text style={styles.loadingText}>{Math.round(downloadProgress * 100)}%</Text>
+        </>
       ) : (
         <>
           <View style={styles.micIcon}>
diff --git a/src/constants/kokoroModels.ts b/src/constants/kokoroModels.ts
new file mode 100644
index 00000000..9cf90b6e
--- /dev/null
+++ b/src/constants/kokoroModels.ts
@@ -0,0 +1,62 @@
+import { Platform } from 'react-native';
+import {
+  KOKORO_MEDIUM,
+  KOKORO_VOICE_AF_HEART,
+  KOKORO_VOICE_AF_RIVER,
+  KOKORO_VOICE_AF_SARAH,
+  KOKORO_VOICE_AM_ADAM,
+  KOKORO_VOICE_AM_MICHAEL,
+  KOKORO_VOICE_AM_SANTA,
+  KOKORO_VOICE_BF_EMMA,
+  KOKORO_VOICE_BM_DANIEL,
+} from 'react-native-executorch';
+import type { VoiceConfig } from 'react-native-executorch';
+
+export { KOKORO_MEDIUM };
+
+export type KokoroVoiceId =
+  | 'af_heart'
+  | 'af_river'
+  | 'af_sarah'
+  | 'am_adam'
+  | 'am_michael'
+  | 'am_santa'
+  | 'bf_emma'
+  | 'bm_daniel';
+
+export const KOKORO_VOICES: {
+  id: KokoroVoiceId;
+  label: string;
+  persona: string;
+  accent: string;
+  gender: 'Female' | 'Male';
+  /** Recommended playback speed for this persona's mood */
+  defaultSpeed: number;
+  config: VoiceConfig;
+}[] = [
+  { id: 'af_heart',   label: 'Warm',      persona: 'Friendly and approachable',   accent: 'US',      gender: 'Female', defaultSpeed: 1.0, config: KOKORO_VOICE_AF_HEART },
+  { id: 'af_river',   label: 'Calm',      persona: 'Relaxed and soothing',        accent: 'US',      gender: 'Female', defaultSpeed: 0.9, config: KOKORO_VOICE_AF_RIVER },
+  { id: 'af_sarah',   label: 'Clear',     persona: 'Crisp and professional',      accent: 'US',      gender: 'Female', defaultSpeed: 1.0, config: KOKORO_VOICE_AF_SARAH },
+  { id: 'am_adam',    label: 'Steady',    persona: 'Composed and reliable',       accent: 'US',      gender: 'Male',   defaultSpeed: 1.0, config: KOKORO_VOICE_AM_ADAM },
+  { id: 'am_michael', label: 'Bold',      persona: 'Confident and direct',        accent: 'US',      gender: 'Male',   defaultSpeed: 1.1, config: KOKORO_VOICE_AM_MICHAEL },
+  { id: 'am_santa',   label: 'Cheerful',  persona: 'Upbeat and energetic',        accent: 'US',      gender: 'Male',   defaultSpeed: 1.2, config: KOKORO_VOICE_AM_SANTA },
+  { id: 'bf_emma',    label: 'Gentle',    persona: 'Soft and thoughtful',         accent: 'British',  gender: 'Female', defaultSpeed: 0.9, config: KOKORO_VOICE_BF_EMMA },
+  { id: 'bm_daniel',  label: 'Refined',   persona: 'Polished and articulate',     accent: 'British',  gender: 'Male',   defaultSpeed: 1.0, config: KOKORO_VOICE_BM_DANIEL },
+];
+
+export const DEFAULT_KOKORO_VOICE_ID: KokoroVoiceId = 'af_heart';
+
+export function getKokoroVoiceConfig(id: KokoroVoiceId): VoiceConfig {
+  return KOKORO_VOICES.find(v => v.id === id)?.config ?? KOKORO_VOICE_AF_HEART;
+}
+
+/** Runtime check — executorch gradle.properties sets minSdkVersion=26; README says 33 but that's conservative */
+export function isExecutorchSupported(): boolean {
+  if (Platform.OS === 'android') {
+    return (Platform.Version as number) >= 26;
+  }
+  if (Platform.OS === 'ios') {
+    return parseInt(Platform.Version as string, 10) >= 17;
+  }
+  return false;
+}
diff --git a/src/constants/ttsModels.ts b/src/constants/ttsModels.ts
new file mode 100644
index 00000000..f93dfe85
--- /dev/null
+++ b/src/constants/ttsModels.ts
@@ -0,0 +1,25 @@
+export const TTS_BACKBONE_MODEL = {
+  id: 'outetts-0.3-500m-q4',
+  name: 'OuteTTS 0.3',
+  backboneFile: 'OuteTTS-0.3-500M-Q4_K_M.gguf',
+  backboneUrl:
+    'https://huggingface.co/OuteAI/OuteTTS-0.3-500M-GGUF/resolve/main/OuteTTS-0.3-500M-Q4_K_M.gguf',
+  backboneSizeMB: 454,
+  vocoderFile: 'WavTokenizer-Large-75-Q5_1.gguf',
+  vocoderUrl:
+    'https://huggingface.co/ggml-org/WavTokenizer/resolve/main/WavTokenizer-Large-75-Q5_1.gguf',
+  vocoderSizeMB: 73,
+  sampleRate: 24000,
+  description: 'Natural-sounding on-device speech. Requires ~530 MB storage.',
+};
+
+export const TTS_SPEAKER_PROFILES = [
+  { id: '0', label: 'Default' },
+];
+
+/** Warn user if device RAM is below this threshold */
+export const TTS_WARN_RAM_GB = 8;
+/** Hard-block TTS on devices below this threshold */
+export const TTS_BLOCK_RAM_GB = 6;
+/** Max cached audio messages per conversation before eviction */
+export const AUDIO_CACHE_MAX_MESSAGES = 50;
diff --git a/src/hooks/useTTS.ts b/src/hooks/useTTS.ts
new file mode 100644
index 00000000..5ad948a3
--- /dev/null
+++ b/src/hooks/useTTS.ts
@@ -0,0 +1,48 @@
+import { useEffect, useCallback } from 'react';
+import { useTTSStore } from '../stores/ttsStore';
+import { hardwareService } from '../services/hardware';
+import { TTS_BLOCK_RAM_GB, TTS_WARN_RAM_GB } from '../constants/ttsModels';
+
+export function useTTS() {
+  const store = useTTSStore();
+
+  useEffect(() => {
+    store.checkDownloadStatus();
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []);
+
+  const canRunOnDevice = useCallback((): { allowed: boolean; warning: boolean } => {
+    const ramGB = hardwareService.getTotalMemoryGB();
+    return {
+      allowed: ramGB >= TTS_BLOCK_RAM_GB,
+      warning: ramGB < TTS_WARN_RAM_GB,
+    };
+  }, []);
+
+  const speakMessage = useCallback(
+    (text: string, messageId: string) => {
+      if (!store.isModelLoaded && store.isBackboneDownloaded && store.isVocoderDownloaded) {
+        store.loadModels().then(() => store.speak(text, messageId));
+        return;
+      }
+      store.speak(text, messageId);
+    },
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [store.isModelLoaded, store.isBackboneDownloaded, store.isVocoderDownloaded],
+  );
+
+  const areBothDownloaded = store.isBackboneDownloaded && store.isVocoderDownloaded;
+
+  return {
+    ...store,
+    speakMessage,
+    canRunOnDevice,
+    areBothDownloaded,
+    isDownloading: store.isDownloadingBackbone || store.isDownloadingVocoder,
+    // weighted by file size (454 MB backbone, 73 MB vocoder → 86% / 14%)
+    overallDownloadProgress:
+      store.backboneDownloadProgress * 0.86 + store.vocoderDownloadProgress * 0.14,
+    isAudioMode: store.settings.interfaceMode === 'audio',
+    isChatMode: store.settings.interfaceMode === 'chat',
+  };
+}
diff --git a/src/navigation/AppNavigator.tsx b/src/navigation/AppNavigator.tsx
index 1d15b73a..517357a2 100644
--- a/src/navigation/AppNavigator.tsx
+++ b/src/navigation/AppNavigator.tsx
@@ -32,6 +32,7 @@ import {
   DownloadManagerScreen,
   ModelSettingsScreen,
   VoiceSettingsScreen,
+  TTSSettingsScreen,
   DeviceInfoScreen,
   StorageSettingsScreen,
   SecuritySettingsScreen,
@@ -229,6 +230,7 @@ export const AppNavigator: React.FC = () => {
         <RootStack.Screen name="ModelSettings" component={ModelSettingsScreen} />
         <RootStack.Screen name="RemoteServers" component={RemoteServersScreen} />
         <RootStack.Screen name="VoiceSettings" component={VoiceSettingsScreen} />
+        <RootStack.Screen name="TTSSettings" component={TTSSettingsScreen} />
         <RootStack.Screen name="DeviceInfo" component={DeviceInfoScreen} />
         <RootStack.Screen name="StorageSettings" component={StorageSettingsScreen} />
         <RootStack.Screen name="SecuritySettings" component={SecuritySettingsScreen} />
diff --git a/src/navigation/types.ts b/src/navigation/types.ts
index 21b876da..b58d03c1 100644
--- a/src/navigation/types.ts
+++ b/src/navigation/types.ts
@@ -16,6 +16,7 @@ export type RootStackParamList = {
   ModelSettings: undefined;
   RemoteServers: undefined;
   VoiceSettings: undefined;
+  TTSSettings: undefined;
   DeviceInfo: undefined;
   StorageSettings: undefined;
   SecuritySettings: undefined;
diff --git a/src/screens/ChatScreen/ChatMessageArea.tsx b/src/screens/ChatScreen/ChatMessageArea.tsx
index f7611cc0..374c80bc 100644
--- a/src/screens/ChatScreen/ChatMessageArea.tsx
+++ b/src/screens/ChatScreen/ChatMessageArea.tsx
@@ -1,5 +1,6 @@
 import React, { useState, useMemo } from 'react';
-import { View, FlatList, Text, Keyboard, ActivityIndicator, Platform } from 'react-native';
+import { View, FlatList, Text, Keyboard, ActivityIndicator, Platform, StyleSheet } from 'react-native';
+import { useTTSStore } from '../../stores/ttsStore';
 import Icon from 'react-native-vector-icons/Feather';
 import Animated, { FadeIn } from 'react-native-reanimated';
 import { AttachStep } from 'react-native-spotlight-tour';
@@ -28,6 +29,10 @@ export type ChatMessageAreaProps = {
 export const ChatMessageArea: React.FC<ChatMessageAreaProps> = ({
   flatListRef, isNearBottomRef, chat, styles, colors, handleScroll, renderItem, chatSpotlight,
 }) => {
+  // Hide FlatList until initial layout + scroll is complete to prevent visible scroll jump
+  const [isListReady, setIsListReady] = useState(false);
+  const hasScrolledRef = React.useRef(false);
+  const interfaceMode = useTTSStore((s) => s.settings.interfaceMode);
   const tabNav = useNavigation<NativeStackNavigationProp<RootStackParamList>>();
   const [inputHeight, setInputHeight] = useState(84);
   const activeModelRepoId = chat.activeModelId?.split('/').slice(0, 2).join('/');
@@ -52,12 +57,26 @@ export const ChatMessageArea: React.FC<ChatMessageAreaProps> = ({
       ) : (
         <FlatList
           ref={flatListRef}
+          style={isListReady ? undefined : hiddenStyle.hidden}
           data={chat.displayMessages}
           renderItem={renderItem}
           keyExtractor={(item) => item.id}
+          extraData={interfaceMode}
           contentContainerStyle={styles.messageList}
           onScroll={handleScroll}
-          onContentSizeChange={(_w, _h) => { if (isNearBottomRef.current) flatListRef.current?.scrollToEnd({ animated: false }); }}
+          onContentSizeChange={(_w, h) => {
+            if (!hasScrolledRef.current && h > 0) {
+              // Initial layout: force scroll to bottom regardless of isNearBottom
+              flatListRef.current?.scrollToEnd({ animated: false });
+              hasScrolledRef.current = true;
+              // Reveal after a frame so the scroll position settles
+              requestAnimationFrame(() => {
+                requestAnimationFrame(() => setIsListReady(true));
+              });
+            } else if (isNearBottomRef.current) {
+              flatListRef.current?.scrollToEnd({ animated: false });
+            }
+          }}
           onLayout={() => { }}
           scrollEventThrottle={16}
           keyboardDismissMode="on-drag"
@@ -140,3 +159,7 @@ export const ChatMessageArea: React.FC<ChatMessageAreaProps> = ({
     </>
   );
 };
+
+const hiddenStyle = StyleSheet.create({
+  hidden: { opacity: 0 },
+});
diff --git a/src/screens/ChatScreen/ChatModalSection.tsx b/src/screens/ChatScreen/ChatModalSection.tsx
index 301b3bdc..76f90703 100644
--- a/src/screens/ChatScreen/ChatModalSection.tsx
+++ b/src/screens/ChatScreen/ChatModalSection.tsx
@@ -83,6 +83,7 @@ export const ChatModalSection: React.FC<ChatModalSectionProps> = ({
       onOpenProject={() => setShowProjectSelector(true)}
       onOpenGallery={imageCount > 0 ? () => navigation.navigate('Gallery', { conversationId: activeConversationId }) : undefined}
       onDeleteConversation={activeConversation ? handleDeleteConversation : undefined}
+      onOpenTTSSettings={() => { setShowSettingsPanel(false); navigation.navigate('TTSSettings'); }}
       conversationImageCount={imageCount}
       activeProjectName={activeProject?.name || null}
       isRemote={isRemote}
diff --git a/src/screens/ChatScreen/MessageRenderer.tsx b/src/screens/ChatScreen/MessageRenderer.tsx
index 5cf4a0cc..e5511441 100644
--- a/src/screens/ChatScreen/MessageRenderer.tsx
+++ b/src/screens/ChatScreen/MessageRenderer.tsx
@@ -1,7 +1,18 @@
-import React from 'react';
+import React, { useState } from 'react';
+import { View, StyleSheet } from 'react-native';
 import { ChatMessage } from '../../components';
+import { AudioMessageBubble } from '../../components/AudioMessageBubble';
+import { TTSButton } from '../../components/TTSButton';
+import { AnimatedEntry } from '../../components/AnimatedEntry';
+import { useTTSStore } from '../../stores/ttsStore';
+import { stripControlTokens } from '../../utils/messageContent';
 import { Message } from '../../types';
+import '../../types/tts';
 import { ChatMessageItem } from './useChatScreen';
+import { parseThinkingContent, buildMessageData } from '../../components/ChatMessage/utils';
+import { ThinkingBlock } from '../../components/ChatMessage/components/ThinkingBlock';
+import { createStyles as createChatStyles } from '../../components/ChatMessage/styles';
+import { useThemedStyles } from '../../theme';
 
 type MessageRendererProps = {
   item: Message | ChatMessageItem;
@@ -19,31 +30,215 @@ type MessageRendererProps = {
   onImagePress: (uri: string) => void;
 };
 
-export const MessageRenderer: React.FC<MessageRendererProps> = ({
-  item,
-  index,
-  displayMessagesLength,
-  animateLastN,
-  imageModelLoaded,
-  isStreaming,
-  isGeneratingImage,
-  showGenerationDetails,
-  onCopy,
-  onRetry,
-  onEdit,
-  onGenerateImage,
-  onImagePress,
-}) => (
-  <ChatMessage
-    message={item as Message}
-    isStreaming={item.id === 'streaming'}
-    onCopy={onCopy}
-    onRetry={onRetry}
-    onEdit={onEdit}
-    onGenerateImage={onGenerateImage}
-    onImagePress={onImagePress}
-    canGenerateImage={imageModelLoaded && !isStreaming && !isGeneratingImage}
-    showGenerationDetails={showGenerationDetails}
-    animateEntry={animateLastN > 0 && index >= displayMessagesLength - animateLastN}
-  />
-);
+/** Renders the thinking/reasoning block for audio mode without the ChatMessage bubble wrapper */
+const AudioModeThinkingBlock: React.FC<{ msg: Message }> = ({ msg }) => {
+  const chatStyles = useThemedStyles(createChatStyles);
+  const [showThinking, setShowThinking] = useState(false);
+  const { parsedContent } = buildMessageData(msg);
+  if (!parsedContent.thinking) return null;
+  return (
+    <View style={chatStyles.thinkingBlockWrapper}>
+      <ThinkingBlock
+        parsedContent={parsedContent}
+        showThinking={showThinking}
+        onToggle={() => setShowThinking((v) => !v)}
+        styles={chatStyles}
+      />
+    </View>
+  );
+};
+
+interface AudioBubbleProps {
+  messageId: string;
+  audioPath: string;
+  waveformData: number[];
+  durationSeconds: number;
+  transcript: string;
+  _reasoningContent?: string;
+}
+
+function buildAudioBubbleProps(msg: Message): AudioBubbleProps {
+  const transcript = stripControlTokens(msg.content);
+  console.log('[AudioBubble] buildProps: msgId=', msg.id, 'contentLen=', msg.content.length, 'transcriptLen=', transcript.length);
+  return {
+    messageId: msg.id,
+    audioPath: msg.audioPath ?? '',
+    waveformData: msg.waveformData ?? [],
+    durationSeconds: msg.audioDurationSeconds ?? 0,
+    transcript,
+    _reasoningContent: msg.reasoningContent,
+  };
+}
+
+/** Wraps content with AnimatedEntry if needed */
+function wrapAnimated(content: React.ReactElement, shouldAnimate: boolean): React.ReactElement {
+  return shouldAnimate ? <AnimatedEntry index={0}>{content}</AnimatedEntry> : content;
+}
+
+/** Renders a user voice message as an audio bubble */
+function renderUserAudioBubble(
+  opts: { msg: Message; audioAtt: any; shouldAnimate: boolean },
+  props: MessageRendererProps,
+): React.ReactElement {
+  const { msg, audioAtt, shouldAnimate } = opts;
+  const bubble = (
+    <View style={audioStyles.userContainer}>
+      <AudioMessageBubble
+        messageId={msg.id}
+        audioPath={audioAtt.uri}
+        waveformData={[]}
+        durationSeconds={audioAtt.audioDurationSeconds ?? 0}
+        transcript={msg.content}
+        isUser
+        onCopy={props.onCopy}
+        onRetry={() => props.onRetry(msg)}
+      />
+    </View>
+  );
+  return wrapAnimated(bubble, shouldAnimate);
+}
+
+/** Renders a streaming/thinking assistant message in audio mode as a ChatMessage */
+function renderAudioStreamingMessage(
+  msg: Message,
+  isStreamingThis: boolean,
+  props: MessageRendererProps,
+): React.ReactElement {
+  return (
+    <ChatMessage
+      message={msg}
+      isStreaming={isStreamingThis}
+      onCopy={props.onCopy}
+      onRetry={props.onRetry}
+      onEdit={props.onEdit}
+      onGenerateImage={props.onGenerateImage}
+      onImagePress={props.onImagePress}
+      canGenerateImage={false}
+      showGenerationDetails={props.showGenerationDetails}
+      animateEntry={false}
+    />
+  );
+}
+
+/** Renders a completed assistant audio bubble, with optional tool call UI */
+function renderAudioAssistantBubble(
+  msg: Message,
+  shouldAnimate: boolean,
+  props: MessageRendererProps,
+): React.ReactElement {
+  const hasThinking = !!msg.reasoningContent || !!parseThinkingContent(msg.content).thinking;
+  const hasToolCalls = !!msg.toolCalls?.length;
+
+  // For messages with tool calls, render as a regular ChatMessage (has proper tool call UI)
+  // followed by the audio bubble for the spoken text
+  if (hasToolCalls) {
+    const element = (
+      <View style={audioStyles.assistantContainer}>
+        <ChatMessage
+          message={msg}
+          isStreaming={false}
+          onCopy={props.onCopy}
+          onRetry={props.onRetry}
+          onEdit={props.onEdit}
+          onGenerateImage={props.onGenerateImage}
+          onImagePress={props.onImagePress}
+          canGenerateImage={false}
+          showGenerationDetails={props.showGenerationDetails}
+          animateEntry={false}
+        />
+      </View>
+    );
+    return wrapAnimated(element, shouldAnimate);
+  }
+
+  const bubble = (
+    <View style={audioStyles.assistantContainer}>
+      {hasThinking && <AudioModeThinkingBlock msg={msg} />}
+      <AudioMessageBubble
+        {...buildAudioBubbleProps(msg)}
+        onCopy={props.onCopy}
+        onRetry={() => props.onRetry(msg)}
+      />
+    </View>
+  );
+  return wrapAnimated(bubble, shouldAnimate);
+}
+
+export const MessageRenderer: React.FC<MessageRendererProps> = (props) => {
+  const {
+    item,
+    index,
+    displayMessagesLength,
+    animateLastN,
+    imageModelLoaded,
+    isStreaming,
+    isGeneratingImage,
+    showGenerationDetails,
+    onCopy,
+    onRetry,
+    onEdit,
+    onGenerateImage,
+    onImagePress,
+  } = props;
+
+  const ttsMode = useTTSStore((s) => s.settings.interfaceMode);
+  const msg = item as Message;
+  const animateEntry = animateLastN > 0 && index >= displayMessagesLength - animateLastN;
+  const isStreamingThis = item.id === 'streaming';
+
+  // User voice message: always show as audio bubble
+  if (msg.role === 'user') {
+    const audioAtt = msg.attachments?.find((a) => a.type === 'audio');
+    if (audioAtt) {
+      return renderUserAudioBubble({ msg, audioAtt, shouldAnimate: animateEntry }, props);
+    }
+  }
+
+  const isAudioAssistant = msg.role === 'assistant' && !msg.isSystemInfo;
+
+  // Thinking placeholder + audio streaming
+  const isThinkingItem = !!(msg as any).isThinking;
+  if (isAudioAssistant && ttsMode === 'audio' && (isStreamingThis || isThinkingItem)) {
+    return renderAudioStreamingMessage(msg, isStreamingThis, props);
+  }
+
+  // Audio Mode: show assistant messages as audio bubbles after streaming ends
+  if (isAudioAssistant && ttsMode === 'audio' && !isStreamingThis) {
+    return renderAudioAssistantBubble(msg, animateEntry, props);
+  }
+
+  // Chat Mode: TTSButton lives in the meta row
+  const isPlainAssistant = msg.role === 'assistant' && !msg.isSystemInfo && !msg.toolCalls?.length;
+  const ttsMeta = isPlainAssistant && !isStreamingThis
+    ? <TTSButton text={stripControlTokens(msg.content)} messageId={msg.id} />
+    : undefined;
+
+  return (
+    <ChatMessage
+      message={msg}
+      isStreaming={isStreamingThis}
+      onCopy={onCopy}
+      onRetry={onRetry}
+      onEdit={onEdit}
+      onGenerateImage={onGenerateImage}
+      onImagePress={onImagePress}
+      canGenerateImage={imageModelLoaded && !isStreaming && !isGeneratingImage}
+      showGenerationDetails={showGenerationDetails}
+      animateEntry={animateEntry}
+      metaExtra={ttsMeta}
+    />
+  );
+};
+
+const audioStyles = StyleSheet.create({
+  userContainer: {
+    paddingRight: 16,
+    marginVertical: 8,
+    alignItems: 'flex-end',
+  },
+  assistantContainer: {
+    paddingHorizontal: 16,
+    marginVertical: 8,
+    alignItems: 'flex-start',
+  },
+});
diff --git a/src/screens/ChatScreen/index.tsx b/src/screens/ChatScreen/index.tsx
index 2be6468e..bdf0c138 100644
--- a/src/screens/ChatScreen/index.tsx
+++ b/src/screens/ChatScreen/index.tsx
@@ -1,5 +1,6 @@
 import React, { useCallback, useEffect, useRef, useState } from 'react';
 import { FlatList, KeyboardAvoidingView, InteractionManager } from 'react-native';
+import { useTTSStore } from '../../stores/ttsStore';
 import { SafeAreaView } from 'react-native-safe-area-context';
 import { useFocusEffect } from '@react-navigation/native';
 import { useSpotlightTour } from 'react-native-spotlight-tour';
@@ -101,6 +102,22 @@ export const ChatScreen: React.FC = () => {
       setTimeout(() => { flatListRef.current?.scrollToEnd({ animated: true }); }, 100);
     }
   }, [chat.activeConversation?.messages.length]);
+
+  // Reset scroll when switching between chat/audio interface modes
+  const interfaceMode = useTTSStore((s) => s.settings.interfaceMode);
+  const prevModeRef = React.useRef(interfaceMode);
+  React.useEffect(() => {
+    if (prevModeRef.current !== interfaceMode) {
+      prevModeRef.current = interfaceMode;
+      isNearBottomRef.current = true;
+      chat.setShowScrollToBottom(false);
+      // FlatList re-renders via extraData; onContentSizeChange fires and scrolls.
+      // Backup: scroll after items have had time to re-measure.
+      setTimeout(() => { flatListRef.current?.scrollToEnd({ animated: false }); }, 300);
+    }
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [interfaceMode]);
+
   const alertEl = (
     <CustomAlert
       visible={chat.alertState.visible}
diff --git a/src/screens/ChatScreen/useChatGenerationActions.ts b/src/screens/ChatScreen/useChatGenerationActions.ts
index f48e558e..556ef9a5 100644
--- a/src/screens/ChatScreen/useChatGenerationActions.ts
+++ b/src/screens/ChatScreen/useChatGenerationActions.ts
@@ -18,11 +18,28 @@ import {
   retrievalService,
 } from '../../services';
 import { embeddingService } from '../../services/rag/embedding';
-import { useChatStore, useProjectStore, useRemoteServerStore } from '../../stores';
+import { useChatStore, useProjectStore, useRemoteServerStore, useTTSStore } from '../../stores';
 import { Message, MediaAttachment, Project, DownloadedModel, RemoteModel, ModelLoadingStrategy, CacheType } from '../../types';
 import logger from '../../utils/logger';
 type SetState<T> = Dispatch<SetStateAction<T>>;
 const FALLBACK_RECENT_MESSAGE_COUNT = 2;
+
+/**
+ * Appended to the system prompt when TTS audio mode is active.
+ * Guides the model to respond conversationally for voice output.
+ */
+const AUDIO_MODE_PROMPT_HINT = `
+
+[VOICE MODE ACTIVE — your response will be spoken aloud via text-to-speech]
+Respond as if you are speaking to the user in a natural conversation:
+- Be concise and conversational — talk like a person, not a document
+- Never use markdown formatting (no headers, bullets, bold, code blocks, tables)
+- Never use special characters, symbols, or emoji that sound awkward when read aloud
+- Use short sentences and natural spoken transitions ("So,", "Basically,", "Here's the thing —")
+- If summarizing research or long content, give the key takeaways in a few spoken paragraphs, not an essay
+- Numbers: say "about two thousand" not "~2,000"
+- Keep responses under 2-3 paragraphs unless the user explicitly asks for detail
+- Use expressive punctuation for natural prosody: exclamation marks for emphasis!, question marks for curiosity?, ellipses for pauses..., and vary sentence length for rhythm`;
 export type GenerationDeps = {
   activeModelId: string | null;
   activeModel: DownloadedModel | null | undefined;
@@ -248,7 +265,13 @@ export async function startGenerationFn(deps: GenerationDeps, call: StartGenerat
   }
   const conversation = useChatStore.getState().conversations.find(c => c.id === targetConversationId);
   const { enabledTools, rawPrompt } = resolveToolsAndPrompt(deps, conversation);
-  const basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
+  let basePrompt = await injectRagContext(conversation?.projectId, messageText, rawPrompt);
+
+  // In audio mode, append instructions for conversational voice-friendly responses
+  if (useTTSStore.getState().settings.interfaceMode === 'audio') {
+    basePrompt += AUDIO_MODE_PROMPT_HINT;
+  }
+
   const isRemote = !!useRemoteServerStore.getState().activeRemoteTextModelId;
   const activeTools = enabledTools;
   const systemPrompt = applyGemma4ThinkToken(
diff --git a/src/screens/ChatScreen/useChatMessageHandlers.ts b/src/screens/ChatScreen/useChatMessageHandlers.ts
index c9ff7f1c..f20d8237 100644
--- a/src/screens/ChatScreen/useChatMessageHandlers.ts
+++ b/src/screens/ChatScreen/useChatMessageHandlers.ts
@@ -1,6 +1,7 @@
 import { Dispatch, SetStateAction } from 'react';
 import { showAlert, AlertState } from '../../components';
 import { Message } from '../../types';
+import { useTTSStore } from '../../stores/ttsStore';
 import {
   regenerateResponseFn, executeDeleteConversationFn, handleImageGenerationFn,
 } from './useChatGenerationActions';
@@ -20,6 +21,8 @@ export async function handleRetryMessageFn(
   message: Message, genDeps: GenerationDeps, p: RetryParams,
 ): Promise<void> {
   if (!p.activeConversationId || !p.hasActiveModel) return;
+  // Stop any in-flight TTS before deleting messages
+  useTTSStore.getState().stop();
   const msgs = p.activeConversation?.messages || [];
   if (message.role === 'user') {
     const idx = msgs.findIndex((m: Message) => m.id === message.id);
diff --git a/src/screens/ChatScreen/useChatScreen.ts b/src/screens/ChatScreen/useChatScreen.ts
index e543b7e5..b9e7683e 100644
--- a/src/screens/ChatScreen/useChatScreen.ts
+++ b/src/screens/ChatScreen/useChatScreen.ts
@@ -1,7 +1,9 @@
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
+import { AppState } from 'react-native';
 import { useNavigation, useRoute, RouteProp } from '@react-navigation/native';
 import { AlertState, initialAlertState } from '../../components';
-import { useAppStore, useChatStore, useProjectStore, useRemoteServerStore } from '../../stores';
+import { useAppStore, useChatStore, useProjectStore, useRemoteServerStore, useTTSStore } from '../../stores';
+import '../../types/tts';
 import logger from '../../utils/logger';
 import {
   llmService, generationService, imageGenerationService, activeModelService,
@@ -15,10 +17,16 @@ import { startGenerationFn, handleSendFn, handleStopFn, handleSelectProjectFn }
 import { handleRetryMessageFn, handleEditMessageFn, handleDeleteConversationFn, handleGenerateImageFromMsgFn } from './useChatMessageHandlers';
 import { getDisplayMessages, getPlaceholderText, ChatMessageItem, StreamingState } from './types';
 import { saveImageToGallery } from './useSaveImage';
+import { stripControlTokens, stripMarkdownForSpeech } from '../../utils/messageContent';
 
 export type { AlertState, ChatMessageItem, StreamingState };
 export { getDisplayMessages, getPlaceholderText };
 
+function _triggerAudioModeGeneration(conversationId: string, messageId: string, content: string) {
+  useChatStore.getState().updateMessageAudio(conversationId, messageId, { isAudioModeMessage: true });
+  useTTSStore.getState().speak(stripMarkdownForSpeech(stripControlTokens(content)), messageId);
+}
+
 type ChatScreenRouteProp = RouteProp<RootStackParamList, 'Chat'>;
 
 type ActiveModelInfo = {
@@ -53,6 +61,26 @@ export const useChatScreen = () => {
   const [isCompacting, setIsCompacting] = useState(false);
   const lastMessageCountRef = useRef(0);
   const generatingForConversationRef = useRef<string | null>(null);
+
+  // Stop TTS when navigating away, app backgrounded, or screen locked
+  useEffect(() => {
+    const unsubBlur = navigation.addListener('blur', () => {
+      useTTSStore.getState().stop();
+    });
+    // beforeRemove fires on back button — more reliable than blur for native-stack
+    const unsubRemove = navigation.addListener('beforeRemove', () => {
+      useTTSStore.getState().stop();
+    });
+    const appStateSub = AppState.addEventListener('change', (nextState) => {
+      const tts = useTTSStore.getState();
+      if (nextState !== 'active') {
+        if (tts.isSpeaking && !tts.isPaused) { tts.pause(); }
+      } else {
+        if (tts.isSpeaking && tts.isPaused) { tts.resume(); }
+      }
+    });
+    return () => { unsubBlur(); unsubRemove(); appStateSub.remove(); };
+  }, [navigation]);
   const modelLoadStartTimeRef = useRef<number | null>(null);
   const startGenerationRef = useRef<(id: string, text: string) => Promise<void>>(null as any);
   const addMessageRef = useRef<typeof addMessage>(null as any);
@@ -193,6 +221,95 @@ export const useChatScreen = () => {
     lastMessageCountRef.current = curr;
   }, [displayMessages.length]);
   useEffect(() => { lastMessageCountRef.current = 0; setAnimateLastN(0); }, [activeConversationId]);
+  const prevStreamingRef = useRef(false);
+  const ttsStreamRef = useRef<{ nextPos: number; pending: string[]; isPlaying: boolean }>({
+    nextPos: 0, pending: [], isPlaying: false,
+  });
+
+  // Buffer-based streaming TTS: feed text to Kokoro as soon as enough runway accumulates.
+  // No sentence detection — just split at word boundaries when buffer exceeds threshold.
+  // Works even at low tok/sec because the threshold is much smaller than a full sentence.
+  useEffect(() => {
+    if (!isStreamingForThisConversation) return;
+    const tts = useTTSStore.getState();
+    if (tts.settings.interfaceMode !== 'audio') return;
+    if (!tts.kokoroReady && !tts.isModelLoaded) return;
+    if (!streamingMessage) return;
+
+    const ref = ttsStreamRef.current;
+    const stripped = stripControlTokens(streamingMessage);
+    const buffered = stripped.slice(ref.nextPos);
+
+    // Need enough chars for Kokoro to have meaningful speech (~2-3 seconds worth)
+    const MIN_CHARS = 50;
+    if (buffered.length < MIN_CHARS) return;
+
+    // Split at the last word boundary so we don't cut mid-word
+    const lastSpace = buffered.lastIndexOf(' ');
+    if (lastSpace <= 0) return;
+
+    const chunk = buffered.slice(0, lastSpace).trim();
+    ref.nextPos += lastSpace + 1;
+    if (!chunk) return;
+
+    ref.pending.push(stripMarkdownForSpeech(chunk));
+    logger.log('[StreamTTS] chunk queued, pending=', ref.pending.length, 'isPlaying=', ref.isPlaying);
+
+    if (!ref.isPlaying) {
+      const playNext = () => {
+        // If another message took over playback (e.g. user tapped a recording), stop the chain
+        const currentId = useTTSStore.getState().currentMessageId;
+        if (currentId !== null && currentId !== 'streaming') {
+          logger.log('[StreamTTS] chain interrupted, currentId=', currentId);
+          ref.pending = [];
+          ref.isPlaying = false;
+          return;
+        }
+        const next = ref.pending.shift();
+        if (!next) { ref.isPlaying = false; logger.log('[StreamTTS] chain done, no more pending'); return; }
+        ref.isPlaying = true;
+        logger.log('[StreamTTS] playing next chunk, remaining=', ref.pending.length);
+        useTTSStore.getState().speak(next, 'streaming').finally(playNext);
+      };
+      playNext();
+    }
+  }, [streamingMessage, isStreamingForThisConversation]);
+
+  useEffect(() => {
+    const was = prevStreamingRef.current;
+    prevStreamingRef.current = isStreamingForThisConversation;
+    if (!was || isStreamingForThisConversation || !activeConversationId) return;
+    const { nextPos: alreadySpoken } = ttsStreamRef.current;
+    ttsStreamRef.current = { nextPos: 0, pending: [], isPlaying: false };
+    const tts = useTTSStore.getState();
+    if (tts.settings.interfaceMode !== 'audio') return;
+    const conv = useChatStore.getState().conversations.find((c) => c.id === activeConversationId);
+    const last = (conv?.messages ?? []).at(-1);
+    if (!last || last.role !== 'assistant' || last.isSystemInfo || last.toolCalls?.length || last.audioPath) return;
+    // Stamp as audio-mode. Estimate duration from word count (avg 2.5 words/sec)
+    const wordCount = last.content.split(/\s+/).filter(Boolean).length;
+    const speed = useTTSStore.getState().settings.speed || 1;
+    const estDuration = Math.max(1, wordCount / (2.5 * speed));
+    logger.log('[StreamTTS] post-stream: messageId=', last.id, 'alreadySpoken=', alreadySpoken, 'wordCount=', wordCount, 'estDuration=', estDuration);
+    useChatStore.getState().updateMessageAudio(activeConversationId, last.id, {
+      isAudioModeMessage: true,
+      audioDurationSeconds: estDuration,
+    });
+    // Only speak if a TTS engine is available
+    if (!tts.kokoroReady && !tts.isModelLoaded) { logger.log('[StreamTTS] post-stream: no TTS engine available'); return; }
+    // Strip thinking/control tokens — must match how positions were tracked during streaming
+    const cleanContent = stripMarkdownForSpeech(stripControlTokens(last.content));
+    const remaining = cleanContent.slice(alreadySpoken).trim();
+    logger.log('[StreamTTS] post-stream: remaining chars=', remaining.length, 'isSpeaking=', tts.isSpeaking, 'currentMessageId=', tts.currentMessageId);
+    if (remaining) {
+      useTTSStore.getState().speak(remaining, last.id);
+    } else if (useTTSStore.getState().currentMessageId === 'streaming') {
+      // All text was already spoken by streaming chunks — transfer ownership
+      // to the real message ID so the AudioMessageBubble's seekbar works.
+      logger.log('[StreamTTS] post-stream: transferring ownership from streaming to', last.id);
+      useTTSStore.setState({ currentMessageId: last.id });
+    }
+  }, [isStreamingForThisConversation]); // eslint-disable-line react-hooks/exhaustive-deps
 
   const startGeneration = async (targetConversationId: string, messageText: string) => {
     await startGenerationFn(genDeps, { setDebugInfo, targetConversationId, messageText });
diff --git a/src/screens/DownloadManagerScreen/index.tsx b/src/screens/DownloadManagerScreen/index.tsx
index 3829299f..46c2312f 100644
--- a/src/screens/DownloadManagerScreen/index.tsx
+++ b/src/screens/DownloadManagerScreen/index.tsx
@@ -1,5 +1,5 @@
-import React from 'react';
-import { View, Text, FlatList, TouchableOpacity, RefreshControl } from 'react-native';
+import React, { useState, useCallback } from 'react';
+import { View, Text, FlatList, TouchableOpacity, RefreshControl, ScrollView } from 'react-native';
 import { SafeAreaView } from 'react-native-safe-area-context';
 import Icon from 'react-native-vector-icons/Feather';
 import { Card } from '../../components';
@@ -7,13 +7,35 @@ import { CustomAlert, hideAlert } from '../../components/CustomAlert';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useNavigation } from '@react-navigation/native';
 import { createStyles } from './styles';
-import { ActiveDownloadCard, CompletedDownloadCard, formatBytes } from './items';
+import { ActiveDownloadCard, CompletedDownloadCard, formatBytes, type DownloadItem } from './items';
 import { useDownloadManager } from './useDownloadManager';
 
+type FilterType = 'all' | 'text' | 'vision' | 'image' | 'tts' | 'stt';
+
+const FILTERS: { id: FilterType; label: string }[] = [
+  { id: 'all',    label: 'All' },
+  { id: 'text',   label: 'Text' },
+  { id: 'vision', label: 'Vision' },
+  { id: 'image',  label: 'Image Gen' },
+  { id: 'tts',    label: 'Text to Speech' },
+  { id: 'stt',    label: 'Speech to Text' },
+];
+
+function matchesFilter(item: DownloadItem, filter: FilterType): boolean {
+  if (filter === 'all')    return true;
+  if (filter === 'vision') return item.modelType === 'text' && !!item.isVisionModel;
+  if (filter === 'text')   return item.modelType === 'text' && !item.isVisionModel;
+  if (filter === 'image')  return item.modelType === 'image';
+  if (filter === 'tts')    return item.modelType === 'tts';
+  if (filter === 'stt')    return item.modelType === 'stt';
+  return true;
+}
+
 export const DownloadManagerScreen: React.FC = () => {
   const navigation = useNavigation();
   const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
+  const [activeFilter, setActiveFilter] = useState<FilterType>('all');
   const {
     isRefreshing,
     activeItems,
@@ -27,6 +49,30 @@ export const DownloadManagerScreen: React.FC = () => {
     totalStorageUsed,
   } = useDownloadManager();
 
+  const filteredActive = activeItems.filter(item => matchesFilter(item, activeFilter));
+  const filteredCompleted = completedItems.filter(item => matchesFilter(item, activeFilter));
+
+  const renderHeader = useCallback(() => (
+    <ScrollView
+      horizontal
+      showsHorizontalScrollIndicator={false}
+      contentContainerStyle={styles.filterBarContent}
+    >
+      {FILTERS.map(f => {
+        const active = activeFilter === f.id;
+        return (
+          <TouchableOpacity
+            key={f.id}
+            style={[styles.filterChip, active && styles.filterChipActive]}
+            onPress={() => setActiveFilter(f.id)}
+          >
+            <Text style={[styles.filterChipText, active && styles.filterChipTextActive]}>{f.label}</Text>
+          </TouchableOpacity>
+        );
+      })}
+    </ScrollView>
+  ), [activeFilter, colors, styles]);
+
   return (
     <SafeAreaView style={styles.container} edges={['top']} testID="downloaded-models-screen">
       <View style={styles.header}>
@@ -39,52 +85,47 @@ export const DownloadManagerScreen: React.FC = () => {
 
       <FlatList
         data={[{ key: 'content' }]}
+        ListHeaderComponent={renderHeader}
         renderItem={() => (
           <View style={styles.content}>
-            {/* Active Downloads */}
-            <View style={styles.section}>
-              <View style={styles.sectionHeader}>
-                <Icon name="download" size={18} color={colors.primary} />
-                <Text style={styles.sectionTitle}>Active Downloads</Text>
-                <View style={styles.countBadge}>
-                  <Text style={styles.countText}>{activeItems.length}</Text>
+            {/* Active Downloads — only show when there are active items */}
+            {filteredActive.length > 0 && (
+              <View style={styles.section}>
+                <View style={styles.sectionHeader}>
+                  <Icon name="download" size={16} color={colors.primary} />
+                  <Text style={styles.sectionTitle}>Active Downloads</Text>
+                  <View style={styles.countBadge}>
+                    <Text style={styles.countText}>{filteredActive.length}</Text>
+                  </View>
                 </View>
-              </View>
-              {activeItems.length > 0 ? (
-                activeItems.map(item => (
+                {filteredActive.map(item => (
                   <View key={`active-${item.modelId}-${item.fileName}`}>
                     <ActiveDownloadCard item={item} onRemove={handleRemoveDownload} />
                   </View>
-                ))
-              ) : (
-                <Card style={styles.emptyCard}>
-                  <Icon name="inbox" size={32} color={colors.textMuted} />
-                  <Text style={styles.emptyText}>No active downloads</Text>
-                </Card>
-              )}
-            </View>
+                ))}
+              </View>
+            )}
 
-            {/* Completed Downloads */}
+            {/* Downloaded Models */}
             <View style={styles.section}>
               <View style={styles.sectionHeader}>
-                <Icon name="check-circle" size={18} color={colors.success} />
+                <Icon name="check-circle" size={16} color={colors.success} />
                 <Text style={styles.sectionTitle}>Downloaded Models</Text>
                 <View style={styles.countBadge}>
-                  <Text style={styles.countText}>{completedItems.length}</Text>
+                  <Text style={styles.countText}>{filteredCompleted.length}</Text>
                 </View>
               </View>
-              {completedItems.length > 0 ? (
-                completedItems.map(item => (
+              {filteredCompleted.length > 0 ? (
+                filteredCompleted.map(item => (
                   <View key={`completed-${item.modelId}-${item.fileName}`}>
                     <CompletedDownloadCard item={item} onDelete={handleDeleteItem} onRepairVision={handleRepairVision} />
                   </View>
                 ))
               ) : (
                 <Card style={styles.emptyCard}>
-                  <Icon name="package" size={32} color={colors.textMuted} />
-                  <Text style={styles.emptyText}>No models downloaded yet</Text>
-                  <Text style={styles.emptySubtext}>
-                    Go to the Models tab to browse and download models
+                  <Icon name="package" size={24} color={colors.textMuted} />
+                  <Text style={styles.emptyText}>
+                    {activeFilter === 'all' ? 'No models downloaded yet' : `No ${FILTERS.find(f => f.id === activeFilter)?.label ?? ''} models`}
                   </Text>
                 </Card>
               )}
diff --git a/src/screens/DownloadManagerScreen/items.tsx b/src/screens/DownloadManagerScreen/items.tsx
index f2d20d80..8cc45992 100644
--- a/src/screens/DownloadManagerScreen/items.tsx
+++ b/src/screens/DownloadManagerScreen/items.tsx
@@ -12,7 +12,7 @@ import { createStyles } from './styles';
 
 export type DownloadItem = {
   type: 'active' | 'completed';
-  modelType: 'text' | 'image';
+  modelType: 'text' | 'image' | 'tts' | 'stt';
   downloadId?: number;
   modelId: string;
   fileName: string;
@@ -222,9 +222,9 @@ export const CompletedDownloadCard: React.FC<CompletedDownloadCardProps> = ({ it
       <View style={styles.downloadHeader}>
         <View style={styles.modelTypeIcon}>
           <Icon
-            name={item.modelType === 'image' ? 'image' : 'message-square'}
+            name={item.modelType === 'image' ? 'image' : item.modelType === 'tts' ? 'volume-2' : item.modelType === 'stt' ? 'mic' : item.isVisionModel ? 'eye' : 'message-square'}
             size={16}
-            color={item.modelType === 'image' ? colors.info : colors.primary}
+            color={item.modelType === 'image' ? colors.info : item.modelType === 'tts' || item.modelType === 'stt' ? colors.success : item.isVisionModel ? colors.warning : colors.primary}
           />
         </View>
         <View style={styles.downloadInfo}>
diff --git a/src/screens/DownloadManagerScreen/styles.ts b/src/screens/DownloadManagerScreen/styles.ts
index 39120fa0..8f40c283 100644
--- a/src/screens/DownloadManagerScreen/styles.ts
+++ b/src/screens/DownloadManagerScreen/styles.ts
@@ -33,17 +33,17 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({
     flex: 1,
   },
   listContent: {
-    paddingTop: SPACING.lg,
+    paddingTop: SPACING.md,
     paddingBottom: SPACING.xxl,
   },
   section: {
-    marginBottom: SPACING.xl,
+    marginBottom: SPACING.md,
   },
   sectionHeader: {
     flexDirection: 'row' as const,
     alignItems: 'center' as const,
     paddingHorizontal: SPACING.lg,
-    marginBottom: SPACING.md,
+    marginBottom: SPACING.sm,
     gap: SPACING.sm,
   },
   sectionTitle: {
@@ -63,7 +63,7 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({
   },
   downloadCard: {
     marginHorizontal: SPACING.lg,
-    marginBottom: SPACING.md,
+    marginBottom: SPACING.sm,
   },
   downloadHeader: {
     flexDirection: 'row' as const,
@@ -160,19 +160,47 @@ export const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({
   emptyCard: {
     marginHorizontal: SPACING.lg,
     alignItems: 'center' as const,
-    paddingVertical: SPACING.xxl,
-    gap: SPACING.sm,
+    paddingVertical: SPACING.xl,
+    gap: SPACING.xs,
   },
   emptyText: {
-    ...TYPOGRAPHY.body,
-    color: colors.textSecondary,
-    marginTop: SPACING.sm,
+    ...TYPOGRAPHY.bodySmall,
+    color: colors.textMuted,
+    marginTop: SPACING.xs,
   },
   emptySubtext: {
-    ...TYPOGRAPHY.bodySmall,
+    ...TYPOGRAPHY.meta,
     color: colors.textMuted,
     textAlign: 'center' as const,
   },
+  filterBarContent: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    paddingHorizontal: SPACING.lg,
+    paddingVertical: SPACING.sm,
+    gap: SPACING.xs,
+  },
+  filterChip: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
+    paddingHorizontal: SPACING.sm + 2,
+    paddingVertical: 5,
+    borderRadius: 12,
+    borderWidth: 1,
+    borderColor: colors.border,
+    backgroundColor: colors.background,
+  },
+  filterChipActive: {
+    borderColor: colors.primary,
+    backgroundColor: `${colors.primary}15`,
+  },
+  filterChipText: {
+    ...TYPOGRAPHY.meta,
+    color: colors.textSecondary,
+  },
+  filterChipTextActive: {
+    color: colors.primary,
+  },
   storageSection: {
     paddingHorizontal: SPACING.lg,
   },
diff --git a/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx b/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx
index ea7c9306..4d84b130 100644
--- a/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx
+++ b/src/screens/ModelSettingsScreen/ImageGenerationSection.tsx
@@ -1,7 +1,7 @@
 import React, { useState } from 'react';
 import { View, Text, Switch, Platform, TouchableOpacity } from 'react-native';
-import Slider from '@react-native-community/slider';
 import { AdvancedToggle, Card } from '../../components';
+import { NumericStepper } from '../../components/NumericStepper';
 import { Button } from '../../components/Button';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
@@ -114,49 +114,28 @@ const DetectionMethodRow: React.FC = () => {
 // ─── Advanced Section ────────────────────────────────────────────────────────
 
 const ImageAdvancedSection: React.FC = () => {
-  const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const { settings, updateSettings } = useAppStore();
 
   return (
     <>
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Guidance Scale</Text>
-          <Text style={styles.sliderValue}>{(settings?.imageGuidanceScale || 7.5).toFixed(1)}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Guidance Scale</Text>
         <Text style={styles.sliderDesc}>Higher = follows prompt more strictly</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1}
-          maximumValue={20}
-          step={0.5}
+        <NumericStepper
           value={settings?.imageGuidanceScale || 7.5}
-          onSlidingComplete={(value) => updateSettings({ imageGuidanceScale: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={1} max={20} step={0.5} decimals={1}
+          onChange={(value) => updateSettings({ imageGuidanceScale: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Image Threads</Text>
-          <Text style={styles.sliderValue}>{settings?.imageThreads ?? 4}</Text>
-        </View>
-        <Text style={styles.sliderDesc}>
-          CPU threads used for image generation (applies on next image model load)
-        </Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1}
-          maximumValue={8}
-          step={1}
+        <Text style={styles.sliderLabel}>Image Threads</Text>
+        <Text style={styles.sliderDesc}>CPU threads used for image generation (applies on next image model load)</Text>
+        <NumericStepper
           value={settings?.imageThreads ?? 4}
-          onSlidingComplete={(value) => updateSettings({ imageThreads: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={1} max={8} step={1}
+          onChange={(value) => updateSettings({ imageThreads: value })}
         />
       </View>
 
@@ -212,40 +191,23 @@ export const ImageGenerationSection: React.FC = () => {
       </Text>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Image Steps</Text>
-          <Text style={styles.sliderValue}>{settings?.imageSteps || 8}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Image Steps</Text>
         <Text style={styles.sliderDesc}>More steps = better quality but slower (4-8 fast, 20-50 high quality)</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={4}
-          maximumValue={50}
-          step={1}
+        <NumericStepper
           value={settings?.imageSteps || 8}
-          onSlidingComplete={(value) => updateSettings({ imageSteps: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={4} max={50} step={1}
+          onChange={(value) => updateSettings({ imageSteps: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Image Size</Text>
-          <Text style={styles.sliderValue}>{settings?.imageWidth ?? 256}x{settings?.imageHeight ?? 256}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Image Size</Text>
         <Text style={styles.sliderDesc}>Output resolution (smaller = faster, larger = more detail)</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={128}
-          maximumValue={512}
-          step={64}
+        <NumericStepper
           value={settings?.imageWidth ?? 256}
-          onSlidingComplete={(value) => updateSettings({ imageWidth: value, imageHeight: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={128} max={512} step={64}
+          formatValue={(v) => `${v}x${v}`}
+          onChange={(value) => updateSettings({ imageWidth: value, imageHeight: value })}
         />
       </View>
 
diff --git a/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx b/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx
index 33faa229..e1387488 100644
--- a/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx
+++ b/src/screens/ModelSettingsScreen/TextGenerationAdvanced.tsx
@@ -1,7 +1,7 @@
 import React from 'react';
 import { View, Text, Switch, Platform } from 'react-native';
-import Slider from '@react-native-community/slider';
 import { Button } from '../../components/Button';
+import { NumericStepper } from '../../components/NumericStepper';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
 import { CacheType } from '../../types';
@@ -52,24 +52,15 @@ const GpuSection: React.FC<GpuSectionProps> = ({
 
       {isGpuEnabled && (
         <View style={styles.sliderSection}>
-          <View style={styles.sliderHeader}>
-            <Text style={styles.sliderLabel}>GPU Layers</Text>
-            <Text style={styles.sliderValue}>{gpuLayersEffective}</Text>
-          </View>
+          <Text style={styles.sliderLabel}>GPU Layers</Text>
           <Text style={styles.sliderDesc}>
             Layers offloaded to GPU. Higher = faster but may crash on low-VRAM devices.
           </Text>
-          <Slider
-            testID="gpu-layers-slider"
-            style={styles.slider}
-            minimumValue={1}
-            maximumValue={GPU_LAYERS_MAX}
-            step={1}
+          <NumericStepper
+            testID="gpu-layers-stepper"
             value={gpuLayersEffective}
-            onSlidingComplete={(value) => updateSettings({ gpuLayers: value })}
-            minimumTrackTintColor={colors.primary}
-            maximumTrackTintColor={colors.surface}
-            thumbTintColor={colors.primary}
+            min={1} max={GPU_LAYERS_MAX} step={1}
+            onChange={(value) => updateSettings({ gpuLayers: value })}
           />
         </View>
       )}
@@ -207,78 +198,42 @@ export const TextGenerationAdvanced: React.FC = () => {
   return (
     <>
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Top P</Text>
-          <Text style={styles.sliderValue}>{(settings?.topP || 0.9).toFixed(2)}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Top P</Text>
         <Text style={styles.sliderDesc}>Nucleus sampling threshold</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={0.1}
-          maximumValue={1.0}
-          step={0.05}
+        <NumericStepper
           value={settings?.topP || 0.9}
-          onSlidingComplete={(value) => updateSettings({ topP: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={0.1} max={1.0} step={0.05} decimals={2}
+          onChange={(value) => updateSettings({ topP: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Repeat Penalty</Text>
-          <Text style={styles.sliderValue}>{(settings?.repeatPenalty || 1.1).toFixed(2)}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Repeat Penalty</Text>
         <Text style={styles.sliderDesc}>Penalize repeated tokens</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1.0}
-          maximumValue={2.0}
-          step={0.05}
+        <NumericStepper
           value={settings?.repeatPenalty || 1.1}
-          onSlidingComplete={(value) => updateSettings({ repeatPenalty: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={1.0} max={2.0} step={0.05} decimals={2}
+          onChange={(value) => updateSettings({ repeatPenalty: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>CPU Threads</Text>
-          <Text style={styles.sliderValue}>{settings?.nThreads || 6}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>CPU Threads</Text>
         <Text style={styles.sliderDesc}>Parallel threads for inference</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={1}
-          maximumValue={12}
-          step={1}
+        <NumericStepper
           value={settings?.nThreads || 6}
-          onSlidingComplete={(value) => updateSettings({ nThreads: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={1} max={12} step={1}
+          onChange={(value) => updateSettings({ nThreads: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
-        <View style={styles.sliderHeader}>
-          <Text style={styles.sliderLabel}>Batch Size</Text>
-          <Text style={styles.sliderValue}>{settings?.nBatch || 256}</Text>
-        </View>
+        <Text style={styles.sliderLabel}>Batch Size</Text>
         <Text style={styles.sliderDesc}>Tokens processed per batch</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={32}
-          maximumValue={512}
-          step={32}
+        <NumericStepper
           value={settings?.nBatch || 256}
-          onSlidingComplete={(value) => updateSettings({ nBatch: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={32} max={512} step={32}
+          onChange={(value) => updateSettings({ nBatch: value })}
         />
       </View>
 
diff --git a/src/screens/ModelSettingsScreen/TextGenerationSection.tsx b/src/screens/ModelSettingsScreen/TextGenerationSection.tsx
index 5b1d9099..3ae132f4 100644
--- a/src/screens/ModelSettingsScreen/TextGenerationSection.tsx
+++ b/src/screens/ModelSettingsScreen/TextGenerationSection.tsx
@@ -1,7 +1,7 @@
 import React, { useState } from 'react';
 import { View, Text, Switch } from 'react-native';
-import Slider from '@react-native-community/slider';
 import { AdvancedToggle, Card } from '../../components';
+import { NumericStepper } from '../../components/NumericStepper';
 import { useTheme, useThemedStyles } from '../../theme';
 import { useAppStore } from '../../stores';
 import { createStyles } from './styles';
@@ -26,56 +26,40 @@ export const TextGenerationSection: React.FC = () => {
   const contextLengthLabel = contextLength >= 1024
     ? `${(contextLength / 1024).toFixed(0)}K`
     : String(contextLength);
-  const ctxSliderMax = modelMaxContext || FALLBACK_MAX_CONTEXT;
+  const ctxMax = modelMaxContext || FALLBACK_MAX_CONTEXT;
 
   return (
     <Card style={styles.section}>
       <Text style={styles.settingHelp}>Configure LLM behavior for text responses.</Text>
 
-      {/* ── Basic Settings ── */}
-
       <View style={styles.sliderSection}>
         <View style={styles.sliderHeader}>
           <Text style={styles.sliderLabel}>Temperature</Text>
-          <Text style={styles.sliderValue}>{(settings?.temperature || 0.7).toFixed(2)}</Text>
         </View>
         <Text style={styles.sliderDesc}>Higher = more creative, Lower = more focused</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={0}
-          maximumValue={2}
-          step={0.05}
+        <NumericStepper
           value={settings?.temperature || 0.7}
-          onSlidingComplete={(value) => updateSettings({ temperature: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={0} max={2} step={0.05} decimals={2}
+          onChange={(value) => updateSettings({ temperature: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
         <View style={styles.sliderHeader}>
           <Text style={styles.sliderLabel}>Max Tokens</Text>
-          <Text style={styles.sliderValue}>{maxTokensLabel}</Text>
         </View>
         <Text style={styles.sliderDesc}>Maximum response length</Text>
-        <Slider
-          style={styles.slider}
-          minimumValue={64}
-          maximumValue={8192}
-          step={64}
+        <NumericStepper
           value={maxTokens}
-          onSlidingComplete={(value) => updateSettings({ maxTokens: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={64} max={8192} step={64}
+          formatValue={() => maxTokensLabel}
+          onChange={(value) => updateSettings({ maxTokens: value })}
         />
       </View>
 
       <View style={styles.sliderSection}>
         <View style={styles.sliderHeader}>
           <Text style={styles.sliderLabel}>Context Length</Text>
-          <Text style={styles.sliderValue}>{contextLengthLabel}</Text>
         </View>
         <Text style={styles.sliderDesc}>KV cache size — larger uses more RAM (requires reload)</Text>
         {contextLength > HIGH_CONTEXT_THRESHOLD && (
@@ -83,16 +67,11 @@ export const TextGenerationSection: React.FC = () => {
             High context uses significant RAM and may crash on some devices
           </Text>
         )}
-        <Slider
-          style={styles.slider}
-          minimumValue={512}
-          maximumValue={ctxSliderMax}
-          step={1024}
+        <NumericStepper
           value={contextLength}
-          onSlidingComplete={(value) => updateSettings({ contextLength: value })}
-          minimumTrackTintColor={colors.primary}
-          maximumTrackTintColor={colors.surface}
-          thumbTintColor={colors.primary}
+          min={512} max={ctxMax} step={1024}
+          formatValue={() => contextLengthLabel}
+          onChange={(value) => updateSettings({ contextLength: value })}
         />
       </View>
 
diff --git a/src/screens/ModelSettingsScreen/index.tsx b/src/screens/ModelSettingsScreen/index.tsx
index e0aefc79..319c9302 100644
--- a/src/screens/ModelSettingsScreen/index.tsx
+++ b/src/screens/ModelSettingsScreen/index.tsx
@@ -33,6 +33,7 @@ export const ModelSettingsScreen: React.FC = () => {
       const task = InteractionManager.runAfterInteractions(() => goTo(pending));
       return () => task.cancel();
     }
+  // eslint-disable-next-line react-hooks/exhaustive-deps
   }, []);
 
   const handleReset = () => {
diff --git a/src/screens/SettingsScreen.tsx b/src/screens/SettingsScreen.tsx
index f1cd721a..353c9b23 100644
--- a/src/screens/SettingsScreen.tsx
+++ b/src/screens/SettingsScreen.tsx
@@ -151,6 +151,7 @@ export const SettingsScreen: React.FC = () => {
               { icon: 'wifi', title: 'Remote Servers', desc: 'Connect to Ollama, LM Studio, and more', screen: 'RemoteServers' as const },
             //  { icon: 'search', title: 'Web Search', desc: 'Configure search API key for reliable results', screen: 'WebSearchSettings' as const },
               { icon: 'mic', title: 'Voice Transcription', desc: 'On-device speech to text', screen: 'VoiceSettings' as const },
+              { icon: 'volume-2', title: 'Text to Speech', desc: 'On-device voice responses', screen: 'TTSSettings' as const },
               { icon: 'lock', title: 'Security', desc: 'Passphrase and app lock', screen: 'SecuritySettings' as const },
               { icon: 'smartphone', title: 'Device Information', desc: 'Hardware and compatibility', screen: 'DeviceInfo' as const },
               { icon: 'hard-drive', title: 'Storage', desc: 'Models and data usage', screen: 'StorageSettings' as const },
diff --git a/src/screens/TTSSettingsScreen/index.tsx b/src/screens/TTSSettingsScreen/index.tsx
new file mode 100644
index 00000000..b982c6f7
--- /dev/null
+++ b/src/screens/TTSSettingsScreen/index.tsx
@@ -0,0 +1,412 @@
+import React, { useEffect, useState } from 'react';
+import { View, Text, ScrollView, TouchableOpacity, Switch, ActivityIndicator } from 'react-native';
+import { SafeAreaView } from 'react-native-safe-area-context';
+import Icon from 'react-native-vector-icons/Feather';
+import { NumericStepper } from '../../components/NumericStepper';
+import { useNavigation } from '@react-navigation/native';
+import { Card, Button } from '../../components';
+import { CustomAlert, showAlert, hideAlert, AlertState, initialAlertState } from '../../components/CustomAlert';
+import { useTheme, useThemedStyles } from '../../theme';
+import type { ThemeColors, ThemeShadows } from '../../theme';
+import { TYPOGRAPHY, SPACING } from '../../constants';
+import { useTTSStore } from '../../stores/ttsStore';
+import { hardwareService } from '../../services/hardware';
+import { TTS_BACKBONE_MODEL, TTS_WARN_RAM_GB, TTS_BLOCK_RAM_GB } from '../../constants/ttsModels';
+import { KOKORO_VOICES, isExecutorchSupported } from '../../constants/kokoroModels';
+import type { KokoroVoiceId } from '../../constants/kokoroModels';
+import type { InterfaceMode } from '../../stores/ttsStore';
+
+// ─── Sub-components ───────────────────────────────────────────────────────────
+
+type Styles = ReturnType<typeof createStyles>;
+
+const ProgressRow: React.FC<{
+  label: string;
+  sizeMB: number;
+  downloaded: boolean;
+  downloading: boolean;
+  progress: number;
+  styles: Styles;
+  colors: ThemeColors;
+  border?: boolean;
+}> = ({ label, sizeMB, downloaded, downloading, progress, styles, colors, border }) => (
+  <View>
+    <View style={[styles.modelRow, border ? styles.modelRowBorder : undefined]}>
+      <View style={styles.modelInfo}>
+        <Text style={styles.modelName}>{label}</Text>
+        <Text style={styles.modelSize}>{sizeMB} MB</Text>
+      </View>
+      {downloaded && <Icon name="check-circle" size={14} color={colors.primary} />}
+      {downloading && <Text style={styles.progressText}>{Math.round(progress * 100)}%</Text>}
+      {!downloaded && !downloading && <Icon name="download" size={14} color={colors.textMuted} />}
+    </View>
+    {downloading && (
+      <View style={styles.progressBar}>
+        <View style={[styles.progressFill, { width: `${progress * 100}%` }]} />
+      </View>
+    )}
+  </View>
+);
+
+const InterfaceModeCard: React.FC<{
+  mode: InterfaceMode;
+  deviceBlocked: boolean;
+  areBothDownloaded: boolean;
+  onModeChange: (m: InterfaceMode) => void;
+  styles: Styles;
+}> = ({ mode, deviceBlocked, areBothDownloaded, onModeChange, styles }) => (
+  <Card style={styles.section}>
+    <Text style={styles.sectionLabel}>Interface Mode</Text>
+    <Text style={styles.description}>
+      Audio Mode renders responses as voice notes. Chat Mode adds a play button to text bubbles.
+    </Text>
+    <View style={styles.modeRow}>
+      {(['chat', 'audio'] as InterfaceMode[]).map((m) => {
+        const active = mode === m;
+        const blocked = m === 'audio' && (deviceBlocked || !areBothDownloaded);
+        return (
+          <TouchableOpacity
+            key={m}
+            style={[styles.modeChip, active && styles.modeChipActive, blocked && styles.modeChipDisabled]}
+            onPress={() => onModeChange(m)}
+            disabled={blocked}
+          >
+            <Text style={[styles.modeChipText, active && styles.modeChipTextActive]}>
+              {m === 'chat' ? 'Chat' : 'Audio'}
+            </Text>
+          </TouchableOpacity>
+        );
+      })}
+    </View>
+    {!areBothDownloaded && (
+      <Text style={styles.hintText}>Download models below to enable Audio Mode.</Text>
+    )}
+  </Card>
+);
+
+const PlaybackCard: React.FC<{
+  settings: ReturnType<typeof useTTSStore.getState>['settings'];
+  onUpdate: (patch: Partial<ReturnType<typeof useTTSStore.getState>['settings']>) => void;
+  colors: ThemeColors;
+  styles: Styles;
+}> = ({ settings, onUpdate, colors, styles }) => (
+  <Card style={styles.section}>
+    <Text style={styles.sectionLabel}>Playback</Text>
+    <Text style={styles.sliderLabel}>Speed</Text>
+    <NumericStepper
+      value={settings.speed}
+      min={0.5} max={2.0} step={0.1} decimals={1}
+      formatValue={(v) => `${v.toFixed(1)}x`}
+      onChange={(v) => onUpdate({ speed: v })}
+    />
+    {settings.interfaceMode === 'chat' && (
+      <View style={[styles.toggleRow, styles.toggleRowBorder]}>
+        <View style={styles.toggleInfo}>
+          <Text style={styles.toggleTitle}>Auto-play</Text>
+          <Text style={styles.toggleDesc}>Speak AI responses automatically</Text>
+        </View>
+        <Switch
+          value={settings.autoPlay}
+          onValueChange={(v) => onUpdate({ autoPlay: v })}
+          trackColor={{ true: colors.primary }}
+        />
+      </View>
+    )}
+  </Card>
+);
+
+const CompatibilityCard: React.FC<{
+  ramGB: number;
+  deviceBlocked: boolean;
+  deviceWarning: boolean;
+  styles: Styles;
+  colors: ThemeColors;
+}> = ({ ramGB, deviceBlocked, deviceWarning, styles, colors }) => {
+  if (!deviceWarning && !deviceBlocked) { return null; }
+  return (
+    <Card style={deviceBlocked ? styles.errorCard : styles.warningCard}>
+      <View style={styles.compatRow}>
+        <Icon name="alert-triangle" size={14} color={deviceBlocked ? colors.error : colors.textSecondary} />
+        <Text style={[styles.compatText, deviceBlocked && styles.errorText]}>
+          {deviceBlocked
+            ? `TTS requires at least ${TTS_BLOCK_RAM_GB} GB RAM. Your device has ${ramGB.toFixed(1)} GB.`
+            : `Your device (${ramGB.toFixed(1)} GB RAM) may run TTS but performance could be slow. 8 GB recommended.`}
+        </Text>
+      </View>
+    </Card>
+  );
+};
+
+const KokoroCard: React.FC<{
+  kokoroReady: boolean;
+  kokoroDownloadProgress: number;
+  selectedVoiceId: KokoroVoiceId;
+  isChangingVoice: boolean;
+  onVoiceChange: (id: KokoroVoiceId) => void;
+  styles: Styles;
+  colors: ThemeColors;
+}> = ({ kokoroReady, kokoroDownloadProgress, selectedVoiceId, isChangingVoice, onVoiceChange, styles, colors }) => {
+  const supported = isExecutorchSupported();
+  return (
+    <Card style={styles.section}>
+      <View style={styles.kokoroHeader}>
+        <Text style={styles.sectionLabel}>Voice</Text>
+        {!supported && (
+          <Text style={styles.hintText}>Requires Android 13+ / iOS 17</Text>
+        )}
+        {supported && !kokoroReady && kokoroDownloadProgress > 0 && (
+          <Text style={styles.hintText}>{Math.round(kokoroDownloadProgress * 100)}%</Text>
+        )}
+        {supported && !kokoroReady && kokoroDownloadProgress === 0 && (
+          <ActivityIndicator size="small" color={colors.textMuted} />
+        )}
+        {supported && kokoroReady && (
+          <Icon name="check-circle" size={14} color={colors.primary} />
+        )}
+      </View>
+      <Text style={styles.description}>
+        Fast on-device voice synthesis. Used for the speak button in Chat Mode.
+      </Text>
+      {KOKORO_VOICES.map((voice, i) => {
+        const active = selectedVoiceId === voice.id;
+        return (
+          <TouchableOpacity
+            key={voice.id}
+            style={[styles.voiceRow, i > 0 && styles.voiceRowBorder]}
+            onPress={() => onVoiceChange(voice.id)}
+            disabled={!supported}
+          >
+            <View style={styles.voiceInfo}>
+              <Text style={styles.voiceName}>{voice.label}</Text>
+              <Text style={styles.voiceMeta}>{voice.accent} · {voice.gender}</Text>
+            </View>
+            {active && (
+              isChangingVoice
+                ? <ActivityIndicator size="small" color={colors.primary} />
+                : <Icon name="check" size={14} color={colors.primary} />
+            )}
+          </TouchableOpacity>
+        );
+      })}
+    </Card>
+  );
+};
+
+// ─── Main screen ──────────────────────────────────────────────────────────────
+
+export const TTSSettingsScreen: React.FC = () => {
+  const navigation = useNavigation();
+  const { colors } = useTheme();
+  const styles = useThemedStyles(createStyles);
+  const [alertState, setAlertState] = useState<AlertState>(initialAlertState);
+  const [ramGB, setRamGB] = useState<number>(8);
+
+  const {
+    isBackboneDownloaded, isVocoderDownloaded,
+    isDownloadingBackbone, isDownloadingVocoder,
+    backboneDownloadProgress, vocoderDownloadProgress,
+    isModelLoaded, isModelLoading,
+    audioCacheSizeMB, settings, error,
+    kokoroReady, kokoroDownloadProgress, kokoroActiveVoiceId,
+    downloadModels, deleteModels, loadModels, unloadModels,
+    checkDownloadStatus, refreshCacheSize, clearAudioCache, updateSettings, clearError,
+  } = useTTSStore();
+
+  useEffect(() => {
+    setRamGB(hardwareService.getTotalMemoryGB());
+    checkDownloadStatus();
+    refreshCacheSize();
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []);
+
+  const areBothDownloaded = isBackboneDownloaded && isVocoderDownloaded;
+  const isDownloading = isDownloadingBackbone || isDownloadingVocoder;
+  const deviceBlocked = ramGB < TTS_BLOCK_RAM_GB;
+  const deviceWarning = !deviceBlocked && ramGB < TTS_WARN_RAM_GB;
+  const totalSizeMB = TTS_BACKBONE_MODEL.backboneSizeMB + TTS_BACKBONE_MODEL.vocoderSizeMB;
+
+  const handleDelete = () => {
+    setAlertState(
+      showAlert('Remove TTS Models', 'This will delete both model files and disable text-to-speech.', [
+        { text: 'Cancel', style: 'cancel' },
+        { text: 'Remove', style: 'destructive', onPress: () => { setAlertState(hideAlert()); deleteModels(); } },
+      ]),
+    );
+  };
+
+  const handleClearCache = () => {
+    setAlertState(
+      showAlert('Clear Audio Cache', `This will delete ${audioCacheSizeMB.toFixed(1)} MB of cached audio.`, [
+        { text: 'Cancel', style: 'cancel' },
+        { text: 'Clear', style: 'destructive', onPress: () => { setAlertState(hideAlert()); clearAudioCache(); } },
+      ]),
+    );
+  };
+
+  const handleModeChange = (mode: InterfaceMode) => {
+    if (mode === 'audio' && deviceBlocked) { return; }
+    updateSettings({ interfaceMode: mode });
+    if (mode === 'audio' && !isModelLoaded && areBothDownloaded) { loadModels(); }
+    if (mode === 'chat' && isModelLoaded) { unloadModels(); }
+  };
+
+  return (
+    <SafeAreaView style={styles.container} edges={['top']}>
+      <View style={styles.header}>
+        <TouchableOpacity style={styles.backButton} onPress={() => navigation.goBack()}>
+          <Icon name="arrow-left" size={20} color={colors.text} />
+        </TouchableOpacity>
+        <Text style={styles.title}>Text to Speech</Text>
+        {isModelLoading && <ActivityIndicator size="small" color={colors.primary} />}
+      </View>
+
+      <ScrollView style={styles.scrollView} contentContainerStyle={styles.content}>
+
+        <InterfaceModeCard
+          mode={settings.interfaceMode}
+          deviceBlocked={deviceBlocked}
+          areBothDownloaded={areBothDownloaded}
+          onModeChange={handleModeChange}
+          styles={styles}
+        />
+
+        {settings.interfaceMode === 'chat' && (
+          <Card style={styles.section}>
+            <View style={styles.toggleRow}>
+              <View style={styles.toggleInfo}>
+                <Text style={styles.toggleTitle}>Enable TTS</Text>
+                <Text style={styles.toggleDesc}>Show play buttons on assistant messages</Text>
+              </View>
+              <Switch value={settings.enabled} onValueChange={(v) => updateSettings({ enabled: v })} trackColor={{ true: colors.primary }} />
+            </View>
+          </Card>
+        )}
+
+        <Card style={styles.section}>
+          <Text style={styles.sectionLabel}>Models ({totalSizeMB} MB total)</Text>
+          <ProgressRow label="Voice model" sizeMB={TTS_BACKBONE_MODEL.backboneSizeMB}
+            downloaded={isBackboneDownloaded} downloading={isDownloadingBackbone}
+            progress={backboneDownloadProgress} styles={styles} colors={colors} />
+          <ProgressRow label="Audio decoder" sizeMB={TTS_BACKBONE_MODEL.vocoderSizeMB}
+            downloaded={isVocoderDownloaded} downloading={isDownloadingVocoder}
+            progress={vocoderDownloadProgress} styles={styles} colors={colors} border />
+          <View style={styles.downloadActions}>
+            {areBothDownloaded
+              ? <Button title="Remove Models" variant="outline" size="small" onPress={handleDelete} style={styles.removeButton} />
+              : <Button title={isDownloading ? 'Downloading...' : `Download (${totalSizeMB} MB)`}
+                  variant="primary" size="small" onPress={downloadModels} disabled={isDownloading || deviceBlocked} />}
+          </View>
+          {error && <TouchableOpacity onPress={clearError}><Text style={styles.error}>{error}</Text></TouchableOpacity>}
+        </Card>
+
+        <KokoroCard
+          kokoroReady={kokoroReady}
+          kokoroDownloadProgress={kokoroDownloadProgress}
+          selectedVoiceId={settings.kokoroVoiceId as KokoroVoiceId}
+          isChangingVoice={(settings.kokoroVoiceId as KokoroVoiceId) !== kokoroActiveVoiceId}
+          onVoiceChange={(id) => updateSettings({ kokoroVoiceId: id })}
+          styles={styles}
+          colors={colors}
+        />
+
+        {(areBothDownloaded || kokoroReady) && (
+          <PlaybackCard settings={settings} onUpdate={updateSettings} colors={colors} styles={styles} />
+        )}
+
+        {settings.interfaceMode === 'audio' && (
+          <Card style={styles.section}>
+            <View style={styles.toggleRow}>
+              <View style={styles.toggleInfo}>
+                <Text style={styles.toggleTitle}>Audio cache</Text>
+                <Text style={styles.toggleDesc}>{audioCacheSizeMB.toFixed(1)} MB</Text>
+              </View>
+              <Button title="Clear" variant="outline" size="small" onPress={handleClearCache} disabled={audioCacheSizeMB === 0} />
+            </View>
+          </Card>
+        )}
+
+        <CompatibilityCard ramGB={ramGB} deviceBlocked={deviceBlocked} deviceWarning={deviceWarning} styles={styles} colors={colors} />
+
+        <Card style={styles.privacyCard}>
+          <Icon name="shield" size={18} color={colors.textSecondary} style={styles.privacyIcon} />
+          <Text style={styles.privacyTitle}>Fully private</Text>
+          <Text style={styles.privacyText}>
+            All speech is generated on your device. Nothing is sent to any server.
+          </Text>
+        </Card>
+
+      </ScrollView>
+
+      <CustomAlert visible={alertState.visible} title={alertState.title}
+        message={alertState.message} buttons={alertState.buttons}
+        onClose={() => setAlertState(hideAlert())} />
+    </SafeAreaView>
+  );
+};
+
+const createStyles = (colors: ThemeColors, shadows: ThemeShadows) =>
+  ({
+    container: { flex: 1, backgroundColor: colors.background },
+    header: {
+      flexDirection: 'row' as const, alignItems: 'center' as const,
+      paddingHorizontal: SPACING.lg, paddingVertical: SPACING.md,
+      borderBottomWidth: 1, borderBottomColor: colors.border,
+      backgroundColor: colors.surface, ...shadows.small, zIndex: 1, gap: SPACING.md,
+    },
+    backButton: { padding: SPACING.xs },
+    title: { ...TYPOGRAPHY.h2, flex: 1, color: colors.text },
+    scrollView: { flex: 1 },
+    content: { paddingHorizontal: SPACING.lg, paddingTop: SPACING.lg, paddingBottom: SPACING.xxl },
+    section: { marginBottom: SPACING.lg },
+    sectionLabel: {
+      ...TYPOGRAPHY.label, textTransform: 'uppercase' as const, color: colors.textMuted,
+      letterSpacing: 0.3, marginBottom: SPACING.sm,
+    },
+    description: { ...TYPOGRAPHY.bodySmall, color: colors.textSecondary, lineHeight: 18, marginBottom: SPACING.md },
+    modeRow: { flexDirection: 'row' as const, gap: SPACING.sm },
+    modeChip: {
+      flex: 1, paddingVertical: SPACING.sm, borderRadius: 8, borderWidth: 1,
+      borderColor: colors.border, alignItems: 'center' as const, backgroundColor: colors.surfaceLight,
+    },
+    modeChipActive: { backgroundColor: colors.primary, borderColor: colors.primary },
+    modeChipDisabled: { opacity: 0.4 },
+    modeChipText: { ...TYPOGRAPHY.bodySmall, color: colors.textSecondary },
+    modeChipTextActive: { color: colors.background },
+    hintText: { ...TYPOGRAPHY.meta, color: colors.textMuted, marginTop: SPACING.sm },
+    toggleRow: { flexDirection: 'row' as const, alignItems: 'center' as const, justifyContent: 'space-between' as const },
+    toggleRowBorder: { paddingTop: SPACING.md, marginTop: SPACING.md, borderTopWidth: 1, borderTopColor: colors.border },
+    toggleInfo: { flex: 1, marginRight: SPACING.md },
+    toggleTitle: { ...TYPOGRAPHY.body, color: colors.text },
+    toggleDesc: { ...TYPOGRAPHY.meta, color: colors.textMuted, marginTop: 2 },
+    modelRow: { flexDirection: 'row' as const, alignItems: 'center' as const, justifyContent: 'space-between' as const, paddingVertical: SPACING.sm },
+    modelRowBorder: { borderTopWidth: 1, borderTopColor: colors.border, marginTop: SPACING.xs },
+    modelInfo: { flex: 1 },
+    modelName: { ...TYPOGRAPHY.body, color: colors.text },
+    modelSize: { ...TYPOGRAPHY.meta, color: colors.textMuted, marginTop: 2 },
+    progressText: { ...TYPOGRAPHY.meta, color: colors.primary },
+    progressBar: { height: 4, backgroundColor: colors.surfaceLight, borderRadius: 2, overflow: 'hidden' as const, marginBottom: SPACING.xs },
+    progressFill: { height: '100%' as const, backgroundColor: colors.primary, borderRadius: 2 },
+    downloadActions: { marginTop: SPACING.md },
+    removeButton: { borderColor: colors.error },
+    error: { ...TYPOGRAPHY.bodySmall, color: colors.error, marginTop: SPACING.md, textAlign: 'center' as const },
+    sliderRow: { flexDirection: 'row' as const, justifyContent: 'space-between' as const, alignItems: 'center' as const, marginBottom: SPACING.xs },
+    sliderLabel: { ...TYPOGRAPHY.body, color: colors.text },
+    sliderValue: { ...TYPOGRAPHY.body, color: colors.primary },
+    sliderMarks: { flexDirection: 'row' as const, justifyContent: 'space-between' as const, marginBottom: SPACING.xs },
+    sliderMark: { ...TYPOGRAPHY.meta, color: colors.textMuted },
+    compatRow: { flexDirection: 'row' as const, alignItems: 'flex-start' as const, gap: SPACING.sm },
+    compatText: { ...TYPOGRAPHY.bodySmall, color: colors.textSecondary, flex: 1, lineHeight: 18 },
+    errorText: { color: colors.error },
+    warningCard: { marginBottom: SPACING.lg, borderColor: colors.border },
+    errorCard: { marginBottom: SPACING.lg, borderColor: colors.error },
+    privacyCard: { alignItems: 'center' as const, backgroundColor: colors.surface, borderWidth: 1, borderColor: colors.border },
+    privacyIcon: { marginBottom: SPACING.sm },
+    privacyTitle: { ...TYPOGRAPHY.h3, color: colors.text, marginBottom: SPACING.sm },
+    privacyText: { ...TYPOGRAPHY.body, color: colors.textSecondary, textAlign: 'center' as const, lineHeight: 20 },
+    kokoroHeader: { flexDirection: 'row' as const, alignItems: 'center' as const, justifyContent: 'space-between' as const, marginBottom: SPACING.xs },
+    voiceRow: { flexDirection: 'row' as const, alignItems: 'center' as const, justifyContent: 'space-between' as const, paddingVertical: SPACING.sm },
+    voiceRowBorder: { borderTopWidth: 1, borderTopColor: colors.border },
+    voiceInfo: { flex: 1 },
+    voiceName: { ...TYPOGRAPHY.body, color: colors.text },
+    voiceMeta: { ...TYPOGRAPHY.meta, color: colors.textMuted, marginTop: 2 },
+  });
diff --git a/src/screens/VoiceSettingsScreen.tsx b/src/screens/VoiceSettingsScreen.tsx
index 491176b3..f69ace94 100644
--- a/src/screens/VoiceSettingsScreen.tsx
+++ b/src/screens/VoiceSettingsScreen.tsx
@@ -1,9 +1,10 @@
-import React, { useState } from 'react';
+import React, { useState, useCallback, useRef } from 'react';
 import {
   View,
   Text,
   ScrollView,
   TouchableOpacity,
+  TextInput,
   ActivityIndicator,
 } from 'react-native';
 import { SafeAreaView } from 'react-native-safe-area-context';
@@ -16,128 +17,361 @@ import type { ThemeColors, ThemeShadows } from '../theme';
 import { TYPOGRAPHY, SPACING } from '../constants';
 import { useWhisperStore } from '../stores';
 import { WHISPER_MODELS } from '../services';
+import { huggingFaceService } from '../services/huggingface';
+import logger from '../utils/logger';
+
+// ─── Types ────────────────────────────────────────────────────────────────────
+
+interface HFRepo {
+  id: string;
+  author: string;
+  downloads: number;
+}
+
+interface HFFile {
+  name: string;
+  downloadUrl: string;
+  sizeMb: number;
+}
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+const ENGLISH_MODELS = WHISPER_MODELS.filter(m => m.lang === 'en');
+const MULTI_MODELS = WHISPER_MODELS.filter(m => m.lang === 'multi');
+
+function formatSize(mb: number): string {
+  if (mb >= 1000) return `${(mb / 1000).toFixed(1)} GB`;
+  return `${mb} MB`;
+}
+
+// ─── Sub-components ───────────────────────────────────────────────────────────
+
+interface ModelRowProps {
+  id: string;
+  name: string;
+  sizeMb: number;
+  description: string;
+  isDownloaded: boolean;
+  isDownloading: boolean;
+  downloadProgress: number;
+  onDownload: () => void;
+}
+
+const ModelRow: React.FC<ModelRowProps> = ({ id, name, sizeMb, description, isDownloaded, isDownloading, downloadProgress, onDownload }) => {
+  const { colors } = useTheme();
+  const styles = useThemedStyles(createStyles);
+  if (isDownloaded) {
+    return (
+      <View style={styles.modelRow} testID={`model-row-${id}`}>
+        <View style={styles.modelRowInfo}>
+          <Text style={styles.modelRowName}>{name}</Text>
+          <Text style={styles.modelRowDesc}>{description}</Text>
+        </View>
+        <View style={[styles.badge, styles.badgeDownloaded]}>
+          <Icon name="check" size={11} color={colors.primary} />
+          <Text style={[styles.badgeText, { color: colors.primary }]}>Active</Text>
+        </View>
+      </View>
+    );
+  }
+  if (isDownloading) {
+    return (
+      <View style={styles.modelRow}>
+        <View style={styles.modelRowInfo}>
+          <Text style={styles.modelRowName}>{name}</Text>
+          <Text style={styles.modelRowDesc}>{Math.round(downloadProgress * 100)}%</Text>
+        </View>
+        <ActivityIndicator size="small" color={colors.primary} />
+      </View>
+    );
+  }
+  return (
+    <TouchableOpacity style={styles.modelRow} onPress={onDownload} testID={`model-download-${id}`}>
+      <View style={styles.modelRowInfo}>
+        <Text style={styles.modelRowName}>{name}</Text>
+        <Text style={styles.modelRowDesc}>{description}</Text>
+      </View>
+      <View style={styles.modelRowRight}>
+        <Text style={styles.modelRowSize}>{formatSize(sizeMb)}</Text>
+        <Icon name="download" size={14} color={colors.textMuted} />
+      </View>
+    </TouchableOpacity>
+  );
+};
+
+// ─── Main Screen ──────────────────────────────────────────────────────────────
 
 export const VoiceSettingsScreen: React.FC = () => {
   const navigation = useNavigation();
   const { colors } = useTheme();
   const styles = useThemedStyles(createStyles);
   const [alertState, setAlertState] = useState<AlertState>(initialAlertState);
+  const [searchQuery, setSearchQuery] = useState('');
+  const [hfRepos, setHfRepos] = useState<HFRepo[]>([]);
+  const [hfFiles, setHfFiles] = useState<Record<string, HFFile[]>>({});
+  const [expandedRepo, setExpandedRepo] = useState<string | null>(null);
+  const [isSearching, setIsSearching] = useState(false);
+  const [loadingFiles, setLoadingFiles] = useState<string | null>(null);
+  const debounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
   const {
-    downloadedModelId: whisperModelId,
-    isDownloading: isWhisperDownloading,
-    downloadProgress: whisperProgress,
-    downloadModel: downloadWhisperModel,
-    deleteModel: deleteWhisperModel,
+    downloadedModelId,
+    isDownloading,
+    downloadProgress,
+    downloadModel,
+    downloadFromUrl,
+    deleteModel,
     error: whisperError,
-    clearError: clearWhisperError,
+    clearError,
   } = useWhisperStore();
 
+  const currentModel = WHISPER_MODELS.find(m => m.id === downloadedModelId);
+
+  const handleSearch = useCallback((q: string) => {
+    setSearchQuery(q);
+    if (debounceRef.current) clearTimeout(debounceRef.current);
+    if (!q.trim()) { setHfRepos([]); return; }
+    debounceRef.current = setTimeout(async () => {
+      setIsSearching(true);
+      try {
+        const results = await huggingFaceService.searchWhisperRepos(q);
+        setHfRepos(results);
+      } catch (err) {
+        logger.error('[VoiceSettings] HF search error:', err);
+      } finally {
+        setIsSearching(false);
+      }
+    }, 500);
+  }, []);
+
+  const handleExpandRepo = useCallback(async (repoId: string) => {
+    if (expandedRepo === repoId) { setExpandedRepo(null); return; }
+    setExpandedRepo(repoId);
+    if (hfFiles[repoId]) return;
+    setLoadingFiles(repoId);
+    try {
+      const files = await huggingFaceService.getWhisperFiles(repoId);
+      setHfFiles(prev => ({ ...prev, [repoId]: files }));
+    } catch (err) {
+      logger.error('[VoiceSettings] Failed to fetch repo files:', err);
+    } finally {
+      setLoadingFiles(null);
+    }
+  }, [expandedRepo, hfFiles]);
+
+  const handleDownloadHfFile = useCallback((file: HFFile, repoId: string) => {
+    const modelId = `hf-${repoId.replace('/', '-')}-${file.name.replace('.bin', '')}`;
+    setAlertState(showAlert(
+      'Download Model',
+      `Download "${file.name}" (${formatSize(file.sizeMb)}) from ${repoId}?`,
+      [
+        { text: 'Cancel', style: 'cancel' },
+        {
+          text: 'Download',
+          onPress: () => {
+            setAlertState(hideAlert());
+            downloadFromUrl(file.downloadUrl, modelId).catch((err) => {
+              logger.error('[VoiceSettings] Custom download failed:', err);
+            });
+          },
+        },
+      ],
+    ));
+  }, [downloadFromUrl]);
+
+  const confirmDelete = () => {
+    setAlertState(showAlert(
+      'Remove Voice Model',
+      'This will disable voice input until you download a model again.',
+      [
+        { text: 'Cancel', style: 'cancel' },
+        {
+          text: 'Remove',
+          style: 'destructive',
+          onPress: () => { setAlertState(hideAlert()); deleteModel(); },
+        },
+      ],
+    ));
+  };
+
+  const filteredEnglish = searchQuery
+    ? ENGLISH_MODELS.filter(m => m.name.toLowerCase().includes(searchQuery.toLowerCase()))
+    : ENGLISH_MODELS;
+
+  const filteredMulti = searchQuery
+    ? MULTI_MODELS.filter(m => m.name.toLowerCase().includes(searchQuery.toLowerCase()) || 'multilingual'.includes(searchQuery.toLowerCase()))
+    : MULTI_MODELS;
+
   return (
     <SafeAreaView style={styles.container} edges={['top']}>
       <View style={styles.header}>
-        <TouchableOpacity
-          style={styles.backButton}
-          onPress={() => navigation.goBack()}
-        >
+        <TouchableOpacity style={styles.backButton} onPress={() => navigation.goBack()}>
           <Icon name="arrow-left" size={20} color={colors.text} />
         </TouchableOpacity>
         <Text style={styles.title}>Voice Transcription</Text>
       </View>
 
-      <ScrollView style={styles.scrollView} contentContainerStyle={styles.content}>
-        <Card style={styles.section}>
-          <Text style={styles.description}>
-            Download a Whisper model to enable on-device voice input. All transcription happens locally - no data is sent to any server.
-          </Text>
-
-          {(() => {
-            if (whisperModelId) {
-              return (
-                <View style={styles.modelInfo}>
-                  <View style={styles.modelHeader}>
-                    <Text style={styles.modelName}>
-                      {WHISPER_MODELS.find(m => m.id === whisperModelId)?.name || whisperModelId}
-                    </Text>
-                    <Text style={styles.modelStatus}>Downloaded</Text>
-                  </View>
-                  <Button
-                    title="Remove Model"
-                    variant="outline"
-                    size="small"
-                    onPress={() => {
-                      setAlertState(showAlert(
-                        'Remove Whisper Model',
-                        'This will disable voice input until you download a model again.',
-                        [
-                          { text: 'Cancel', style: 'cancel' },
-                          {
-                            text: 'Remove',
-                            style: 'destructive',
-                            onPress: () => {
-                              setAlertState(hideAlert());
-                              deleteWhisperModel();
-                            },
-                          },
-                        ]
-                      ));
-                    }}
-                    style={styles.removeButton}
-                  />
-                </View>
-              );
-            }
-            if (isWhisperDownloading) {
-              return (
-                <View style={styles.downloading}>
-                  <ActivityIndicator size="small" color={colors.primary} />
-                  <Text style={styles.downloadingText}>
-                    Downloading... {Math.round(whisperProgress * 100)}%
-                  </Text>
-                  <View style={styles.progressBar}>
-                    <View
-                      style={[styles.progressFill, { width: `${whisperProgress * 100}%` }]}
-                    />
-                  </View>
+      <ScrollView style={styles.scrollView} contentContainerStyle={styles.content} keyboardShouldPersistTaps="handled">
+
+        {/* ── Current model ── */}
+        {downloadedModelId && (
+          <Card style={styles.section}>
+            <Text style={styles.sectionLabel}>ACTIVE MODEL</Text>
+            <View style={styles.currentModelRow}>
+              <View style={styles.modelRowInfo}>
+                <Text style={styles.modelRowName}>
+                  {currentModel ? `${currentModel.name} — ${currentModel.lang === 'en' ? 'English' : 'Multilingual'}` : downloadedModelId}
+                </Text>
+                {currentModel && <Text style={styles.modelRowDesc}>{currentModel.description}</Text>}
+              </View>
+              <Button
+                title="Remove"
+                variant="outline"
+                size="small"
+                onPress={confirmDelete}
+                style={styles.removeButton}
+              />
+            </View>
+            {isDownloading && (
+              <View style={styles.progressWrap}>
+                <View style={styles.progressBar}>
+                  <View style={[styles.progressFill, { width: `${downloadProgress * 100}%` }]} />
                 </View>
-              );
-            }
-            return (
-              <View style={styles.modelList}>
-                <Text style={styles.selectLabel}>Select a model to download:</Text>
-                {WHISPER_MODELS.slice(0, 3).map((model) => (
-                  <TouchableOpacity
-                    key={model.id}
-                    style={styles.modelOption}
-                    onPress={() => downloadWhisperModel(model.id)}
-                  >
-                    <View style={styles.modelOptionInfo}>
-                      <Text style={styles.modelOptionName}>{model.name}</Text>
-                      <Text style={styles.modelOptionSize}>{model.size} MB</Text>
-                    </View>
-                    <Text style={styles.modelOptionDesc}>{model.description}</Text>
-                  </TouchableOpacity>
-                ))}
+                <Text style={styles.progressText}>{Math.round(downloadProgress * 100)}%</Text>
               </View>
-            );
-          })()}
-
-          {whisperError && (
-            <TouchableOpacity onPress={clearWhisperError}>
-              <Text style={styles.error}>{whisperError}</Text>
-            </TouchableOpacity>
-          )}
-        </Card>
-
-        <Card style={styles.privacyCard}>
-          <View style={styles.privacyIconContainer}>
-            <Icon name="mic" size={18} color={colors.textSecondary} />
-          </View>
-          <Text style={styles.privacyTitle}>Privacy First</Text>
-          <Text style={styles.privacyText}>
-            Voice transcription happens entirely on your device. Your audio is never sent to any server or stored anywhere.
-          </Text>
-        </Card>
+            )}
+          </Card>
+        )}
+
+        {/* ── Download progress when no model yet ── */}
+        {!downloadedModelId && isDownloading && (
+          <Card style={styles.section}>
+            <View style={styles.downloadingRow}>
+              <ActivityIndicator size="small" color={colors.primary} />
+              <Text style={styles.downloadingText}>Downloading... {Math.round(downloadProgress * 100)}%</Text>
+            </View>
+            <View style={styles.progressBar}>
+              <View style={[styles.progressFill, { width: `${downloadProgress * 100}%` }]} />
+            </View>
+          </Card>
+        )}
+
+        {/* ── Error ── */}
+        {whisperError && (
+          <TouchableOpacity onPress={clearError}>
+            <Text style={styles.error}>{whisperError} (tap to dismiss)</Text>
+          </TouchableOpacity>
+        )}
+
+        {/* ── Search bar ── */}
+        <View style={styles.searchBar}>
+          <Icon name="search" size={16} color={colors.textMuted} />
+          <TextInput
+            style={styles.searchInput}
+            value={searchQuery}
+            onChangeText={handleSearch}
+            placeholder="Search models or HuggingFace..."
+            placeholderTextColor={colors.textMuted}
+            autoCapitalize="none"
+            autoCorrect={false}
+            clearButtonMode="while-editing"
+          />
+          {isSearching && <ActivityIndicator size="small" color={colors.primary} />}
+        </View>
+
+        {/* ── Curated: English ── */}
+        {filteredEnglish.length > 0 && (
+          <Card style={styles.section}>
+            <Text style={styles.sectionLabel}>ENGLISH ONLY</Text>
+            {filteredEnglish.map((model, idx) => (
+              <React.Fragment key={model.id}>
+                {idx > 0 && <View style={styles.divider} />}
+                <ModelRow
+                  id={model.id}
+                  name={model.name}
+                  sizeMb={model.size}
+                  description={model.description}
+                  isDownloaded={downloadedModelId === model.id}
+                  isDownloading={isDownloading && downloadedModelId === model.id}
+                  downloadProgress={downloadProgress}
+                  onDownload={() => downloadModel(model.id)}
+                />
+              </React.Fragment>
+            ))}
+          </Card>
+        )}
+
+        {/* ── Curated: Multilingual ── */}
+        {filteredMulti.length > 0 && (
+          <Card style={styles.section}>
+            <Text style={styles.sectionLabel}>MULTILINGUAL — 99 LANGUAGES</Text>
+            {filteredMulti.map((model, idx) => (
+              <React.Fragment key={model.id}>
+                {idx > 0 && <View style={styles.divider} />}
+                <ModelRow
+                  id={model.id}
+                  name={model.name}
+                  sizeMb={model.size}
+                  description={model.description}
+                  isDownloaded={downloadedModelId === model.id}
+                  isDownloading={isDownloading && downloadedModelId === model.id}
+                  downloadProgress={downloadProgress}
+                  onDownload={() => downloadModel(model.id)}
+                />
+              </React.Fragment>
+            ))}
+          </Card>
+        )}
+
+        {/* ── HuggingFace search results ── */}
+        {hfRepos.length > 0 && (
+          <Card style={styles.section}>
+            <Text style={styles.sectionLabel}>HUGGINGFACE RESULTS</Text>
+            {hfRepos.map((repo, idx) => (
+              <React.Fragment key={repo.id}>
+                {idx > 0 && <View style={styles.divider} />}
+                <TouchableOpacity style={styles.repoRow} onPress={() => handleExpandRepo(repo.id)}>
+                  <View style={styles.modelRowInfo}>
+                    <Text style={styles.modelRowName} numberOfLines={1}>{repo.id}</Text>
+                    <Text style={styles.modelRowDesc}>{(repo.downloads / 1000).toFixed(0)}k downloads</Text>
+                  </View>
+                  {loadingFiles === repo.id
+                    ? <ActivityIndicator size="small" color={colors.textMuted} />
+                    : <Icon name={expandedRepo === repo.id ? 'chevron-up' : 'chevron-down'} size={16} color={colors.textMuted} />
+                  }
+                </TouchableOpacity>
+                {expandedRepo === repo.id && (
+                  <View style={styles.repoFiles}>
+                    {hfFiles[repo.id]?.length === 0 && (
+                      <Text style={styles.noFilesText}>No ggml .bin files found in this repo.</Text>
+                    )}
+                    {hfFiles[repo.id]?.map((file) => (
+                      <TouchableOpacity
+                        key={file.name}
+                        style={styles.fileRow}
+                        onPress={() => handleDownloadHfFile(file, repo.id)}
+                      >
+                        <Text style={styles.fileName} numberOfLines={1}>{file.name}</Text>
+                        <View style={styles.modelRowRight}>
+                          <Text style={styles.modelRowSize}>{formatSize(file.sizeMb)}</Text>
+                          <Icon name="download" size={13} color={colors.textMuted} />
+                        </View>
+                      </TouchableOpacity>
+                    ))}
+                  </View>
+                )}
+              </React.Fragment>
+            ))}
+          </Card>
+        )}
+
+        {/* ── Privacy note ── */}
+        <View style={styles.privacyNote}>
+          <Icon name="lock" size={13} color={colors.textMuted} />
+          <Text style={styles.privacyText}>All transcription runs on-device. Audio is never sent to any server.</Text>
+        </View>
       </ScrollView>
+
       <CustomAlert
         visible={alertState.visible}
         title={alertState.title}
@@ -149,11 +383,10 @@ export const VoiceSettingsScreen: React.FC = () => {
   );
 };
 
+// ─── Styles ───────────────────────────────────────────────────────────────────
+
 const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({
-  container: {
-    flex: 1,
-    backgroundColor: colors.background,
-  },
+  container: { flex: 1, backgroundColor: colors.background },
   header: {
     flexDirection: 'row' as const,
     alignItems: 'center' as const,
@@ -166,148 +399,79 @@ const createStyles = (colors: ThemeColors, shadows: ThemeShadows) => ({
     zIndex: 1,
     gap: SPACING.md,
   },
-  backButton: {
-    padding: SPACING.xs,
-  },
-  title: {
-    ...TYPOGRAPHY.h2,
-    flex: 1,
-    color: colors.text,
-  },
-  scrollView: {
-    flex: 1,
-  },
-  content: {
-    paddingHorizontal: SPACING.lg,
-    paddingTop: SPACING.lg,
-    paddingBottom: SPACING.xxl,
-  },
-  section: {
-    marginBottom: SPACING.lg,
-  },
-  description: {
-    ...TYPOGRAPHY.bodySmall,
-    color: colors.textSecondary,
-    lineHeight: 18,
-    marginBottom: SPACING.lg,
-  },
-  modelInfo: {
-    backgroundColor: colors.surfaceLight,
-    borderRadius: 8,
-    borderWidth: 1,
-    borderColor: colors.border,
-    padding: SPACING.lg,
+  backButton: { padding: SPACING.xs },
+  title: { ...TYPOGRAPHY.h2, flex: 1, color: colors.text },
+  scrollView: { flex: 1 },
+  content: { paddingHorizontal: SPACING.lg, paddingTop: SPACING.lg, paddingBottom: SPACING.xxl, gap: SPACING.md },
+  section: { gap: SPACING.xs },
+  sectionLabel: {
+    ...TYPOGRAPHY.label,
+    color: colors.textMuted,
+    textTransform: 'uppercase' as const,
+    letterSpacing: 0.5,
+    marginBottom: SPACING.xs,
   },
-  modelHeader: {
+  currentModelRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.md },
+  modelRow: { flexDirection: 'row' as const, alignItems: 'center' as const, paddingVertical: SPACING.sm, gap: SPACING.md },
+  modelRowInfo: { flex: 1, gap: 2 },
+  modelRowName: { ...TYPOGRAPHY.body, color: colors.text },
+  modelRowDesc: { ...TYPOGRAPHY.bodySmall, color: colors.textMuted, lineHeight: 16 },
+  modelRowRight: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.xs },
+  modelRowSize: { ...TYPOGRAPHY.meta, color: colors.textMuted },
+  badge: {
     flexDirection: 'row' as const,
-    justifyContent: 'space-between' as const,
     alignItems: 'center' as const,
-    marginBottom: SPACING.md,
-  },
-  modelName: {
-    ...TYPOGRAPHY.body,
-    color: colors.text,
-  },
-  modelStatus: {
-    ...TYPOGRAPHY.label,
-    textTransform: 'uppercase' as const,
-    color: colors.primary,
-    backgroundColor: `${colors.primary  }20`,
+    gap: 3,
     paddingHorizontal: SPACING.sm,
-    paddingVertical: SPACING.xs,
+    paddingVertical: 3,
     borderRadius: 6,
   },
-  removeButton: {
-    borderColor: colors.error,
-  },
-  downloading: {
-    alignItems: 'center' as const,
-    padding: SPACING.lg,
-  },
-  downloadingText: {
-    ...TYPOGRAPHY.body,
-    color: colors.textSecondary,
-    marginTop: SPACING.sm,
-  },
+  badgeDownloaded: { backgroundColor: `${colors.primary}18` },
+  badgeText: { ...TYPOGRAPHY.meta },
+  removeButton: { borderColor: colors.error, flexShrink: 1 },
+  progressWrap: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.sm, marginTop: SPACING.sm },
   progressBar: {
-    width: '100%' as const,
-    height: 6,
+    flex: 1,
+    height: 4,
     backgroundColor: colors.surfaceLight,
-    borderRadius: 3,
-    marginTop: SPACING.md,
+    borderRadius: 2,
     overflow: 'hidden' as const,
   },
-  progressFill: {
-    height: '100%' as const,
-    backgroundColor: colors.primary,
-    borderRadius: 3,
-  },
-  modelList: {
+  progressFill: { height: '100%' as const, backgroundColor: colors.primary, borderRadius: 2 },
+  progressText: { ...TYPOGRAPHY.meta, color: colors.textMuted, minWidth: 36 },
+  downloadingRow: { flexDirection: 'row' as const, alignItems: 'center' as const, gap: SPACING.sm, marginBottom: SPACING.sm },
+  downloadingText: { ...TYPOGRAPHY.body, color: colors.textSecondary },
+  error: { ...TYPOGRAPHY.bodySmall, color: colors.error, textAlign: 'center' as const, paddingHorizontal: SPACING.sm },
+  searchBar: {
+    flexDirection: 'row' as const,
+    alignItems: 'center' as const,
     gap: SPACING.sm,
-  },
-  selectLabel: {
-    ...TYPOGRAPHY.label,
-    textTransform: 'uppercase' as const,
-    color: colors.textMuted,
-    marginBottom: SPACING.sm,
-    letterSpacing: 0.3,
-  },
-  modelOption: {
-    backgroundColor: colors.surfaceLight,
-    borderRadius: 8,
-    padding: SPACING.md,
+    backgroundColor: colors.surface,
+    borderRadius: 10,
     borderWidth: 1,
     borderColor: colors.border,
+    paddingHorizontal: SPACING.md,
+    paddingVertical: SPACING.sm,
+    ...shadows.small,
   },
-  modelOptionInfo: {
+  searchInput: { ...TYPOGRAPHY.body, flex: 1, color: colors.text, padding: 0 },
+  divider: { height: 1, backgroundColor: colors.border, marginVertical: 2 },
+  repoRow: { flexDirection: 'row' as const, alignItems: 'center' as const, paddingVertical: SPACING.sm, gap: SPACING.md },
+  repoFiles: { paddingLeft: SPACING.md, paddingBottom: SPACING.xs, gap: 4 },
+  fileRow: {
     flexDirection: 'row' as const,
-    justifyContent: 'space-between' as const,
-    alignItems: 'center' as const,
-    marginBottom: SPACING.xs,
-  },
-  modelOptionName: {
-    ...TYPOGRAPHY.body,
-    color: colors.text,
-  },
-  modelOptionSize: {
-    ...TYPOGRAPHY.meta,
-    color: colors.primary,
-  },
-  modelOptionDesc: {
-    ...TYPOGRAPHY.bodySmall,
-    color: colors.textMuted,
-    lineHeight: 18,
-  },
-  error: {
-    ...TYPOGRAPHY.bodySmall,
-    color: colors.error,
-    marginTop: SPACING.md,
-    textAlign: 'center' as const,
-  },
-  privacyCard: {
     alignItems: 'center' as const,
-    backgroundColor: colors.surface,
-    borderWidth: 1,
-    borderColor: colors.border,
+    paddingVertical: SPACING.xs,
+    gap: SPACING.md,
   },
-  privacyIconContainer: {
-    width: 36,
-    height: 36,
-    borderRadius: 18,
-    backgroundColor: 'transparent',
+  fileName: { ...TYPOGRAPHY.bodySmall, flex: 1, color: colors.textSecondary },
+  noFilesText: { ...TYPOGRAPHY.bodySmall, color: colors.textMuted, paddingVertical: SPACING.xs },
+  privacyNote: {
+    flexDirection: 'row' as const,
     alignItems: 'center' as const,
+    gap: SPACING.xs,
     justifyContent: 'center' as const,
-    marginBottom: SPACING.md,
-  },
-  privacyTitle: {
-    ...TYPOGRAPHY.h3,
-    color: colors.text,
-    marginBottom: SPACING.sm,
-  },
-  privacyText: {
-    ...TYPOGRAPHY.body,
-    color: colors.textSecondary,
-    textAlign: 'center' as const,
-    lineHeight: 20,
+    paddingTop: SPACING.sm,
   },
+  privacyText: { ...TYPOGRAPHY.meta, color: colors.textMuted },
 });
diff --git a/src/screens/index.ts b/src/screens/index.ts
index 49f37020..2fee9d28 100644
--- a/src/screens/index.ts
+++ b/src/screens/index.ts
@@ -17,6 +17,7 @@ export { PassphraseSetupScreen } from './PassphraseSetupScreen';
 export { DownloadManagerScreen } from './DownloadManagerScreen';
 export { ModelSettingsScreen } from './ModelSettingsScreen';
 export { VoiceSettingsScreen } from './VoiceSettingsScreen';
+export { TTSSettingsScreen } from './TTSSettingsScreen';
 export { DeviceInfoScreen } from './DeviceInfoScreen';
 export { StorageSettingsScreen } from './StorageSettingsScreen';
 export { SecuritySettingsScreen } from './SecuritySettingsScreen';
diff --git a/src/services/audioRecorderService.ts b/src/services/audioRecorderService.ts
new file mode 100644
index 00000000..7beda22f
--- /dev/null
+++ b/src/services/audioRecorderService.ts
@@ -0,0 +1,99 @@
+import { AudioRecorder, FileFormat, FileDirectory, BitDepth, IOSAudioQuality, FlacCompressionLevel } from 'react-native-audio-api';
+import { PermissionsAndroid, Platform } from 'react-native';
+import logger from '../utils/logger';
+
+/** Supported formats for llama.rn audio input */
+export type AudioInputFormat = 'wav' | 'mp3';
+
+class AudioRecorderService {
+  private recorder: AudioRecorder | null = null;
+  private isRecording = false;
+
+  supportsDirectAudioInput(): boolean {
+    return true;
+  }
+
+  getFormat(): AudioInputFormat {
+    return 'wav';
+  }
+
+  async requestPermissions(): Promise<boolean> {
+    if (Platform.OS === 'android') {
+      try {
+        const granted = await PermissionsAndroid.request(
+          PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
+          {
+            title: 'Microphone Permission',
+            message: 'This app needs microphone access for voice input.',
+            buttonPositive: 'OK',
+            buttonNegative: 'Cancel',
+          },
+        );
+        return granted === PermissionsAndroid.RESULTS.GRANTED;
+      } catch {
+        return false;
+      }
+    }
+    return true; // iOS: triggered by AVAudioSession on first use
+  }
+
+  async startRecording(): Promise<void> {
+    if (this.isRecording) {
+      await this.stopRecording().catch(() => {});
+    }
+    const hasPermission = await this.requestPermissions();
+    if (!hasPermission) {
+      throw new Error('Microphone permission denied');
+    }
+    const rec = new AudioRecorder();
+    // Whisper requires 16 kHz mono int16 PCM.
+    // Set sampleRate via preset so the WAV header and data match what whisper.rn expects.
+    rec.enableFileOutput({
+      format: FileFormat.Wav,
+      directory: FileDirectory.Document,
+      subDirectory: 'audio-input',
+      fileNamePrefix: `input_${Date.now()}`,
+      channelCount: 1,
+      preset: {
+        sampleRate: 16000,
+        bitDepth: BitDepth.Bit16,
+        bitRate: 256000,
+        iosQuality: IOSAudioQuality.High,
+        flacCompressionLevel: FlacCompressionLevel.L5,
+      },
+    });
+    this.recorder = rec;
+    this.isRecording = true;
+    rec.start();
+    logger.log('[AudioRecorder] Recording started');
+  }
+
+  async stopRecording(): Promise<{ path: string; durationSeconds: number }> {
+    if (!this.isRecording || !this.recorder) {
+      throw new Error('No active recording');
+    }
+    const result = this.recorder.stop();
+    this.isRecording = false;
+    this.recorder = null;
+    if (result.status !== 'success') {
+      throw new Error('Recording failed to save');
+    }
+    const path = result.path;
+    const durationSeconds = (result as any).duration ?? 0;
+    logger.log('[AudioRecorder] Saved to:', path, 'duration:', durationSeconds);
+    return { path, durationSeconds };
+  }
+
+  cancelRecording(): void {
+    if (!this.isRecording || !this.recorder) return;
+    this.recorder.stop();
+    this.isRecording = false;
+    this.recorder = null;
+  }
+
+  isCurrentlyRecording(): boolean {
+    return this.isRecording;
+  }
+}
+
+export const audioRecorderService = new AudioRecorderService();
diff --git a/src/services/generationToolLoop.ts b/src/services/generationToolLoop.ts
index e5b78f2c..964b3163 100644
--- a/src/services/generationToolLoop.ts
+++ b/src/services/generationToolLoop.ts
@@ -29,19 +29,36 @@ function parseToolCallBody(body: string, idSuffix: number): ToolCall | null {
   } catch { /* Not JSON — fall through to XML */ }
   return parseXmlStyleToolCall(body, idSuffix);
 }
-/** Parse tool calls from text output (fallback for small models). Supports JSON and XML-like formats. */
+/** Parse <invoke name="fn"><parameter name="k">v</parameter></invoke> blocks (minimax, Anthropic-style). */
+function parseInvokeBlocks(text: string, toolCalls: ToolCall[], matchedRanges: [number, number][]): void {
+  const invokePattern = /<invoke\s+name="([^"]+)">([\s\S]*?)<\/invoke>/g;
+  let match;
+  while ((match = invokePattern.exec(text)) !== null) {
+    const name = match[1];
+    const args: Record<string, any> = {};
+    const paramPattern = /<parameter\s+name="([^"]+)">([\s\S]*?)<\/parameter>/g;
+    let pm;
+    while ((pm = paramPattern.exec(match[2])) !== null) { args[pm[1]] = pm[2].trim(); }
+    toolCalls.push({ id: `text-tc-${Date.now()}-${toolCalls.length}`, name, arguments: args });
+    matchedRanges.push([match.index, match.index + match[0].length]);
+  }
+}
+
+/** Parse tool calls from text output (fallback for small models). Supports JSON, XML, and invoke formats. */
 export function parseToolCallsFromText(text: string): { cleanText: string; toolCalls: ToolCall[] } {
   const toolCalls: ToolCall[] = [];
+  const matchedRanges: [number, number][] = [];
+
+  // 1. Standard <tool_call>...</tool_call> blocks (JSON or XML body)
   const closedPattern = /<tool_call>([\s\S]*?)<\/tool_call>/g;
   let match;
-  const matchedRanges: [number, number][] = [];
   while ((match = closedPattern.exec(text)) !== null) {
     matchedRanges.push([match.index, match.index + match[0].length]);
     const call = parseToolCallBody(match[1].trim(), toolCalls.length);
     if (call) { toolCalls.push(call); }
     else { logger.log(`[ToolLoop] Failed to parse tool_call tag: ${match[1].trim().substring(0, 100)}`); }
   }
-  // Also match unclosed <tool_call> at end of text (model hit EOS without closing tag)
+  // Unclosed <tool_call> at end of text (model hit EOS without closing tag)
   const unclosedMatch = /<tool_call>([\s\S]+)$/.exec(text);
   if (unclosedMatch) {
     const unclosedStart = text.lastIndexOf(unclosedMatch[0]);
@@ -52,6 +69,21 @@ export function parseToolCallsFromText(text: string): { cleanText: string; toolC
       matchedRanges.push([unclosedStart, text.length]);
     }
   }
+
+  // 2. <invoke name="...">...</invoke> blocks (minimax, Anthropic-style)
+  parseInvokeBlocks(text, toolCalls, matchedRanges);
+
+  // 3. Namespaced wrapper blocks: namespace:tool_call ... </namespace:tool_call>
+  const nsPattern = /[\w]+:tool_call[\s\S]*?<\/[\w]+:tool_call>/g;
+  while ((match = nsPattern.exec(text)) !== null) {
+    const alreadyMatched = matchedRanges.some(([s, e]) => match!.index >= s && match!.index < e);
+    if (!alreadyMatched) {
+      // Parse invoke blocks within this namespace wrapper
+      parseInvokeBlocks(match[0], toolCalls, []);
+      matchedRanges.push([match.index, match.index + match[0].length]);
+    }
+  }
+
   // Remove all matched ranges from text (reverse order to preserve indices)
   matchedRanges.sort((a, b) => b[0] - a[0]);
   let cleanText = text;
@@ -207,9 +239,17 @@ async function callLLMWithRetry(
   return callLocalWithRetry(messages, tools, onStream);
 }
 
-/** If no structured tool calls, try parsing <tool_call> tags from text. */
+/** Detect if text contains any tool call pattern (various model formats). */
+function containsToolCallMarkup(text: string): boolean {
+  return text.includes('<tool_call>') ||
+    text.includes('<invoke') ||
+    /\w+:tool_call/.test(text) ||
+    text.includes('<function_call>');
+}
+
+/** If no structured tool calls, try parsing tool call markup from text. */
 function resolveToolCalls(fullResponse: string, toolCalls: ToolCall[]) {
-  if (toolCalls.length > 0 || !fullResponse.includes('<tool_call>'))
+  if (toolCalls.length > 0 || !containsToolCallMarkup(fullResponse))
     return { effectiveToolCalls: toolCalls, displayResponse: fullResponse };
   const parsed = parseToolCallsFromText(fullResponse);
   if (parsed.toolCalls.length > 0) {
diff --git a/src/services/huggingface.ts b/src/services/huggingface.ts
index a91cfcc3..5f38f81b 100644
--- a/src/services/huggingface.ts
+++ b/src/services/huggingface.ts
@@ -223,6 +223,46 @@ class HuggingFaceService {
     };
   }
 
+  /** Search HuggingFace for Whisper/ASR models (returns repos that may contain ggml .bin files). */
+  async searchWhisperRepos(query: string, limit = 20): Promise<Array<{ id: string; author: string; downloads: number; lastModified?: string }>> {
+    const params = new URLSearchParams({
+      search: query || 'whisper',
+      pipeline_tag: 'automatic-speech-recognition',
+      sort: 'downloads',
+      direction: '-1',
+      limit: limit.toString(),
+    });
+    try {
+      const results = await this.fetchJson<HFModelSearchResult[]>(`${this.apiUrl}/models?${params.toString()}`);
+      return results.map(r => ({
+        id: r.id,
+        author: r.author || r.id.split('/')[0] || '',
+        downloads: r.downloads || 0,
+        lastModified: r.lastModified,
+      }));
+    } catch {
+      return [];
+    }
+  }
+
+  /** Fetch ggml-compatible .bin files from any HuggingFace model repo tree. */
+  async getWhisperFiles(modelId: string): Promise<Array<{ name: string; downloadUrl: string; sizeMb: number }>> {
+    try {
+      const files: Array<{ type: string; path: string; size?: number; lfs?: { size: number } }> =
+        await this.fetchJson(`${this.apiUrl}/models/${modelId}/tree/main`);
+      return files
+        .filter(f => f.type === 'file' && f.path.endsWith('.bin') && f.path.toLowerCase().includes('ggml'))
+        .map(f => ({
+          name: f.path.split('/').pop() || f.path,
+          downloadUrl: `${this.baseUrl}/${modelId}/resolve/main/${f.path}`,
+          sizeMb: Math.round((f.lfs?.size || f.size || 0) / (1024 * 1024)),
+        }))
+        .sort((a, b) => a.sizeMb - b.sizeMb);
+    } catch {
+      return [];
+    }
+  }
+
 }
 
 export const huggingFaceService = new HuggingFaceService();
diff --git a/src/services/index.ts b/src/services/index.ts
index b4fe5fcd..bde487b8 100644
--- a/src/services/index.ts
+++ b/src/services/index.ts
@@ -8,6 +8,8 @@ export type { Intent } from './intentClassifier';
 export { voiceService } from './voiceService';
 export { authService } from './authService';
 export { whisperService, WHISPER_MODELS } from './whisperService';
+export { ttsService } from './ttsService';
+export type { TTSOptions, GeneratedAudio } from './ttsService';
 export type { TranscriptionResult, TranscriptionCallback } from './whisperService';
 export { backgroundDownloadService } from './backgroundDownloadService';
 export { activeModelService } from './activeModelService';
diff --git a/src/services/llm.ts b/src/services/llm.ts
index 1fdcf145..bbb549ed 100644
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -287,8 +287,8 @@ class LLMService {
     return { gpu: this.gpuEnabled, gpuBackend: resolveGpuBackend(this.gpuEnabled, this.gpuDevices), gpuLayers: this.activeGpuLayers, reasonNoGPU: this.gpuReason };
   }
   isCurrentlyGenerating(): boolean { return this.isGenerating; }
-  private formatMessages(messages: Message[]): string { return formatLlamaMessages(messages, this.supportsVision()); }
-  private convertToOAIMessages(messages: Message[]): RNLlamaOAICompatibleMessage[] { return buildOAIMessages(messages); }
+  private formatMessages(messages: Message[]): string { return formatLlamaMessages(messages, this.supportsVision(), this.multimodalSupport?.audio ?? false); }
+  private convertToOAIMessages(messages: Message[]): RNLlamaOAICompatibleMessage[] { return buildOAIMessages(messages, this.multimodalSupport?.audio ?? false); }
   async getModelInfo() { return this.context ? { contextLength: APP_CONFIG.maxContextLength, vocabSize: 0 } : null; }
   async tokenize(text: string) {
     if (!this.context) throw new Error('No model loaded');
diff --git a/src/services/llmMessages.ts b/src/services/llmMessages.ts
index c359651b..1e93f6e7 100644
--- a/src/services/llmMessages.ts
+++ b/src/services/llmMessages.ts
@@ -1,19 +1,21 @@
 import { RNLlamaOAICompatibleMessage, RNLlamaMessagePart } from 'llama.rn';
 import { Message } from '../types';
 
-export function formatLlamaMessages(messages: Message[], supportsVision: boolean): string {
+export function formatLlamaMessages(messages: Message[], supportsVision: boolean, supportsAudio = false): string {
   let prompt = '';
   for (const message of messages.filter(m => !m.isSystemInfo)) {
     if (message.role === 'system') {
       prompt += `<|im_start|>system\n${message.content}<|im_end|>\n`;
     } else if (message.role === 'user') {
       let content = message.content;
-      if (message.attachments && message.attachments.length > 0 && supportsVision) {
-        const imageMarkers = message.attachments
-          .filter(a => a.type === 'image')
-          .map(() => '<__media__>')
-          .join('');
-        content = imageMarkers + content;
+      if (message.attachments && message.attachments.length > 0) {
+        const imageMarkers = supportsVision
+          ? message.attachments.filter(a => a.type === 'image').map(() => '<__media__>').join('')
+          : '';
+        const audioMarkers = supportsAudio
+          ? message.attachments.filter(a => a.type === 'audio').map(() => '<__media__>').join('')
+          : '';
+        content = imageMarkers + audioMarkers + content;
       }
       prompt += `<|im_start|>user\n${content}<|im_end|>\n`;
     } else if (message.role === 'assistant') {
@@ -48,45 +50,38 @@ function formatToolCallAsText(tc: { name: string; arguments: string }): string {
   return `<tool_call>{"name":${escapedName},"arguments":${tc.arguments}}</tool_call>`;
 }
 
-export function buildOAIMessages(messages: Message[]): RNLlamaOAICompatibleMessage[] {
-  const filtered = messages.filter(m => !m.isSystemInfo);
-  return filtered.map((message) => {
-    // Flatten tool result messages into user messages —
-    // avoids role:"tool" which some Jinja templates don't handle
+function toFileUrl(uri: string, requireFilePrefix = false): string {
+  if (requireFilePrefix) return uri.startsWith('file://') ? uri : `file://${uri}`;
+  return uri.startsWith('file://') || uri.startsWith('http') ? uri : `file://${uri}`;
+}
+
+function buildMediaParts(message: Message, supportsAudio: boolean): RNLlamaMessagePart[] {
+  const parts: RNLlamaMessagePart[] = [];
+  for (const a of message.attachments?.filter(att => att.type === 'image') ?? []) {
+    parts.push({ type: 'image_url', image_url: { url: toFileUrl(a.uri) } });
+  }
+  if (supportsAudio) {
+    for (const a of message.attachments?.filter(att => att.type === 'audio') ?? []) {
+      parts.push({ type: 'input_audio', input_audio: { format: a.audioFormat ?? 'wav', url: toFileUrl(a.uri, true) } });
+    }
+  }
+  if (message.content) parts.push({ type: 'text', text: message.content });
+  return parts;
+}
+
+export function buildOAIMessages(messages: Message[], supportsAudio = false): RNLlamaOAICompatibleMessage[] {
+  return messages.filter(m => !m.isSystemInfo).map((message) => {
     if (message.role === 'tool') {
       const label = message.toolName || 'tool';
-      return {
-        role: 'user' as const,
-        content: `[Tool Result: ${label}]\n${message.content}\n[End Tool Result]`,
-      };
+      return { role: 'user' as const, content: `[Tool Result: ${label}]\n${message.content}\n[End Tool Result]` };
     }
-
-    // Flatten assistant tool calls into plain text —
-    // structured tool_calls in history cause Jinja/C++ conflicts
     if (message.role === 'assistant' && message.toolCalls?.length) {
       const toolCallText = message.toolCalls.map(formatToolCallAsText).join('\n');
-      const content = message.content
-        ? `${message.content}\n${toolCallText}`
-        : toolCallText;
-      return { role: 'assistant' as const, content };
-    }
-
-    const imageAttachments = message.attachments?.filter(a => a.type === 'image') || [];
-    if (imageAttachments.length === 0 || message.role !== 'user') {
-      return { role: message.role, content: message.content };
-    }
-
-    const contentParts: RNLlamaMessagePart[] = [];
-    for (const attachment of imageAttachments) {
-      let imagePath = attachment.uri;
-      if (!imagePath.startsWith('file://') && !imagePath.startsWith('http')) {
-        imagePath = `file://${imagePath}`;
-      }
-      contentParts.push({ type: 'image_url', image_url: { url: imagePath } });
-    }
-    if (message.content) {
-      contentParts.push({ type: 'text', text: message.content });
+      return { role: 'assistant' as const, content: message.content ? `${message.content}\n${toolCallText}` : toolCallText };
     }
-    return { role: message.role, content: contentParts };
+    const hasImage = message.role === 'user' && message.attachments?.some(a => a.type === 'image');
+    const hasAudio = supportsAudio && message.role === 'user' && message.attachments?.some(a => a.type === 'audio');
+    if (!hasImage && !hasAudio) return { role: message.role, content: message.content };
+    return { role: message.role, content: buildMediaParts(message, supportsAudio) };
   });
 }
diff --git a/src/services/ttsService.ts b/src/services/ttsService.ts
new file mode 100644
index 00000000..1fd9506a
--- /dev/null
+++ b/src/services/ttsService.ts
@@ -0,0 +1,354 @@
+import { initLlama, LlamaContext } from 'llama.rn';
+import RNFS from 'react-native-fs';
+import { AudioContext, AudioBufferSourceNode } from 'react-native-audio-api';
+import logger from '../utils/logger';
+import { TTS_BACKBONE_MODEL } from '../constants/ttsModels';
+
+export interface TTSOptions {
+  speed?: number;
+  voiceId?: string;
+}
+
+export interface GeneratedAudio {
+  samples: Float32Array;
+  durationSeconds: number;
+  sampleRate: number;
+  /** Downsampled amplitude envelope (~200 points) for waveform visualization */
+  waveformData: number[];
+}
+
+class TTSService {
+  private context: LlamaContext | null = null;
+  private isVocoderReady = false;
+  private isSpeakingFlag = false;
+  private audioCtx: AudioContext | null = null;
+  private currentSource: AudioBufferSourceNode | null = null;
+  private contextLoadPromise: Promise<void> = Promise.resolve();
+
+  // ─── Paths ────────────────────────────────────────────────────────────────
+
+  getModelsDir(): string {
+    return `${RNFS.DocumentDirectoryPath}/tts-models`;
+  }
+
+  getAudioCacheDir(conversationId: string): string {
+    return `${RNFS.DocumentDirectoryPath}/audio-cache/${conversationId}`;
+  }
+
+  getAudioFilePath(conversationId: string, messageId: string): string {
+    return `${this.getAudioCacheDir(conversationId)}/${messageId}.pcm`;
+  }
+
+  getBackbonePath(): string {
+    return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.backboneFile}`;
+  }
+
+  getVocoderPath(): string {
+    return `${this.getModelsDir()}/${TTS_BACKBONE_MODEL.vocoderFile}`;
+  }
+
+  private async ensureDir(dir: string): Promise<void> {
+    if (!(await RNFS.exists(dir))) {
+      await RNFS.mkdir(dir);
+    }
+  }
+
+  // ─── Download Status ─────────────────────────────────────────────────────
+
+  async isBackboneDownloaded(): Promise<boolean> {
+    return RNFS.exists(this.getBackbonePath());
+  }
+
+  async isVocoderDownloaded(): Promise<boolean> {
+    return RNFS.exists(this.getVocoderPath());
+  }
+
+  async areBothModelsDownloaded(): Promise<boolean> {
+    return (await this.isBackboneDownloaded()) && (await this.isVocoderDownloaded());
+  }
+
+  async isAudioCached(conversationId: string, messageId: string): Promise<boolean> {
+    return RNFS.exists(this.getAudioFilePath(conversationId, messageId));
+  }
+
+  async getAudioCacheSizeMB(): Promise<number> {
+    const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`;
+    if (!(await RNFS.exists(cacheRoot))) return 0;
+    let totalBytes = 0;
+    const convDirs = await RNFS.readDir(cacheRoot);
+    for (const convDir of convDirs) {
+      if (convDir.isDirectory()) {
+        const files = await RNFS.readDir(convDir.path);
+        for (const file of files) { totalBytes += Number(file.size); }
+      }
+    }
+    return totalBytes / (1024 * 1024);
+  }
+
+  async clearAudioCache(): Promise<void> {
+    const cacheRoot = `${RNFS.DocumentDirectoryPath}/audio-cache`;
+    if (await RNFS.exists(cacheRoot)) {
+      await RNFS.unlink(cacheRoot);
+    }
+  }
+
+  // ─── Download ────────────────────────────────────────────────────────────
+
+  async downloadBackbone(onProgress?: (p: number) => void): Promise<string> {
+    await this.ensureDir(this.getModelsDir());
+    const dest = this.getBackbonePath();
+    if (await RNFS.exists(dest)) {
+      return dest;
+    }
+    const dl = RNFS.downloadFile({
+      fromUrl: TTS_BACKBONE_MODEL.backboneUrl,
+      toFile: dest,
+      progressDivider: 1,
+      progress: (res) => onProgress?.(res.bytesWritten / res.contentLength),
+    });
+    const result = await dl.promise;
+    if (result.statusCode !== 200) {
+      await RNFS.unlink(dest).catch(() => {});
+      throw new Error(`Backbone download failed: HTTP ${result.statusCode}`);
+    }
+    return dest;
+  }
+
+  async downloadVocoder(onProgress?: (p: number) => void): Promise<string> {
+    await this.ensureDir(this.getModelsDir());
+    const dest = this.getVocoderPath();
+    if (await RNFS.exists(dest)) {
+      return dest;
+    }
+    const dl = RNFS.downloadFile({
+      fromUrl: TTS_BACKBONE_MODEL.vocoderUrl,
+      toFile: dest,
+      progressDivider: 1,
+      progress: (res) => onProgress?.(res.bytesWritten / res.contentLength),
+    });
+    const result = await dl.promise;
+    if (result.statusCode !== 200) {
+      await RNFS.unlink(dest).catch(() => {});
+      throw new Error(`Vocoder download failed: HTTP ${result.statusCode}`);
+    }
+    return dest;
+  }
+
+  async deleteModels(): Promise<void> {
+    await this.unloadModels();
+    const bp = this.getBackbonePath();
+    const vp = this.getVocoderPath();
+    if (await RNFS.exists(bp)) {
+      await RNFS.unlink(bp);
+    }
+    if (await RNFS.exists(vp)) {
+      await RNFS.unlink(vp);
+    }
+  }
+
+  // ─── Model Lifecycle ─────────────────────────────────────────────────────
+
+  async loadModels(): Promise<void> {
+    if (this.context && this.isVocoderReady) {
+      return;
+    }
+    // Serial load — prevent double init
+    this.contextLoadPromise = this.contextLoadPromise.then(async () => {
+      if (this.context && this.isVocoderReady) {
+        return;
+      }
+      logger.log('[TTS] Loading backbone...');
+      this.context = await initLlama({
+        model: this.getBackbonePath(),
+        n_ctx: 8192,
+        n_threads: 4,
+      });
+      logger.log('[TTS] Loading vocoder...');
+      await this.context.initVocoder({ path: this.getVocoderPath(), n_batch: 4096 });
+      this.isVocoderReady = await this.context.isVocoderEnabled();
+      if (!this.isVocoderReady) {
+        throw new Error('Vocoder failed to initialize — check model files.');
+      }
+      logger.log('[TTS] Ready.');
+    });
+    return this.contextLoadPromise;
+  }
+
+  async unloadModels(): Promise<void> {
+    this.stop();
+    if (this.context) {
+      await this.context.releaseVocoder().catch(() => {});
+      await this.context.release().catch(() => {});
+      this.context = null;
+    }
+    this.isVocoderReady = false;
+    this.audioCtx?.close().catch(() => {});
+    this.audioCtx = null;
+  }
+
+  isLoaded(): boolean {
+    return this.context !== null && this.isVocoderReady;
+  }
+
+  // ─── Audio Generation ────────────────────────────────────────────────────
+
+  async generate(text: string, _options: TTSOptions = {}): Promise<GeneratedAudio> {
+    if (!this.context || !this.isVocoderReady) {
+      throw new Error('TTS models not loaded.');
+    }
+    const { prompt, grammar } = await this.context.getFormattedAudioCompletion(
+      null, // null = default speaker
+      text,
+    );
+    const guideTokens = (await this.context.getAudioCompletionGuideTokens(text)) ?? [];
+    const result = await this.context.completion({
+      prompt,
+      grammar,
+      guide_tokens: guideTokens,
+      n_predict: 4096,
+      temperature: 0.7,
+      top_p: 0.9,
+      stop: ['<|im_end|>'],
+    });
+    const pcmArray = await this.context.decodeAudioTokens(result.audio_tokens ?? []);
+    const samples = new Float32Array(pcmArray);
+    const sampleRate = TTS_BACKBONE_MODEL.sampleRate;
+    return {
+      samples,
+      durationSeconds: samples.length / sampleRate,
+      sampleRate,
+      waveformData: this.buildWaveformData(samples, 200),
+    };
+  }
+
+  async saveToFile(audio: GeneratedAudio, conversationId: string, messageId: string): Promise<string> {
+    await this.ensureDir(this.getAudioCacheDir(conversationId));
+    const path = this.getAudioFilePath(conversationId, messageId);
+    const base64 = this.float32ToBase64(audio.samples);
+    await RNFS.writeFile(path, base64, 'base64');
+    return path;
+  }
+
+  async generateAndSave(
+    text: string,
+    ctx: { conversationId: string; messageId: string },
+    options: TTSOptions = {},
+  ): Promise<{ path: string; audio: GeneratedAudio }> {
+    const audio = await this.generate(text, options);
+    const path = await this.saveToFile(audio, ctx.conversationId, ctx.messageId);
+    return { path, audio };
+  }
+
+  // ─── Playback ────────────────────────────────────────────────────────────
+
+  async playFromSamples(samples: Float32Array, speed = 1.0, startOffset = 0): Promise<void> {
+    const sampleRate = TTS_BACKBONE_MODEL.sampleRate;
+    this.audioCtx?.close().catch(() => {});
+    this.audioCtx = new AudioContext({ sampleRate });
+    const buffer = this.audioCtx.createBuffer(1, samples.length, sampleRate);
+    buffer.copyToChannel(samples, 0);
+    const source = this.audioCtx.createBufferSource();
+    source.buffer = buffer;
+    source.playbackRate.value = speed;
+    source.connect(this.audioCtx.destination);
+    this.currentSource = source;
+    this.isSpeakingFlag = true;
+    return new Promise((resolve) => {
+      source.onEnded = () => {
+        this.currentSource = null;
+        this.isSpeakingFlag = false;
+        resolve();
+      };
+      source.start(0, startOffset);
+    });
+  }
+
+  async playFromFile(filePath: string, speed = 1.0, startOffset = 0): Promise<void> {
+    // WAV/PCM files must be decoded with decodeAudioData — NOT cast from raw bytes.
+    // The old base64→Float32 path was designed for OuteTTS raw Float32 output only.
+    this.audioCtx?.close().catch(() => {});
+    this.audioCtx = new AudioContext();
+    const src = filePath.startsWith('file://') ? filePath : `file://${filePath}`;
+    // decodeAudioData accepts a string path as DecodeDataInput
+    const buffer = await this.audioCtx.decodeAudioData(src as unknown as ArrayBuffer);
+    const source = this.audioCtx.createBufferSource();
+    source.buffer = buffer;
+    source.playbackRate.value = speed;
+    source.connect(this.audioCtx.destination);
+    this.currentSource = source;
+    this.isSpeakingFlag = true;
+    return new Promise((resolve) => {
+      source.onEnded = () => {
+        this.currentSource = null;
+        this.isSpeakingFlag = false;
+        resolve();
+      };
+      source.start(0, startOffset);
+    });
+  }
+
+  /** Chat Mode: generate + play + discard. No disk write.
+   *  @param onStartPlayback  Called once generation is done and audio is about to play.
+   */
+  async speak(text: string, options: TTSOptions = {}, onStartPlayback?: () => void): Promise<void> {
+    this.stop();
+    this.isSpeakingFlag = true; // mark in-progress so stop() during generation works
+    try {
+      const audio = await this.generate(text, options);
+      if (!this.isSpeakingFlag) return; // stop() was called during generation
+      onStartPlayback?.();
+      await this.playFromSamples(audio.samples, options.speed ?? 1.0);
+    } finally {
+      this.isSpeakingFlag = false;
+    }
+  }
+
+  stop(): void {
+    this.isSpeakingFlag = false;
+    try {
+      this.currentSource?.stop();
+    } catch {
+      // already stopped
+    }
+    this.currentSource = null;
+  }
+
+  isSpeaking(): boolean {
+    return this.isSpeakingFlag;
+  }
+
+  // ─── Utilities ───────────────────────────────────────────────────────────
+
+  private buildWaveformData(samples: Float32Array, points: number): number[] {
+    const blockSize = Math.floor(samples.length / points);
+    const result: number[] = [];
+    for (let i = 0; i < points; i++) {
+      let sum = 0;
+      for (let j = 0; j < blockSize; j++) {
+        sum += Math.abs(samples[i * blockSize + j] ?? 0);
+      }
+      result.push(blockSize > 0 ? sum / blockSize : 0);
+    }
+    return result;
+  }
+
+  private float32ToBase64(samples: Float32Array): string {
+    const uint8 = new Uint8Array(samples.buffer);
+    let binary = '';
+    for (let i = 0; i < uint8.length; i++) {
+      binary += String.fromCharCode(uint8[i]);
+    }
+    return btoa(binary);
+  }
+
+  private base64ToFloat32(base64: string): Float32Array {
+    const binary = atob(base64);
+    const uint8 = new Uint8Array(binary.length);
+    for (let i = 0; i < binary.length; i++) {
+      uint8[i] = binary.charCodeAt(i);
+    }
+    return new Float32Array(uint8.buffer);
+  }
+}
+
+export const ttsService = new TTSService();
diff --git a/src/services/whisperService.ts b/src/services/whisperService.ts
index 4d945423..d1b77cd5 100644
--- a/src/services/whisperService.ts
+++ b/src/services/whisperService.ts
@@ -11,12 +11,21 @@ export interface TranscriptionResult {
 }
 export type TranscriptionCallback = (result: TranscriptionResult) => void;
 
+const GGML_BASE = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main';
+
 export const WHISPER_MODELS = [
-  { id: 'tiny.en', name: 'Whisper Tiny (English)', size: 75, url: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin', description: 'Fastest, English only, good for basic transcription' },
-  { id: 'tiny', name: 'Whisper Tiny (Multilingual)', size: 75, url: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin', description: 'Fast, supports multiple languages' },
-  { id: 'base.en', name: 'Whisper Base (English)', size: 142, url: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin', description: 'Better accuracy, English only' },
-  { id: 'base', name: 'Whisper Base (Multilingual)', size: 142, url: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin', description: 'Better accuracy, multiple languages' },
-  { id: 'small.en', name: 'Whisper Small (English)', size: 466, url: 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin', description: 'High accuracy, English only, needs more RAM' },
+  // ── English-only ──────────────────────────────────────────────────────────
+  { id: 'tiny.en',   name: 'Tiny',   size: 75,   lang: 'en',    url: `${GGML_BASE}/ggml-tiny.en.bin`,   description: 'Fastest, English only' },
+  { id: 'base.en',   name: 'Base',   size: 142,  lang: 'en',    url: `${GGML_BASE}/ggml-base.en.bin`,   description: 'Better accuracy, English only' },
+  { id: 'small.en',  name: 'Small',  size: 466,  lang: 'en',    url: `${GGML_BASE}/ggml-small.en.bin`,  description: 'High accuracy, English only' },
+  { id: 'medium.en', name: 'Medium', size: 1500, lang: 'en',    url: `${GGML_BASE}/ggml-medium.en.bin`, description: 'Near human-level, English only, ~2 GB RAM' },
+  // ── Multilingual ──────────────────────────────────────────────────────────
+  { id: 'tiny',           name: 'Tiny',             size: 75,   lang: 'multi', url: `${GGML_BASE}/ggml-tiny.bin`,           description: 'Fastest, 99 languages' },
+  { id: 'base',           name: 'Base',             size: 142,  lang: 'multi', url: `${GGML_BASE}/ggml-base.bin`,           description: 'Better accuracy, 99 languages' },
+  { id: 'small',          name: 'Small',            size: 466,  lang: 'multi', url: `${GGML_BASE}/ggml-small.bin`,          description: 'High accuracy, 99 languages' },
+  { id: 'medium',         name: 'Medium',           size: 1500, lang: 'multi', url: `${GGML_BASE}/ggml-medium.bin`,         description: 'Near human-level, 99 languages, ~2 GB RAM' },
+  { id: 'large-v3-turbo', name: 'Large v3 Turbo',  size: 809,  lang: 'multi', url: `${GGML_BASE}/ggml-large-v3-turbo.bin`, description: 'Fast + accurate, distilled large, 99 languages' },
+  { id: 'large-v3',       name: 'Large v3',         size: 1550, lang: 'multi', url: `${GGML_BASE}/ggml-large-v3.bin`,       description: 'Best quality, 99 languages, ~3 GB RAM' },
 ];
 
 class WhisperService {
@@ -62,6 +71,30 @@ class WhisperService {
     logger.log(`[Whisper] Downloaded to ${destPath}`);
     return destPath;
   }
+  async downloadFromUrl(url: string, modelId: string, onProgress?: (progress: number) => void): Promise<string> {
+    await this.ensureModelsDirExists();
+    const destPath = this.getModelPath(modelId);
+    if (await RNFS.exists(destPath)) return destPath;
+    logger.log(`[Whisper] Downloading from URL: ${url}`);
+    const download = RNFS.downloadFile({
+      fromUrl: url, toFile: destPath, progressDivider: 1,
+      progress: (res) => { onProgress?.(res.bytesWritten / res.contentLength); },
+    });
+    const result = await download.promise;
+    if (result.statusCode !== 200) {
+      await RNFS.unlink(destPath).catch(() => {});
+      throw new Error(`Download failed with status ${result.statusCode}`);
+    }
+    try {
+      await this.validateModelFile(destPath);
+    } catch (validationError) {
+      await RNFS.unlink(destPath).catch(() => {});
+      throw validationError;
+    }
+    logger.log(`[Whisper] Downloaded to ${destPath}`);
+    return destPath;
+  }
+
   async deleteModel(modelId: string): Promise<void> {
     const path = this.getModelPath(modelId);
     if (await RNFS.exists(path)) await RNFS.unlink(path);
diff --git a/src/stores/chatStore.ts b/src/stores/chatStore.ts
index 31b93335..de80080b 100644
--- a/src/stores/chatStore.ts
+++ b/src/stores/chatStore.ts
@@ -4,6 +4,7 @@ import AsyncStorage from '@react-native-async-storage/async-storage';
 import { Message, Conversation, GenerationMeta } from '../types';
 import { stripControlTokens, stripStreamingControlTokens } from '../utils/messageContent';
 import { generateId } from '../utils/generateId';
+import '../types/tts';
 
 function nextUpdatedAt(previousUpdatedAt?: string): string {
   const now = Date.now();
@@ -51,6 +52,10 @@ function extractChannelThinking(rawContent: string): { reasoningContent: string
   // Qwen channel format: <|channel|>analysis<|message|>[thinking]<|channel|>final<|message|>[response]
   const qwen = sliceThinkingBlock(rawContent, '<|channel|>analysis<|message|>', '<|channel|>final<|message|>');
   if (qwen) return qwen;
+  // <think>...</think> format (Qwen 3.5, DeepSeek, etc.)
+  const thinkTags = sliceThinkingBlock(rawContent, '<think>', '</think>');
+  if (thinkTags) return thinkTags;
+
   return { reasoningContent: undefined, responseContent: rawContent };
 }
 
@@ -86,6 +91,7 @@ interface ChatState {
   addMessage: (conversationId: string, message: Omit<Message, 'id' | 'timestamp'>) => Message;
   updateMessageContent: (conversationId: string, messageId: string, content: string) => void;
   updateMessageThinking: (conversationId: string, messageId: string, isThinking: boolean) => void;
+  updateMessageAudio: (conversationId: string, messageId: string, audio: { audioPath?: string; waveformData?: number[]; audioDurationSeconds?: number; isGeneratingAudio?: boolean; isAudioModeMessage?: boolean }) => void;
   deleteMessage: (conversationId: string, messageId: string) => void;
   deleteMessagesAfter: (conversationId: string, messageId: string) => void;
   startStreaming: (conversationId: string) => void;
@@ -198,6 +204,10 @@ export const useChatStore = create<ChatState>()(
         }));
       },
 
+      updateMessageAudio: (conversationId, messageId, audio) => {
+        set((state) => ({ conversations: mapConversation(state.conversations, conversationId, (conv) => updateMessageInConv(conv, messageId, (msg) => ({ ...msg, ...audio }))) }));
+      },
+
       deleteMessage: (conversationId, messageId) => {
         set((state) => ({
           conversations: mapConversation(state.conversations, conversationId, (conv) => ({
diff --git a/src/stores/index.ts b/src/stores/index.ts
index fd14cb48..92a1201f 100644
--- a/src/stores/index.ts
+++ b/src/stores/index.ts
@@ -3,4 +3,6 @@ export { useChatStore } from './chatStore';
 export { useProjectStore } from './projectStore';
 export { useAuthStore } from './authStore';
 export { useWhisperStore } from './whisperStore';
+export { useTTSStore } from './ttsStore';
+export type { TTSSettings, TTSState, InterfaceMode } from './ttsStore';
 export { useRemoteServerStore } from './remoteServerStore';
diff --git a/src/stores/ttsStore.ts b/src/stores/ttsStore.ts
new file mode 100644
index 00000000..9de06af2
--- /dev/null
+++ b/src/stores/ttsStore.ts
@@ -0,0 +1,371 @@
+import { create } from 'zustand';
+import { persist, createJSONStorage } from 'zustand/middleware';
+import AsyncStorage from '@react-native-async-storage/async-storage';
+import { ttsService } from '../services/ttsService';
+import { kokoroRef } from '../components/KokoroTTSManager';
+import { isExecutorchSupported } from '../constants/kokoroModels';
+import type { KokoroVoiceId } from '../constants/kokoroModels';
+import { DEFAULT_KOKORO_VOICE_ID } from '../constants/kokoroModels';
+import logger from '../utils/logger';
+
+export type InterfaceMode = 'chat' | 'audio';
+
+export interface TTSSettings {
+  /** 'chat' = text bubbles + play button per message; 'audio' = waveform bubbles */
+  interfaceMode: InterfaceMode;
+  enabled: boolean;
+  /** Chat Mode only — auto-speak AI responses after streaming */
+  autoPlay: boolean;
+  speed: number;
+  voiceId: string;
+  /** Kokoro voice used for Chat Mode speak (fast path) */
+  kokoroVoiceId: KokoroVoiceId;
+}
+
+export interface TTSState {
+  // Download
+  isBackboneDownloaded: boolean;
+  isVocoderDownloaded: boolean;
+  isDownloadingBackbone: boolean;
+  isDownloadingVocoder: boolean;
+  backboneDownloadProgress: number;
+  vocoderDownloadProgress: number;
+
+  // Model lifecycle
+  isModelLoading: boolean;
+  isModelLoaded: boolean;
+
+  // Playback
+  isSpeaking: boolean;
+  isPaused: boolean;
+  /** True while LLM inference is running to generate audio tokens (before audio plays). OuteTTS only — Kokoro streams so this is never set. */
+  isGeneratingAudio: boolean;
+  currentMessageId: string | null;
+
+  // Kokoro (fast TTS, Android 13+ / iOS 17+)
+  kokoroReady: boolean;
+  kokoroDownloadProgress: number;
+  /** The voice ID Kokoro is currently loaded with (lags behind settings.kokoroVoiceId during changes) */
+  kokoroActiveVoiceId: KokoroVoiceId;
+  /** True only while Kokoro is actively pushing audio chunks (first chunk received) */
+  isAudioPlaying: boolean;
+  /** RMS amplitude of the current audio chunk (0–1), updated per chunk for waveform sync */
+  currentAmplitude: number;
+  /** Elapsed playback seconds — accumulated per Kokoro chunk for progress display */
+  playbackElapsed: number;
+  /** Monotonic counter — increments each time a new play session starts */
+  playSessionId: number;
+
+  // Cache
+  audioCacheSizeMB: number;
+
+  // Settings (persisted)
+  settings: TTSSettings;
+
+  error: string | null;
+
+  // Actions
+  checkDownloadStatus: () => Promise<void>;
+  downloadModels: () => Promise<void>;
+  deleteModels: () => Promise<void>;
+  loadModels: () => Promise<void>;
+  unloadModels: () => Promise<void>;
+
+  // Chat Mode
+  speak: (text: string, messageId: string) => Promise<void>;
+  stop: () => void;
+  pause: () => void;
+  resume: () => void;
+
+  // Audio Mode
+  generateAndSave: (
+    text: string,
+    conversationId: string,
+    messageId: string,
+  ) => Promise<{ path: string; waveformData: number[]; durationSeconds: number }>;
+  playMessage: (messageId: string, filePath: string, startOffset?: number) => Promise<void>;
+  stopPlayback: () => void;
+
+  // Cache management
+  refreshCacheSize: () => Promise<void>;
+  clearAudioCache: () => Promise<void>;
+
+  setKokoroState: (ready: boolean, progress: number) => void;
+  setKokoroActiveVoiceId: (id: KokoroVoiceId) => void;
+  setAudioPlaying: (playing: boolean) => void;
+  setCurrentAmplitude: (amplitude: number) => void;
+  addPlaybackElapsed: (seconds: number) => void;
+  updateSettings: (patch: Partial<TTSSettings>) => void;
+  clearError: () => void;
+}
+
+export const useTTSStore = create<TTSState>()(
+  persist(
+    (set, get) => ({
+      isBackboneDownloaded: false,
+      isVocoderDownloaded: false,
+      isDownloadingBackbone: false,
+      isDownloadingVocoder: false,
+      backboneDownloadProgress: 0,
+      vocoderDownloadProgress: 0,
+      isModelLoading: false,
+      isModelLoaded: false,
+      isSpeaking: false,
+      isPaused: false,
+      isGeneratingAudio: false,
+      currentMessageId: null,
+      kokoroReady: false,
+      kokoroDownloadProgress: 0,
+      kokoroActiveVoiceId: DEFAULT_KOKORO_VOICE_ID,
+      isAudioPlaying: false,
+      currentAmplitude: 0,
+      playbackElapsed: 0,
+      playSessionId: 0,
+      audioCacheSizeMB: 0,
+      settings: {
+        interfaceMode: 'chat',
+        enabled: true,
+        autoPlay: false,
+        speed: 1.0,
+        voiceId: '0',
+        kokoroVoiceId: DEFAULT_KOKORO_VOICE_ID,
+      },
+      error: null,
+
+      checkDownloadStatus: async () => {
+        const [backbone, vocoder] = await Promise.all([
+          ttsService.isBackboneDownloaded(),
+          ttsService.isVocoderDownloaded(),
+        ]);
+        set({ isBackboneDownloaded: backbone, isVocoderDownloaded: vocoder });
+      },
+
+      downloadModels: async () => {
+        set({ error: null });
+        try {
+          set({ isDownloadingBackbone: true, backboneDownloadProgress: 0 });
+          await ttsService.downloadBackbone((p) => set({ backboneDownloadProgress: p }));
+          set({ isDownloadingBackbone: false, isBackboneDownloaded: true });
+
+          set({ isDownloadingVocoder: true, vocoderDownloadProgress: 0 });
+          await ttsService.downloadVocoder((p) => set({ vocoderDownloadProgress: p }));
+          set({ isDownloadingVocoder: false, isVocoderDownloaded: true });
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : 'Download failed';
+          logger.error('[TTS Store] Download error:', msg);
+          set({ isDownloadingBackbone: false, isDownloadingVocoder: false, error: msg });
+        }
+      },
+
+      deleteModels: async () => {
+        await ttsService.deleteModels();
+        set({
+          isBackboneDownloaded: false,
+          isVocoderDownloaded: false,
+          isModelLoaded: false,
+        });
+      },
+
+      loadModels: async () => {
+        if (get().isModelLoaded || get().isModelLoading) {
+          return;
+        }
+        set({ isModelLoading: true, error: null });
+        try {
+          await ttsService.loadModels();
+          set({ isModelLoaded: true });
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : 'Failed to load TTS models';
+          logger.error('[TTS Store] Load error:', msg);
+          set({ error: msg });
+        } finally {
+          set({ isModelLoading: false });
+        }
+      },
+
+      unloadModels: async () => {
+        await ttsService.unloadModels();
+        set({ isModelLoaded: false, isSpeaking: false, currentMessageId: null });
+      },
+
+      // ── Chat Mode ───────────────────────────────────────────────────────────
+
+      speak: async (text: string, messageId: string) => {
+        const { settings } = get();
+        logger.log('[TTS] speak() called, messageId=', messageId, 'enabled=', settings.enabled, 'isSpeaking=', get().isSpeaking, 'currentMessageId=', get().currentMessageId);
+        if (!settings.enabled) { logger.log('[TTS] speak() early return: not enabled'); return; }
+
+        // Tapping same message while speaking → stop
+        if (get().currentMessageId === messageId && get().isSpeaking) {
+          logger.log('[TTS] speak() toggling off (same message)');
+          get().stop();
+          return;
+        }
+
+        // ── Kokoro fast path (Android 13+ / iOS 17+, model ready) ────────────
+        if (get().kokoroReady && isExecutorchSupported()) {
+          logger.log('[TTS] speak() Kokoro path');
+          ttsService.stop();
+          kokoroRef.stop(true);
+          // Show loader immediately while we wait for executorch to become available
+          set({ isSpeaking: true, isPaused: false, isAudioPlaying: false, isGeneratingAudio: false, currentMessageId: messageId, playbackElapsed: 0, playSessionId: get().playSessionId + 1, error: null });
+          try {
+            kokoroRef.setKeepAlive(false);
+            // Retry loop — executorch may still be busy from a previous stream.
+            // Loader stays visible the whole time (isSpeaking=true, isAudioPlaying=false).
+            const MAX_RETRIES = 10;
+            for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
+              try {
+                logger.log('[TTS] speak() attempt', attempt + 1);
+                set({ isAudioPlaying: true });
+                await kokoroRef.speak(text, settings.speed);
+                logger.log('[TTS] speak() kokoroRef.speak resolved');
+                break;
+              } catch (err: any) {
+                if (err?.code === 104 && attempt < MAX_RETRIES - 1) {
+                  logger.log('[TTS] speak() executorch busy, retrying in 200ms');
+                  set({ isAudioPlaying: false });
+                  await new Promise<void>((r) => setTimeout(r, 200));
+                  continue;
+                }
+                throw err;
+              }
+            }
+          } catch (err) {
+            const msg = err instanceof Error ? err.message : 'Speech failed';
+            logger.error('[TTS Store] Kokoro speak error:', msg);
+            set({ error: msg });
+          } finally {
+            const stillOwns = get().currentMessageId === messageId;
+            logger.log('[TTS] speak() finally: currentMessageId=', get().currentMessageId, 'messageId=', messageId, 'stillOwns=', stillOwns);
+            // Only clear state if this speak call still owns playback
+            if (stillOwns) {
+              set({ isSpeaking: false, isPaused: false, isAudioPlaying: false, currentAmplitude: 0, playbackElapsed: 0, currentMessageId: null });
+            }
+          }
+          return;
+        }
+
+        // ── OuteTTS fallback (slow, Android <13 / Kokoro not loaded yet) ─────
+        if (!get().isModelLoaded) return;
+        kokoroRef.stop(true); // ensure Kokoro is silent
+        // Truncate to keep generation time reasonable (~300 chars ≈ 20-30s on device)
+        const truncated = text.length > 300 ? `${text.slice(0, 297)}...` : text;
+        set({ isSpeaking: true, isGeneratingAudio: true, currentMessageId: messageId, playSessionId: get().playSessionId + 1, error: null });
+        try {
+          await ttsService.speak(
+            truncated,
+            { speed: settings.speed, voiceId: settings.voiceId },
+            () => set({ isGeneratingAudio: false }),
+          );
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : 'Speech failed';
+          logger.error('[TTS Store] OuteTTS speak error:', msg);
+          set({ error: msg });
+        } finally {
+          if (get().currentMessageId === messageId) {
+            set({ isSpeaking: false, isGeneratingAudio: false, currentMessageId: null });
+          }
+        }
+      },
+
+      stop: () => {
+        logger.log('[TTS Store] stop() called, isSpeaking:', get().isSpeaking);
+        kokoroRef.stop(true);
+        ttsService.stop();
+        set({ isSpeaking: false, isPaused: false, isAudioPlaying: false, currentAmplitude: 0, playbackElapsed: 0, isGeneratingAudio: false, currentMessageId: null });
+      },
+
+      pause: () => {
+        kokoroRef.pause();
+        set({ isPaused: true, isAudioPlaying: false, currentAmplitude: 0 });
+      },
+
+      resume: () => {
+        kokoroRef.resume();
+        set({ isPaused: false, isAudioPlaying: true });
+      },
+
+      // ── Audio Mode ──────────────────────────────────────────────────────────
+
+      generateAndSave: async (text, conversationId, messageId) => {
+        const { settings } = get();
+        const { path, audio } = await ttsService.generateAndSave(
+          text,
+          { conversationId, messageId },
+          { voiceId: settings.voiceId },
+        );
+        await get().refreshCacheSize();
+        return { path, waveformData: audio.waveformData, durationSeconds: audio.durationSeconds };
+      },
+
+      playMessage: async (messageId, filePath, startOffset = 0) => {
+        const { settings } = get();
+        logger.log('[TTS] playMessage() called, messageId=', messageId, 'isSpeaking=', get().isSpeaking);
+        if (get().currentMessageId === messageId && get().isSpeaking) {
+          logger.log('[TTS] playMessage() toggling off (same message)');
+          get().stopPlayback();
+          return;
+        }
+        // Claim playback ownership FIRST so in-flight speak() finally blocks see the new messageId
+        set({ isSpeaking: true, isAudioPlaying: false, currentMessageId: messageId, playbackElapsed: 0, playSessionId: get().playSessionId + 1, error: null });
+        kokoroRef.stop(true);
+        ttsService.stop();
+        // Signal audio is playing so the seekbar timer starts
+        set({ isAudioPlaying: true });
+        try {
+          await ttsService.playFromFile(filePath, settings.speed, startOffset);
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : 'Playback failed';
+          logger.error('[TTS Store] Playback error:', msg);
+          if (get().currentMessageId === messageId) { set({ error: msg }); }
+        } finally {
+          if (get().currentMessageId === messageId) {
+            set({ isSpeaking: false, isAudioPlaying: false, currentMessageId: null });
+          }
+        }
+      },
+
+      stopPlayback: () => {
+        kokoroRef.stop(true);
+        ttsService.stop();
+        set({ isSpeaking: false, isAudioPlaying: false, currentMessageId: null });
+      },
+
+      // ── Cache ───────────────────────────────────────────────────────────────
+
+      refreshCacheSize: async () => {
+        const mb = await ttsService.getAudioCacheSizeMB();
+        set({ audioCacheSizeMB: mb });
+      },
+
+      clearAudioCache: async () => {
+        await ttsService.clearAudioCache();
+        set({ audioCacheSizeMB: 0 });
+      },
+
+      setKokoroState: (ready, progress) => {
+        set({ kokoroReady: ready, kokoroDownloadProgress: progress });
+      },
+      setKokoroActiveVoiceId: (id) => {
+        set({ kokoroActiveVoiceId: id });
+      },
+
+      setAudioPlaying: (playing) => set({ isAudioPlaying: playing }),
+      setCurrentAmplitude: (amplitude) => set({ currentAmplitude: amplitude }),
+      addPlaybackElapsed: (seconds) => set((s) => ({ playbackElapsed: s.playbackElapsed + seconds })),
+
+      updateSettings: (patch) => {
+        set((state) => ({ settings: { ...state.settings, ...patch } }));
+      },
+
+      clearError: () => set({ error: null }),
+    }),
+    {
+      name: 'tts-store',
+      storage: createJSONStorage(() => AsyncStorage),
+      // Only persist settings — runtime state is transient
+      partialize: (state) => ({ settings: state.settings }),
+    },
+  ),
+);
diff --git a/src/stores/whisperStore.ts b/src/stores/whisperStore.ts
index 6c3d811b..6b3f9739 100644
--- a/src/stores/whisperStore.ts
+++ b/src/stores/whisperStore.ts
@@ -14,6 +14,7 @@ interface WhisperState {
 
   // Actions
   downloadModel: (modelId: string) => Promise<void>;
+  downloadFromUrl: (url: string, modelId: string) => Promise<void>;
   loadModel: () => Promise<void>;
   unloadModel: () => Promise<void>;
   deleteModel: () => Promise<void>;
@@ -55,6 +56,23 @@ export const useWhisperStore = create<WhisperState>()(
         }
       },
 
+      downloadFromUrl: async (url: string, modelId: string) => {
+        set({ isDownloading: true, downloadProgress: 0, error: null });
+        try {
+          await whisperService.downloadFromUrl(url, modelId, (progress) => {
+            set({ downloadProgress: progress });
+          });
+          set({ downloadedModelId: modelId, isDownloading: false, downloadProgress: 1 });
+          await get().loadModel();
+        } catch (error) {
+          set({
+            isDownloading: false,
+            downloadProgress: 0,
+            error: error instanceof Error ? error.message : 'Download failed',
+          });
+        }
+      },
+
       loadModel: async () => {
         const { downloadedModelId, isModelLoading } = get();
         if (!downloadedModelId) {
diff --git a/src/types/index.ts b/src/types/index.ts
index dc72044a..db78c349 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -125,16 +125,16 @@ export interface ModelRecommendation {
 // Media attachment types
 export interface MediaAttachment {
   id: string;
-  type: 'image' | 'document';
+  type: 'image' | 'document' | 'audio';
   uri: string;
   mimeType?: string;
   width?: number;
   height?: number;
   fileName?: string;
-  /** For documents: the extracted text content */
-  textContent?: string;
-  /** For documents: file size in bytes */
-  fileSize?: number;
+  textContent?: string; // documents: extracted text
+  fileSize?: number; // documents: file size in bytes
+  audioFormat?: 'wav' | 'mp3'; // audio attachments: format for model input
+  audioDurationSeconds?: number; // audio attachments: recorded duration in seconds
 }
 
 // Generation metadata - details about how a message was generated
@@ -187,6 +187,8 @@ export interface Message {
   toolCalls?: Array<{ id?: string; name: string; arguments: string }>;
   /** Tool name (for tool result messages) */
   toolName?: string;
+  /** True when this assistant message was generated while interfaceMode === 'audio' */
+  isAudioModeMessage?: boolean;
 }
 
 export interface Conversation {
diff --git a/src/types/tts.ts b/src/types/tts.ts
new file mode 100644
index 00000000..e9fed2a1
--- /dev/null
+++ b/src/types/tts.ts
@@ -0,0 +1,17 @@
+// Extends the Message interface with Audio Mode fields.
+// Kept separate to avoid exceeding the line limit in types/index.ts.
+
+declare module './index' {
+  interface Message {
+    /** Audio Mode: path to PCM file on disk */
+    audioPath?: string;
+    /** Audio Mode: 200-point amplitude envelope for waveform bar */
+    waveformData?: number[];
+    /** Audio Mode: total audio duration in seconds */
+    audioDurationSeconds?: number;
+    /** True while TTS is generating audio for this message */
+    isGeneratingAudio?: boolean;
+  }
+}
+
+export {};
diff --git a/src/utils/messageContent.ts b/src/utils/messageContent.ts
index 59dc47c4..a80cc8ea 100644
--- a/src/utils/messageContent.ts
+++ b/src/utils/messageContent.ts
@@ -33,7 +33,26 @@ export function stripControlTokens(content: string): string {
   result = result.replace(CHANNEL_FINAL_START, '');
   result = result.replace(GEMMA4_THINK_OPEN, '');
   result = result.replace(GEMMA4_THINK_CLOSE, '');
-  return result;
+
+  // ── Generic XML/structured block stripping ──────────────────────────────
+  // Catches tool calls from any provider (minimax, anthropic, gemma, generic)
+  // by matching any XML-like block whose tag name contains tool/invoke/function/parameter keywords.
+  // This is intentionally broad — these blocks never contain natural language the user should see.
+  result = result.replace(/<\/?(?:[\w:-]*(?:tool_call|invoke|function_call|parameters?)[\w:-]*)(?:\s[^>]*)?>[\s\S]*?(?=<\/?(?:[\w:-]*(?:tool_call|invoke|function_call|parameters?)[\w:-]*)(?:\s[^>]*)?>|$)/gi, '');
+  // Safety net: strip any remaining paired XML blocks with tool/invoke in the tag name
+  result = result.replace(/<([\w:-]*(?:tool_call|invoke|function_call)[\w:-]*)[\s\S]*?<\/\1>/gi, '');
+  // Strip bare lines that are just a namespace:tag_name pattern (e.g. "minimax:tool_call")
+  result = result.replace(/^[\w]+:[\w_]+\s*$/gm, '');
+
+  // ── Thinking blocks ─────────────────────────────────────────────────────
+  // Complete <think>...</think> blocks (Qwen 3.5, DeepSeek, etc.)
+  result = result.replace(/<think>[\s\S]*?<\/think>/gi, '');
+  // Orphaned thinking: streaming parser may consume <think> but leave content + </think>
+  result = result.replace(/^[\s\S]*?<\/think>\s*/i, '');
+  // Bare <think> or </think> tags
+  result = result.replace(/<\/?think>/gi, '');
+
+  return result.trim();
 }
 
 /**
@@ -43,4 +62,39 @@ export function stripControlTokens(content: string): string {
  */
 export function stripStreamingControlTokens(content: string): string {
   return CONTROL_TOKEN_PATTERNS.reduce((acc, pattern) => acc.replace(pattern, ''), content);
+}
+
+/**
+ * Strip markdown formatting for TTS speech. Preserves the readable text
+ * but removes syntax that Kokoro would read aloud as literal characters.
+ */
+export function stripMarkdownForSpeech(content: string): string {
+  let result = content;
+  // Headers: ### Title → Title
+  result = result.replace(/^#{1,6}\s+/gm, '');
+  // Bold/italic: **text** or *text* or __text__ or _text_ → text
+  result = result.replace(/\*{1,3}([^*]+)\*{1,3}/g, '$1');
+  result = result.replace(/_{1,3}([^_]+)_{1,3}/g, '$1');
+  // Links: [text](url) → text
+  result = result.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+  // Images: ![alt](url) → alt
+  result = result.replace(/!\[([^\]]*)\]\([^)]+\)/g, '$1');
+  // Inline code: `code` → code
+  result = result.replace(/`([^`]+)`/g, '$1');
+  // Code blocks: ```...``` → (removed)
+  result = result.replace(/```[\s\S]*?```/g, '');
+  // Tables: | cell | cell | → cell, cell (keep cell content, drop pipes/dashes)
+  result = result.replace(/^\|[-:|\s]+\|$/gm, ''); // separator rows
+  result = result.replace(/\|/g, ','); // pipes → commas
+  // Bullet markers: * item or - item → item
+  result = result.replace(/^[\s]*[*\-+]\s+/gm, '');
+  // Numbered lists: 1. item → item
+  result = result.replace(/^[\s]*\d+\.\s+/gm, '');
+  // Horizontal rules
+  result = result.replace(/^[-*_]{3,}$/gm, '');
+  // Blockquotes: > text → text
+  result = result.replace(/^>\s+/gm, '');
+  // Clean up excessive whitespace/newlines
+  result = result.replace(/\n{3,}/g, '\n\n');
+  return result.trim();
 }
\ No newline at end of file