From 2e84a449a7f5b6e213996989a492201f937e7169 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?= <mateusz.sluszniak@swmansion.com>
Date: Mon, 2 Feb 2026 20:12:19 +0100
Subject: [PATCH 1/2] Fix transcription for longer audio

---
 apps/speech/screens/SpeechToTextScreen.tsx    | 22 +++++++++++++++----
 .../models/speech_to_text/asr/ASR.h           |  4 +++-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx
index 1e4525986..da0374da0 100644
--- a/apps/speech/screens/SpeechToTextScreen.tsx
+++ b/apps/speech/screens/SpeechToTextScreen.tsx
@@ -50,16 +50,30 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     AudioManager.requestRecordingPermissions();
   }, []);
 
+  async function getAudioFile(sourceUri: string) {
+    const destination = FileSystem.cacheDirectory + 'audio_file.wav';
+
+    if (sourceUri.startsWith('http')) {
+      // Case A: Remote URL -> Download it
+      const { uri } = await FileSystem.downloadAsync(sourceUri, destination);
+      return uri;
+    } else {
+      // Case B: Local URI -> Copy it
+      await FileSystem.copyAsync({
+        from: sourceUri,
+        to: destination,
+      });
+      return destination;
+    }
+  }
+
   const handleTranscribeFromURL = async () => {
     if (!audioURL.trim()) {
       console.warn('Please provide a valid audio file URL');
       return;
     }
 
-    const { uri } = await FileSystem.downloadAsync(
-      audioURL,
-      FileSystem.cacheDirectory + 'audio_file'
-    );
+    const uri = await getAudioFile(audioURL);
 
     const audioContext = new AudioContext({ sampleRate: 16000 });
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
index a0ea7e181..41d1578b4 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/speech_to_text/asr/ASR.h
@@ -34,7 +34,9 @@ class ASR {
   // The maximum number of tokens the decoder can generate per chunk
   constexpr static int32_t kMaxDecodeLength = 128;
   // Maximum duration of each audio chunk to process (in seconds)
-  constexpr static int32_t kChunkSize = 30;
+  // It is intentionally set to 29 since otherwise only the last chunk would be
+  // correctly transcribed due to the model's positional encoding limit
+  constexpr static int32_t kChunkSize = 29;
   // Sampling rate expected by Whisper and the model's audio pipeline (16 kHz)
   constexpr static int32_t kSamplingRate = 16000;
   // Minimum allowed chunk length before processing (in audio samples)

From c20e7f2840c65348e724279bb778432f20eceff0 Mon Sep 17 00:00:00 2001
From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com>
Date: Mon, 2 Feb 2026 20:14:44 +0100
Subject: [PATCH 2/2] Update apps/speech/screens/SpeechToTextScreen.tsx

---
 apps/speech/screens/SpeechToTextScreen.tsx | 2 --
 1 file changed, 2 deletions(-)

diff --git a/apps/speech/screens/SpeechToTextScreen.tsx b/apps/speech/screens/SpeechToTextScreen.tsx
index da0374da0..da7ed0f7e 100644
--- a/apps/speech/screens/SpeechToTextScreen.tsx
+++ b/apps/speech/screens/SpeechToTextScreen.tsx
@@ -54,11 +54,9 @@ export const SpeechToTextScreen = ({ onBack }: { onBack: () => void }) => {
     const destination = FileSystem.cacheDirectory + 'audio_file.wav';
 
     if (sourceUri.startsWith('http')) {
-      // Case A: Remote URL -> Download it
       const { uri } = await FileSystem.downloadAsync(sourceUri, destination);
       return uri;
     } else {
-      // Case B: Local URI -> Copy it
       await FileSystem.copyAsync({
         from: sourceUri,
         to: destination,