From 05ac22429a3c21ba94b523ff280d45069d2351e3 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sat, 28 Mar 2026 14:19:11 -0400
Subject: [PATCH 1/5] Simplify folderName logic by stripping -coreml suffix by
 default

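Remove redundant special cases from the folderName property. It now keeps
special cases only for nested directory structures (EOU and Nemotron variants)
and otherwise applies a simple default rule: remove "-coreml" from the name.
Note that the default uses replacingOccurrences(of:with:), so it removes every
occurrence of "-coreml" in the name, not only a trailing suffix.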

This eliminates the inconsistency raised in #442 by applying a consistent
pattern across all models.

Before:
- Had 10+ special cases explicitly returning shortened names
- parakeetTdtCtc110m was inconsistent with other Parakeet models

After:
- Only 5 special cases for nested directories (parakeet-eou-streaming/*, nemotron-streaming/*)
- Default strips -coreml suffix for all other models
- All Parakeet models now follow the same pattern

Fixes #442
---
 Sources/FluidAudio/ModelNames.swift                | 14 +-------------
 .../ASR/Parakeet/ModelNamesTests.swift             |  2 +-
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/Sources/FluidAudio/ModelNames.swift b/Sources/FluidAudio/ModelNames.swift
index 1d4d7e9fc..d5efec205 100644
--- a/Sources/FluidAudio/ModelNames.swift
+++ b/Sources/FluidAudio/ModelNames.swift
@@ -115,8 +115,6 @@ public enum Repo: String, CaseIterable {
     /// Local folder name used for caching
     public var folderName: String {
         switch self {
-        case .kokoro:
-            return "kokoro"
         case .parakeetEou160:
             return "parakeet-eou-streaming/160ms"
         case .parakeetEou320:
@@ -127,18 +125,8 @@ public enum Repo: String, CaseIterable {
             return "nemotron-streaming/1120ms"
         case .nemotronStreaming560:
             return "nemotron-streaming/560ms"
-        case .sortformer:
-            return "sortformer"
-        case .lseend:
-            return "ls-eend"
-        case .pocketTts:
-            return "pocket-tts"
-        case .multilingualG2p:
-            return "charsiu-g2p-byt5"
-        case .parakeetTdtCtc110m:
-            return "parakeet-tdt-ctc-110m"
         default:
-            return name
+            return name.replacingOccurrences(of: "-coreml", with: "")
         }
     }
 }
diff --git a/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift b/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
index 3e3607394..6048ff95c 100644
--- a/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
+++ b/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
@@ -125,7 +125,7 @@ final class ModelNamesTests: XCTestCase {
         // Verify name (repo slug with -coreml suffix)
         XCTAssertEqual(repo.name, "parakeet-tdt-ctc-110m-coreml")
 
-        // Verify folder name (simplified local folder name)
+        // Verify folder name (simplified - strips -coreml suffix by default)
         XCTAssertEqual(repo.folderName, "parakeet-tdt-ctc-110m")
 
         // Should have no subpath (not a variant repo)

From c3aba6a96c7fa63fbb1fee75e8b78de312525f99 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sat, 28 Mar 2026 14:24:11 -0400
Subject: [PATCH 2/5] Update kokoro folderName test expectation to match
 simplified logic

---
 Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift b/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
index 8a059833c..1f8eb3274 100644
--- a/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
+++ b/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
@@ -111,7 +111,7 @@ final class MultilingualG2PTests: XCTestCase {
 
     func testRepoMultilingualG2P() {
         // Multilingual G2P models are bundled inside the kokoro repo
-        XCTAssertEqual(Repo.kokoro.folderName, "kokoro")
+        XCTAssertEqual(Repo.kokoro.folderName, "kokoro-82m")
         XCTAssertEqual(Repo.kokoro.remotePath, "FluidInference/kokoro-82m-coreml")
     }
 }

From 8b79517cde879e4f9c23c910895461343f661971 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sat, 28 Mar 2026 14:29:29 -0400
Subject: [PATCH 3/5] Keep kokoro and sortformer special cases to avoid
 breaking changes

Add back special cases for kokoro and sortformer to preserve existing
folder names and avoid forcing users to re-download models. Still removes
redundant special cases (lseend, pocketTts, multilingualG2p, parakeetTdtCtc110m)
that can safely use the default -coreml stripping logic.

Result: 7 special cases total (kokoro, sortformer, + 5 nested directories)
vs 11 special cases before. Still achieves consistency for Parakeet models
without breaking existing cached model locations.
---
 Sources/FluidAudio/ModelNames.swift                  | 4 ++++
 Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Sources/FluidAudio/ModelNames.swift b/Sources/FluidAudio/ModelNames.swift
index d5efec205..afc7d6af3 100644
--- a/Sources/FluidAudio/ModelNames.swift
+++ b/Sources/FluidAudio/ModelNames.swift
@@ -115,6 +115,8 @@ public enum Repo: String, CaseIterable {
     /// Local folder name used for caching
     public var folderName: String {
         switch self {
+        case .kokoro:
+            return "kokoro"
         case .parakeetEou160:
             return "parakeet-eou-streaming/160ms"
         case .parakeetEou320:
@@ -125,6 +127,8 @@ public enum Repo: String, CaseIterable {
             return "nemotron-streaming/1120ms"
         case .nemotronStreaming560:
             return "nemotron-streaming/560ms"
+        case .sortformer:
+            return "sortformer"
         default:
             return name.replacingOccurrences(of: "-coreml", with: "")
         }
diff --git a/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift b/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
index 1f8eb3274..8a059833c 100644
--- a/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
+++ b/Tests/FluidAudioTests/TTS/MultilingualG2PTests.swift
@@ -111,7 +111,7 @@ final class MultilingualG2PTests: XCTestCase {
 
     func testRepoMultilingualG2P() {
         // Multilingual G2P models are bundled inside the kokoro repo
-        XCTAssertEqual(Repo.kokoro.folderName, "kokoro-82m")
+        XCTAssertEqual(Repo.kokoro.folderName, "kokoro")
         XCTAssertEqual(Repo.kokoro.remotePath, "FluidInference/kokoro-82m-coreml")
     }
 }

From 31066deeec3a310e5ed47678376e8274ea434a46 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sat, 28 Mar 2026 14:37:46 -0400
Subject: [PATCH 4/5] Add RTFx tracking to qwen3-asr-benchmark workflow

Extract and display medianRTFx and overallRTFx metrics in the PR comment.
Previously the workflow was running benchmarks but not displaying the
performance metrics.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/qwen3-asr-benchmark.yml | 24 +++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/.github/workflows/qwen3-asr-benchmark.yml b/.github/workflows/qwen3-asr-benchmark.yml
index 074bede82..959f77ffa 100644
--- a/.github/workflows/qwen3-asr-benchmark.yml
+++ b/.github/workflows/qwen3-asr-benchmark.yml
@@ -69,6 +69,21 @@ jobs:
             echo "SMOKE_STATUS=FAILED" >> $GITHUB_OUTPUT
           fi
 
+          # Extract RTFx metrics if results file exists
+          if [ -f qwen3_results_int8.json ]; then
+            MEDIAN_RTFx=$(jq -r '.summary.medianRTFx // "N/A"' qwen3_results_int8.json 2>/dev/null)
+            OVERALL_RTFx=$(jq -r '.summary.overallRTFx // "N/A"' qwen3_results_int8.json 2>/dev/null)
+
+            [ "$MEDIAN_RTFx" != "null" ] && [ "$MEDIAN_RTFx" != "N/A" ] && MEDIAN_RTFx=$(printf "%.2f" "$MEDIAN_RTFx") || MEDIAN_RTFx="N/A"
+            [ "$OVERALL_RTFx" != "null" ] && [ "$OVERALL_RTFx" != "N/A" ] && OVERALL_RTFx=$(printf "%.2f" "$OVERALL_RTFx") || OVERALL_RTFx="N/A"
+
+            echo "MEDIAN_RTFx=$MEDIAN_RTFx" >> $GITHUB_OUTPUT
+            echo "OVERALL_RTFx=$OVERALL_RTFx" >> $GITHUB_OUTPUT
+          else
+            echo "MEDIAN_RTFx=N/A" >> $GITHUB_OUTPUT
+            echo "OVERALL_RTFx=N/A" >> $GITHUB_OUTPUT
+          fi
+
           EXECUTION_TIME=$(( ($(date +%s) - BENCHMARK_START) / 60 ))m$(( ($(date +%s) - BENCHMARK_START) % 60 ))s
           echo "EXECUTION_TIME=$EXECUTION_TIME" >> $GITHUB_OUTPUT
 
@@ -81,6 +96,9 @@ jobs:
             const status = '${{ steps.smoketest.outputs.SMOKE_STATUS }}';
             const emoji = status === 'PASSED' ? '✅' : '❌';
 
+            const medianRTFx = '${{ steps.smoketest.outputs.MEDIAN_RTFx }}';
+            const overallRTFx = '${{ steps.smoketest.outputs.OVERALL_RTFx }}';
+
             const body = `## Qwen3-ASR int8 Smoke Test ${emoji}
 
             | Check | Result |
@@ -91,6 +109,12 @@ jobs:
             | Transcription pipeline | ${emoji} |
             | Decoder size | 571 MB (vs 1.1 GB f32) |
 
+            ### Performance Metrics
+            | Metric | CI Value | Expected on Apple Silicon |
+            |--------|----------|--------------------------|
+            | Median RTFx | ${medianRTFx}x | ~2.5x |
+            | Overall RTFx | ${overallRTFx}x | ~2.5x |
+
             <sub>Runtime: ${{ steps.smoketest.outputs.EXECUTION_TIME }}</sub>
 
             <sub>**Note:** CI VM lacks physical GPU — CoreML MLState (macOS 15) KV cache produces degraded results on virtualized runners. On Apple Silicon: ~1.3% WER / 2.5x RTFx.</sub>

From 8ea07ff5bb26a26c4e5fe56dd418cf5bdcb79b02 Mon Sep 17 00:00:00 2001
From: Alex-Wengg <hanweng9@gmail.com>
Date: Sat, 28 Mar 2026 14:42:18 -0400
Subject: [PATCH 5/5] Fail benchmark workflows if RTFx is 0

Add validation to all benchmark workflows so that they fail with exit 1 when
an RTFx metric is 0, empty, or N/A, or when an expected results file is
missing. Any of these indicates a silent benchmark failure.

Changes:
- qwen3-asr-benchmark.yml: Validate medianRTFx and overallRTFx
- asr-benchmark.yml: Validate all 6 RTFx metrics (v2/v3, clean/other, streaming)
- diarizer-benchmark.yml: Validate RTFx
- parakeet-eou-benchmark.yml: Validate RTFx
- sortformer-benchmark.yml: Validate RTFx
- vad-benchmark.yml: Validate MUSAN and VOiCES RTFx

If RTFx is 0, it means one of the following:
1. Benchmark didn't run properly
2. Audio duration was 0
3. Processing failed silently

Better to fail fast than report misleading metrics.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 .github/workflows/asr-benchmark.yml          | 39 +++++++++++++++++++-
 .github/workflows/diarizer-benchmark.yml     |  7 ++++
 .github/workflows/parakeet-eou-benchmark.yml |  7 ++++
 .github/workflows/qwen3-asr-benchmark.yml    | 10 +++++
 .github/workflows/sortformer-benchmark.yml   |  7 ++++
 .github/workflows/vad-benchmark.yml          | 26 +++++++++++++
 6 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/asr-benchmark.yml b/.github/workflows/asr-benchmark.yml
index 302577ffb..6702ee1b3 100644
--- a/.github/workflows/asr-benchmark.yml
+++ b/.github/workflows/asr-benchmark.yml
@@ -198,9 +198,38 @@ jobs:
           echo "EXECUTION_TIME=$EXECUTION_TIME" >> $GITHUB_OUTPUT
           echo "FILES_COUNT=$MAX_FILES" >> $GITHUB_OUTPUT
 
+          # Validate RTFx values - 0.00 or N/A indicates benchmark failure
+          if [ "$CLEAN_RTFx" = "0.00" ] || [ "$CLEAN_RTFx" = "N/A" ]; then
+            echo "⚠️ test-clean RTFx is 0 or N/A - benchmark may have failed"
+            CLEAN_RTFX_FAILED=1
+          fi
+          if [ "$CLEAN_V2_RTFx" = "0.00" ] || [ "$CLEAN_V2_RTFx" = "N/A" ]; then
+            echo "⚠️ test-clean (v2) RTFx is 0 or N/A - benchmark may have failed"
+            CLEAN_V2_RTFX_FAILED=1
+          fi
+          if [ "$OTHER_RTFx" = "0.00" ] || [ "$OTHER_RTFx" = "N/A" ]; then
+            echo "⚠️ test-other RTFx is 0 or N/A - benchmark may have failed"
+            OTHER_RTFX_FAILED=1
+          fi
+          if [ "$OTHER_V2_RTFx" = "0.00" ] || [ "$OTHER_V2_RTFx" = "N/A" ]; then
+            echo "⚠️ test-other (v2) RTFx is 0 or N/A - benchmark may have failed"
+            OTHER_V2_RTFX_FAILED=1
+          fi
+          if [ "$STREAMING_RTFx" = "0.00" ] || [ "$STREAMING_RTFx" = "N/A" ]; then
+            echo "⚠️ streaming RTFx is 0 or N/A - benchmark may have failed"
+            STREAMING_RTFX_FAILED=1
+          fi
+          if [ "$STREAMING_V2_RTFx" = "0.00" ] || [ "$STREAMING_V2_RTFx" = "N/A" ]; then
+            echo "⚠️ streaming (v2) RTFx is 0 or N/A - benchmark may have failed"
+            STREAMING_V2_RTFX_FAILED=1
+          fi
+
           # Report failures summary
           if [ ! -z "$CLEAN_FAILED" ] || [ ! -z "$OTHER_FAILED" ] || [ ! -z "$STREAMING_FAILED" ] || \
-             [ ! -z "$CLEAN_V2_FAILED" ] || [ ! -z "$OTHER_V2_FAILED" ] || [ ! -z "$STREAMING_V2_FAILED" ]; then
+             [ ! -z "$CLEAN_V2_FAILED" ] || [ ! -z "$OTHER_V2_FAILED" ] || [ ! -z "$STREAMING_V2_FAILED" ] || \
+             [ ! -z "$CLEAN_RTFX_FAILED" ] || [ ! -z "$CLEAN_V2_RTFX_FAILED" ] || \
+             [ ! -z "$OTHER_RTFX_FAILED" ] || [ ! -z "$OTHER_V2_RTFX_FAILED" ] || \
+             [ ! -z "$STREAMING_RTFX_FAILED" ] || [ ! -z "$STREAMING_V2_RTFX_FAILED" ]; then
             echo "BENCHMARK_STATUS=PARTIAL_FAILURE" >> $GITHUB_OUTPUT
             echo "⚠️ Some benchmarks failed:"
             [ ! -z "$CLEAN_FAILED" ] && echo "  - test-clean benchmark failed"
@@ -209,7 +238,13 @@ jobs:
             [ ! -z "$CLEAN_V2_FAILED" ] && echo "  - test-clean (v2) benchmark failed"
             [ ! -z "$OTHER_V2_FAILED" ] && echo "  - test-other (v2) benchmark failed"
             [ ! -z "$STREAMING_V2_FAILED" ] && echo "  - streaming (v2) benchmark failed"
-            # Don't exit with error to allow PR comment to be posted
+            [ ! -z "$CLEAN_RTFX_FAILED" ] && echo "  - test-clean RTFx is 0"
+            [ ! -z "$CLEAN_V2_RTFX_FAILED" ] && echo "  - test-clean (v2) RTFx is 0"
+            [ ! -z "$OTHER_RTFX_FAILED" ] && echo "  - test-other RTFx is 0"
+            [ ! -z "$OTHER_V2_RTFX_FAILED" ] && echo "  - test-other (v2) RTFx is 0"
+            [ ! -z "$STREAMING_RTFX_FAILED" ] && echo "  - streaming RTFx is 0"
+            [ ! -z "$STREAMING_V2_RTFX_FAILED" ] && echo "  - streaming (v2) RTFx is 0"
+            exit 1
           else
             echo "BENCHMARK_STATUS=SUCCESS" >> $GITHUB_OUTPUT
             echo "✅ All benchmarks completed successfully"
diff --git a/.github/workflows/diarizer-benchmark.yml b/.github/workflows/diarizer-benchmark.yml
index 43251f671..0775ad844 100644
--- a/.github/workflows/diarizer-benchmark.yml
+++ b/.github/workflows/diarizer-benchmark.yml
@@ -115,6 +115,13 @@ jobs:
           echo "CLUSTERING_TIME=${CLUSTERING_TIME}" >> $GITHUB_OUTPUT
           echo "INFERENCE_TIME=${INFERENCE_TIME}" >> $GITHUB_OUTPUT
 
+          # Validate RTFx - 0 or an empty value indicates benchmark failure
+          if [ "$RTF" = "0" ] || [ -z "$RTF" ]; then
+            echo "❌ CRITICAL: RTFx is 0 or empty - benchmark failed"
+            echo "RTFx value: $RTF"
+            exit 1
+          fi
+
       - name: Comment PR with Benchmark Results
         if: always()
         uses: actions/github-script@v7
diff --git a/.github/workflows/parakeet-eou-benchmark.yml b/.github/workflows/parakeet-eou-benchmark.yml
index bef59e306..62f45b556 100644
--- a/.github/workflows/parakeet-eou-benchmark.yml
+++ b/.github/workflows/parakeet-eou-benchmark.yml
@@ -104,6 +104,13 @@ jobs:
           echo "MAX_FILES=$MAX_FILES" >> $GITHUB_OUTPUT
           echo "BENCHMARK_STATUS=$BENCHMARK_STATUS" >> $GITHUB_OUTPUT
 
+          # Validate RTFx - 0 or N/A indicates benchmark failure
+          if [ "$RTFx" = "0.00" ] || [ "$RTFx" = "N/A" ]; then
+            echo "❌ CRITICAL: RTFx is 0 or N/A - benchmark failed"
+            echo "RTFx value: $RTFx"
+            exit 1
+          fi
+
       - name: Comment PR
         if: github.event_name == 'pull_request'
         continue-on-error: true
diff --git a/.github/workflows/qwen3-asr-benchmark.yml b/.github/workflows/qwen3-asr-benchmark.yml
index 959f77ffa..ef62bd7a2 100644
--- a/.github/workflows/qwen3-asr-benchmark.yml
+++ b/.github/workflows/qwen3-asr-benchmark.yml
@@ -79,9 +79,19 @@ jobs:
 
             echo "MEDIAN_RTFx=$MEDIAN_RTFx" >> $GITHUB_OUTPUT
             echo "OVERALL_RTFx=$OVERALL_RTFx" >> $GITHUB_OUTPUT
+
+            # Fail if RTFx is 0 or N/A - indicates benchmark failure
+            if [ "$MEDIAN_RTFx" = "N/A" ] || [ "$MEDIAN_RTFx" = "0.00" ] || [ "$OVERALL_RTFx" = "N/A" ] || [ "$OVERALL_RTFx" = "0.00" ]; then
+              echo "❌ CRITICAL: RTFx is 0 or N/A - benchmark failed to produce valid results"
+              echo "Median RTFx: $MEDIAN_RTFx"
+              echo "Overall RTFx: $OVERALL_RTFx"
+              exit 1
+            fi
           else
+            echo "❌ CRITICAL: Results file not found - benchmark failed"
             echo "MEDIAN_RTFx=N/A" >> $GITHUB_OUTPUT
             echo "OVERALL_RTFx=N/A" >> $GITHUB_OUTPUT
+            exit 1
           fi
 
           EXECUTION_TIME=$(( ($(date +%s) - BENCHMARK_START) / 60 ))m$(( ($(date +%s) - BENCHMARK_START) % 60 ))s
diff --git a/.github/workflows/sortformer-benchmark.yml b/.github/workflows/sortformer-benchmark.yml
index 2f9edd701..a3e04d662 100644
--- a/.github/workflows/sortformer-benchmark.yml
+++ b/.github/workflows/sortformer-benchmark.yml
@@ -115,6 +115,13 @@ jobs:
           echo "DETECTED=${DETECTED}" >> $GITHUB_OUTPUT
           echo "GROUND_TRUTH=${GROUND_TRUTH}" >> $GITHUB_OUTPUT
 
+          # Validate RTFx - 0 or an empty value indicates benchmark failure
+          if [ "$RTF" = "0" ] || [ -z "$RTF" ]; then
+            echo "❌ CRITICAL: RTFx is 0 or empty - benchmark failed"
+            echo "RTFx value: $RTF"
+            exit 1
+          fi
+
       - name: Comment PR with Benchmark Results
         if: always()
         uses: actions/github-script@v7
diff --git a/.github/workflows/vad-benchmark.yml b/.github/workflows/vad-benchmark.yml
index 3a75a0e60..22806c1fc 100644
--- a/.github/workflows/vad-benchmark.yml
+++ b/.github/workflows/vad-benchmark.yml
@@ -74,6 +74,32 @@ jobs:
             --threshold 0.5 \
             --output voices_vad_results.json
 
+      - name: Validate RTFx metrics
+        run: |
+          # Validate MUSAN RTFx
+          if [ -f musan_vad_results.json ]; then
+            MUSAN_RTFx=$(jq -r '.rtfx // 0' musan_vad_results.json)
+            if [ "$MUSAN_RTFx" = "0" ] || [ -z "$MUSAN_RTFx" ]; then
+              echo "❌ CRITICAL: MUSAN RTFx is 0 or empty - benchmark failed"
+              exit 1
+            fi
+          else
+            echo "❌ CRITICAL: musan_vad_results.json not found"
+            exit 1
+          fi
+
+          # Validate VOiCES RTFx
+          if [ -f voices_vad_results.json ]; then
+            VOICES_RTFx=$(jq -r '.rtfx // 0' voices_vad_results.json)
+            if [ "$VOICES_RTFx" = "0" ] || [ -z "$VOICES_RTFx" ]; then
+              echo "❌ CRITICAL: VOiCES RTFx is 0 or empty - benchmark failed"
+              exit 1
+            fi
+          else
+            echo "❌ CRITICAL: voices_vad_results.json not found"
+            exit 1
+          fi
+
       - name: Upload results
         if: always()
         uses: actions/upload-artifact@v4