diff --git a/.github/workflows/asr-benchmark.yml b/.github/workflows/asr-benchmark.yml
index 302577ffb..6702ee1b3 100644
--- a/.github/workflows/asr-benchmark.yml
+++ b/.github/workflows/asr-benchmark.yml
@@ -198,9 +198,38 @@ jobs:
echo "EXECUTION_TIME=$EXECUTION_TIME" >> $GITHUB_OUTPUT
echo "FILES_COUNT=$MAX_FILES" >> $GITHUB_OUTPUT
+ # Validate RTFx values - 0 or N/A indicates benchmark failure
+ if [ "$CLEAN_RTFx" = "0.00" ] || [ "$CLEAN_RTFx" = "N/A" ]; then
+ echo "⚠️ test-clean RTFx is 0 or N/A - benchmark may have failed"
+ CLEAN_RTFX_FAILED=1
+ fi
+ if [ "$CLEAN_V2_RTFx" = "0.00" ] || [ "$CLEAN_V2_RTFx" = "N/A" ]; then
+ echo "⚠️ test-clean (v2) RTFx is 0 or N/A - benchmark may have failed"
+ CLEAN_V2_RTFX_FAILED=1
+ fi
+ if [ "$OTHER_RTFx" = "0.00" ] || [ "$OTHER_RTFx" = "N/A" ]; then
+ echo "⚠️ test-other RTFx is 0 or N/A - benchmark may have failed"
+ OTHER_RTFX_FAILED=1
+ fi
+ if [ "$OTHER_V2_RTFx" = "0.00" ] || [ "$OTHER_V2_RTFx" = "N/A" ]; then
+ echo "⚠️ test-other (v2) RTFx is 0 or N/A - benchmark may have failed"
+ OTHER_V2_RTFX_FAILED=1
+ fi
+ if [ "$STREAMING_RTFx" = "0.00" ] || [ "$STREAMING_RTFx" = "N/A" ]; then
+ echo "⚠️ streaming RTFx is 0 or N/A - benchmark may have failed"
+ STREAMING_RTFX_FAILED=1
+ fi
+ if [ "$STREAMING_V2_RTFx" = "0.00" ] || [ "$STREAMING_V2_RTFx" = "N/A" ]; then
+ echo "⚠️ streaming (v2) RTFx is 0 or N/A - benchmark may have failed"
+ STREAMING_V2_RTFX_FAILED=1
+ fi
+
# Report failures summary
if [ ! -z "$CLEAN_FAILED" ] || [ ! -z "$OTHER_FAILED" ] || [ ! -z "$STREAMING_FAILED" ] || \
- [ ! -z "$CLEAN_V2_FAILED" ] || [ ! -z "$OTHER_V2_FAILED" ] || [ ! -z "$STREAMING_V2_FAILED" ]; then
+ [ ! -z "$CLEAN_V2_FAILED" ] || [ ! -z "$OTHER_V2_FAILED" ] || [ ! -z "$STREAMING_V2_FAILED" ] || \
+ [ ! -z "$CLEAN_RTFX_FAILED" ] || [ ! -z "$CLEAN_V2_RTFX_FAILED" ] || \
+ [ ! -z "$OTHER_RTFX_FAILED" ] || [ ! -z "$OTHER_V2_RTFX_FAILED" ] || \
+ [ ! -z "$STREAMING_RTFX_FAILED" ] || [ ! -z "$STREAMING_V2_RTFX_FAILED" ]; then
echo "BENCHMARK_STATUS=PARTIAL_FAILURE" >> $GITHUB_OUTPUT
echo "⚠️ Some benchmarks failed:"
[ ! -z "$CLEAN_FAILED" ] && echo " - test-clean benchmark failed"
@@ -209,7 +238,13 @@ jobs:
[ ! -z "$CLEAN_V2_FAILED" ] && echo " - test-clean (v2) benchmark failed"
[ ! -z "$OTHER_V2_FAILED" ] && echo " - test-other (v2) benchmark failed"
[ ! -z "$STREAMING_V2_FAILED" ] && echo " - streaming (v2) benchmark failed"
- # Don't exit with error to allow PR comment to be posted
+ [ ! -z "$CLEAN_RTFX_FAILED" ] && echo " - test-clean RTFx is 0 or N/A"
+ [ ! -z "$CLEAN_V2_RTFX_FAILED" ] && echo " - test-clean (v2) RTFx is 0 or N/A"
+ [ ! -z "$OTHER_RTFX_FAILED" ] && echo " - test-other RTFx is 0 or N/A"
+ [ ! -z "$OTHER_V2_RTFX_FAILED" ] && echo " - test-other (v2) RTFx is 0 or N/A"
+ [ ! -z "$STREAMING_RTFX_FAILED" ] && echo " - streaming RTFx is 0 or N/A"
+ [ ! -z "$STREAMING_V2_RTFX_FAILED" ] && echo " - streaming (v2) RTFx is 0 or N/A"
+ exit 1
else
echo "BENCHMARK_STATUS=SUCCESS" >> $GITHUB_OUTPUT
echo "✅ All benchmarks completed successfully"
diff --git a/.github/workflows/diarizer-benchmark.yml b/.github/workflows/diarizer-benchmark.yml
index 43251f671..0775ad844 100644
--- a/.github/workflows/diarizer-benchmark.yml
+++ b/.github/workflows/diarizer-benchmark.yml
@@ -115,6 +115,13 @@ jobs:
echo "CLUSTERING_TIME=${CLUSTERING_TIME}" >> $GITHUB_OUTPUT
echo "INFERENCE_TIME=${INFERENCE_TIME}" >> $GITHUB_OUTPUT
+ # Validate RTFx - 0 or empty indicates benchmark failure
+ if [ "$RTF" = "0" ] || [ -z "$RTF" ]; then
+ echo "❌ CRITICAL: RTFx is 0 or empty - benchmark failed"
+ echo "RTFx value: $RTF"
+ exit 1
+ fi
+
- name: Comment PR with Benchmark Results
if: always()
uses: actions/github-script@v7
diff --git a/.github/workflows/parakeet-eou-benchmark.yml b/.github/workflows/parakeet-eou-benchmark.yml
index bef59e306..62f45b556 100644
--- a/.github/workflows/parakeet-eou-benchmark.yml
+++ b/.github/workflows/parakeet-eou-benchmark.yml
@@ -104,6 +104,13 @@ jobs:
echo "MAX_FILES=$MAX_FILES" >> $GITHUB_OUTPUT
echo "BENCHMARK_STATUS=$BENCHMARK_STATUS" >> $GITHUB_OUTPUT
+ # Validate RTFx - 0 or N/A indicates benchmark failure
+ if [ "$RTFx" = "0.00" ] || [ "$RTFx" = "N/A" ]; then
+ echo "❌ CRITICAL: RTFx is 0 or N/A - benchmark failed"
+ echo "RTFx value: $RTFx"
+ exit 1
+ fi
+
- name: Comment PR
if: github.event_name == 'pull_request'
continue-on-error: true
diff --git a/.github/workflows/qwen3-asr-benchmark.yml b/.github/workflows/qwen3-asr-benchmark.yml
index 074bede82..ef62bd7a2 100644
--- a/.github/workflows/qwen3-asr-benchmark.yml
+++ b/.github/workflows/qwen3-asr-benchmark.yml
@@ -69,6 +69,31 @@ jobs:
echo "SMOKE_STATUS=FAILED" >> $GITHUB_OUTPUT
fi
+ # Extract RTFx metrics if results file exists
+ if [ -f qwen3_results_int8.json ]; then
+ MEDIAN_RTFx=$(jq -r '.summary.medianRTFx // "N/A"' qwen3_results_int8.json 2>/dev/null)
+ OVERALL_RTFx=$(jq -r '.summary.overallRTFx // "N/A"' qwen3_results_int8.json 2>/dev/null)
+
+ [ "$MEDIAN_RTFx" != "null" ] && [ "$MEDIAN_RTFx" != "N/A" ] && MEDIAN_RTFx=$(printf "%.2f" "$MEDIAN_RTFx") || MEDIAN_RTFx="N/A"
+ [ "$OVERALL_RTFx" != "null" ] && [ "$OVERALL_RTFx" != "N/A" ] && OVERALL_RTFx=$(printf "%.2f" "$OVERALL_RTFx") || OVERALL_RTFx="N/A"
+
+ echo "MEDIAN_RTFx=$MEDIAN_RTFx" >> $GITHUB_OUTPUT
+ echo "OVERALL_RTFx=$OVERALL_RTFx" >> $GITHUB_OUTPUT
+
+ # Fail if RTFx is 0 or N/A - indicates benchmark failure
+ if [ "$MEDIAN_RTFx" = "N/A" ] || [ "$MEDIAN_RTFx" = "0.00" ] || [ "$OVERALL_RTFx" = "N/A" ] || [ "$OVERALL_RTFx" = "0.00" ]; then
+ echo "❌ CRITICAL: RTFx is 0 or N/A - benchmark failed to produce valid results"
+ echo "Median RTFx: $MEDIAN_RTFx"
+ echo "Overall RTFx: $OVERALL_RTFx"
+ exit 1
+ fi
+ else
+ echo "❌ CRITICAL: Results file not found - benchmark failed"
+ echo "MEDIAN_RTFx=N/A" >> $GITHUB_OUTPUT
+ echo "OVERALL_RTFx=N/A" >> $GITHUB_OUTPUT
+ exit 1
+ fi
+
EXECUTION_TIME=$(( ($(date +%s) - BENCHMARK_START) / 60 ))m$(( ($(date +%s) - BENCHMARK_START) % 60 ))s
echo "EXECUTION_TIME=$EXECUTION_TIME" >> $GITHUB_OUTPUT
@@ -81,6 +106,9 @@ jobs:
const status = '${{ steps.smoketest.outputs.SMOKE_STATUS }}';
const emoji = status === 'PASSED' ? '✅' : '❌';
+ const medianRTFx = '${{ steps.smoketest.outputs.MEDIAN_RTFx }}';
+ const overallRTFx = '${{ steps.smoketest.outputs.OVERALL_RTFx }}';
+
const body = `## Qwen3-ASR int8 Smoke Test ${emoji}
| Check | Result |
@@ -91,6 +119,12 @@ jobs:
| Transcription pipeline | ${emoji} |
| Decoder size | 571 MB (vs 1.1 GB f32) |
+ ### Performance Metrics
+ | Metric | CI Value | Expected on Apple Silicon |
+ |--------|----------|--------------------------|
+ | Median RTFx | ${medianRTFx}x | ~2.5x |
+ | Overall RTFx | ${overallRTFx}x | ~2.5x |
+
Runtime: ${{ steps.smoketest.outputs.EXECUTION_TIME }}
**Note:** CI VM lacks physical GPU — CoreML MLState (macOS 15) KV cache produces degraded results on virtualized runners. On Apple Silicon: ~1.3% WER / 2.5x RTFx.
diff --git a/.github/workflows/sortformer-benchmark.yml b/.github/workflows/sortformer-benchmark.yml
index 2f9edd701..a3e04d662 100644
--- a/.github/workflows/sortformer-benchmark.yml
+++ b/.github/workflows/sortformer-benchmark.yml
@@ -115,6 +115,13 @@ jobs:
echo "DETECTED=${DETECTED}" >> $GITHUB_OUTPUT
echo "GROUND_TRUTH=${GROUND_TRUTH}" >> $GITHUB_OUTPUT
+ # Validate RTFx - 0 or empty indicates benchmark failure
+ if [ "$RTF" = "0" ] || [ -z "$RTF" ]; then
+ echo "❌ CRITICAL: RTFx is 0 or empty - benchmark failed"
+ echo "RTFx value: $RTF"
+ exit 1
+ fi
+
- name: Comment PR with Benchmark Results
if: always()
uses: actions/github-script@v7
diff --git a/.github/workflows/vad-benchmark.yml b/.github/workflows/vad-benchmark.yml
index 3a75a0e60..22806c1fc 100644
--- a/.github/workflows/vad-benchmark.yml
+++ b/.github/workflows/vad-benchmark.yml
@@ -74,6 +74,32 @@ jobs:
--threshold 0.5 \
--output voices_vad_results.json
+ - name: Validate RTFx metrics
+ run: |
+ # Validate MUSAN RTFx
+ if [ -f musan_vad_results.json ]; then
+ MUSAN_RTFx=$(jq -r '.rtfx // 0' musan_vad_results.json)
+ if [ "$MUSAN_RTFx" = "0" ] || [ -z "$MUSAN_RTFx" ]; then
+ echo "❌ CRITICAL: MUSAN RTFx is 0 or empty - benchmark failed"
+ exit 1
+ fi
+ else
+ echo "❌ CRITICAL: musan_vad_results.json not found"
+ exit 1
+ fi
+
+ # Validate VOiCES RTFx
+ if [ -f voices_vad_results.json ]; then
+ VOICES_RTFx=$(jq -r '.rtfx // 0' voices_vad_results.json)
+ if [ "$VOICES_RTFx" = "0" ] || [ -z "$VOICES_RTFx" ]; then
+ echo "❌ CRITICAL: VOiCES RTFx is 0 or empty - benchmark failed"
+ exit 1
+ fi
+ else
+ echo "❌ CRITICAL: voices_vad_results.json not found"
+ exit 1
+ fi
+
- name: Upload results
if: always()
uses: actions/upload-artifact@v4
diff --git a/Sources/FluidAudio/ModelNames.swift b/Sources/FluidAudio/ModelNames.swift
index 1d4d7e9fc..afc7d6af3 100644
--- a/Sources/FluidAudio/ModelNames.swift
+++ b/Sources/FluidAudio/ModelNames.swift
@@ -129,16 +129,8 @@ public enum Repo: String, CaseIterable {
return "nemotron-streaming/560ms"
case .sortformer:
return "sortformer"
- case .lseend:
- return "ls-eend"
- case .pocketTts:
- return "pocket-tts"
- case .multilingualG2p:
- return "charsiu-g2p-byt5"
- case .parakeetTdtCtc110m:
- return "parakeet-tdt-ctc-110m"
default:
- return name
+ return name.replacingOccurrences(of: "-coreml", with: "")
}
}
}
diff --git a/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift b/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
index 3e3607394..6048ff95c 100644
--- a/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
+++ b/Tests/FluidAudioTests/ASR/Parakeet/ModelNamesTests.swift
@@ -125,7 +125,7 @@ final class ModelNamesTests: XCTestCase {
// Verify name (repo slug with -coreml suffix)
XCTAssertEqual(repo.name, "parakeet-tdt-ctc-110m-coreml")
- // Verify folder name (simplified local folder name)
+ // Verify folder name (simplified - strips -coreml suffix by default)
XCTAssertEqual(repo.folderName, "parakeet-tdt-ctc-110m")
// Should have no subpath (not a variant repo)