Skip to content

Make parakeetTdtCtc110m folderName consistent with other Parakeet models (#2014)

Workflow file for this run

---
# ASR benchmark workflow: runs the FluidAudio CLI against LibriSpeech
# (test-clean / test-other) for Parakeet v3 and v2, plus streaming runs,
# extracts WER/RTFx metrics with jq, and posts/updates a sticky PR comment.
name: ASR Benchmark

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

jobs:
  asr-benchmark:
    name: ASR Benchmark
    runs-on: macos-15
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@v5

      - uses: swift-actions/setup-swift@v2
        with:
          swift-version: "6.1"

      # Cache the Swift build products, downloaded CoreML models, the
      # LibriSpeech dataset, and Homebrew's ffmpeg so reruns are fast.
      - name: Cache Dependencies
        uses: actions/cache@v4
        with:
          path: |
            .build
            ~/Library/Application Support/FluidAudio/Models/parakeet-tdt-0.6b-v3-coreml
            ~/Library/Application Support/FluidAudio/Models/parakeet-tdt-0.6b-v2-coreml
            ~/Library/Application Support/FluidAudio/Datasets/LibriSpeech
            ~/Library/Caches/Homebrew
            /usr/local/Cellar/ffmpeg
            /opt/homebrew/Cellar/ffmpeg
          key: ${{ runner.os }}-asr-${{ hashFiles('Package.resolved', 'Sources/FluidAudio/Frameworks/**', 'Sources/FluidAudio/ModelRegistry.swift', 'Sources/FluidAudio/ModelNames.swift') }}

      - name: Install ffmpeg
        run: |
          brew install ffmpeg || echo "ffmpeg may already be installed"
          ffmpeg -version || echo "ffmpeg not available"

      - name: Build
        run: swift build -c release

      - name: Run Benchmarks
        id: benchmark
        run: |
          MAX_FILES="25"
          BENCHMARK_START=$(date +%s)

          # Set error handling
          set -o pipefail

          # Function to run benchmark with error capture.
          # Args: subset, max files, output JSON path, optional extra CLI args.
          # EXTRA_ARGS is intentionally unquoted so it word-splits into flags.
          run_benchmark() {
            local SUBSET=$1
            local MAX=$2
            local OUTPUT=$3
            local EXTRA_ARGS="${4:-}"
            echo "========================================="
            echo "Running ASR benchmark: $SUBSET (max $MAX files)"
            echo "Output: $OUTPUT"
            echo "Extra args: $EXTRA_ARGS"
            echo "========================================="
            if swift run fluidaudiocli asr-benchmark \
              --subset "$SUBSET" --max-files "$MAX" \
              --auto-download --output "$OUTPUT" $EXTRA_ARGS > benchmark_log.txt 2>&1; then
              echo "✅ Benchmark $SUBSET completed successfully"
              return 0
            else
              # Capture the exit code immediately so later statements can't clobber $?.
              local STATUS=$?
              echo "❌ Benchmark $SUBSET FAILED with exit code $STATUS"
              echo "Full output:"
              cat benchmark_log.txt
              # Continue with other benchmarks even if one fails
              return 1
            fi
          }

          # Run benchmarks with error capture
          run_benchmark "test-clean" "$MAX_FILES" "asr_results_clean.json" || CLEAN_FAILED=1
          run_benchmark "test-other" "$MAX_FILES" "asr_results_other.json" || OTHER_FAILED=1
          run_benchmark "test-clean" "5" "asr_results_streaming.json" "--test-streaming --chunk-duration 0.5" || STREAMING_FAILED=1
          # English-optimized (v2) runs
          run_benchmark "test-clean" "$MAX_FILES" "asr_results_clean_v2.json" "--model-version v2" || CLEAN_V2_FAILED=1
          run_benchmark "test-other" "$MAX_FILES" "asr_results_other_v2.json" "--model-version v2" || OTHER_V2_FAILED=1
          run_benchmark "test-clean" "5" "asr_results_streaming_v2.json" "--test-streaming --chunk-duration 0.5 --model-version v2" || STREAMING_V2_FAILED=1

          # Extract metrics with error handling
          if [ -f asr_results_clean.json ]; then
            CLEAN_WER_AVG=$(jq -r '.summary.averageWER * 100' asr_results_clean.json 2>/dev/null)
            CLEAN_WER_MED=$(jq -r '.summary.medianWER * 100' asr_results_clean.json 2>/dev/null)
            CLEAN_AUDIO=$(jq -r '.summary.totalAudioDuration' asr_results_clean.json 2>/dev/null)
            CLEAN_TIME=$(jq -r '.summary.totalProcessingTime' asr_results_clean.json 2>/dev/null)
            CLEAN_RTFx=$(jq -r '.summary.medianRTFx' asr_results_clean.json 2>/dev/null)
            # Format values only if they exist and are not null
            [ "$CLEAN_WER_AVG" != "null" ] && [ -n "$CLEAN_WER_AVG" ] && CLEAN_WER_AVG=$(printf "%.2f" "$CLEAN_WER_AVG") || CLEAN_WER_AVG="N/A"
            [ "$CLEAN_WER_MED" != "null" ] && [ -n "$CLEAN_WER_MED" ] && CLEAN_WER_MED=$(printf "%.2f" "$CLEAN_WER_MED") || CLEAN_WER_MED="N/A"
            [ "$CLEAN_RTFx" != "null" ] && [ -n "$CLEAN_RTFx" ] && CLEAN_RTFx=$(printf "%.2f" "$CLEAN_RTFx") || CLEAN_RTFx="N/A"
          fi
          if [ -f asr_results_clean_v2.json ]; then
            CLEAN_V2_WER_AVG=$(jq -r '.summary.averageWER * 100' asr_results_clean_v2.json 2>/dev/null)
            CLEAN_V2_WER_MED=$(jq -r '.summary.medianWER * 100' asr_results_clean_v2.json 2>/dev/null)
            CLEAN_V2_RTFx=$(jq -r '.summary.medianRTFx' asr_results_clean_v2.json 2>/dev/null)
            [ "$CLEAN_V2_WER_AVG" != "null" ] && [ -n "$CLEAN_V2_WER_AVG" ] && CLEAN_V2_WER_AVG=$(printf "%.2f" "$CLEAN_V2_WER_AVG") || CLEAN_V2_WER_AVG="N/A"
            [ "$CLEAN_V2_WER_MED" != "null" ] && [ -n "$CLEAN_V2_WER_MED" ] && CLEAN_V2_WER_MED=$(printf "%.2f" "$CLEAN_V2_WER_MED") || CLEAN_V2_WER_MED="N/A"
            [ "$CLEAN_V2_RTFx" != "null" ] && [ -n "$CLEAN_V2_RTFx" ] && CLEAN_V2_RTFx=$(printf "%.2f" "$CLEAN_V2_RTFx") || CLEAN_V2_RTFx="N/A"
          fi
          if [ -f asr_results_other.json ]; then
            OTHER_WER_AVG=$(jq -r '.summary.averageWER * 100' asr_results_other.json 2>/dev/null)
            OTHER_WER_MED=$(jq -r '.summary.medianWER * 100' asr_results_other.json 2>/dev/null)
            OTHER_AUDIO=$(jq -r '.summary.totalAudioDuration' asr_results_other.json 2>/dev/null)
            OTHER_TIME=$(jq -r '.summary.totalProcessingTime' asr_results_other.json 2>/dev/null)
            OTHER_RTFx=$(jq -r '.summary.medianRTFx' asr_results_other.json 2>/dev/null)
            # Format values only if they exist and are not null
            [ "$OTHER_WER_AVG" != "null" ] && [ -n "$OTHER_WER_AVG" ] && OTHER_WER_AVG=$(printf "%.2f" "$OTHER_WER_AVG") || OTHER_WER_AVG="N/A"
            [ "$OTHER_WER_MED" != "null" ] && [ -n "$OTHER_WER_MED" ] && OTHER_WER_MED=$(printf "%.2f" "$OTHER_WER_MED") || OTHER_WER_MED="N/A"
            [ "$OTHER_RTFx" != "null" ] && [ -n "$OTHER_RTFx" ] && OTHER_RTFx=$(printf "%.2f" "$OTHER_RTFx") || OTHER_RTFx="N/A"
          fi
          if [ -f asr_results_other_v2.json ]; then
            OTHER_V2_WER_AVG=$(jq -r '.summary.averageWER * 100' asr_results_other_v2.json 2>/dev/null)
            OTHER_V2_WER_MED=$(jq -r '.summary.medianWER * 100' asr_results_other_v2.json 2>/dev/null)
            OTHER_V2_RTFx=$(jq -r '.summary.medianRTFx' asr_results_other_v2.json 2>/dev/null)
            [ "$OTHER_V2_WER_AVG" != "null" ] && [ -n "$OTHER_V2_WER_AVG" ] && OTHER_V2_WER_AVG=$(printf "%.2f" "$OTHER_V2_WER_AVG") || OTHER_V2_WER_AVG="N/A"
            [ "$OTHER_V2_WER_MED" != "null" ] && [ -n "$OTHER_V2_WER_MED" ] && OTHER_V2_WER_MED=$(printf "%.2f" "$OTHER_V2_WER_MED") || OTHER_V2_WER_MED="N/A"
            [ "$OTHER_V2_RTFx" != "null" ] && [ -n "$OTHER_V2_RTFx" ] && OTHER_V2_RTFx=$(printf "%.2f" "$OTHER_V2_RTFx") || OTHER_V2_RTFx="N/A"
          fi
          if [ -f asr_results_streaming.json ]; then
            STREAMING_WER=$(jq -r '.summary.averageWER * 100' asr_results_streaming.json 2>/dev/null)
            STREAMING_RTFx=$(jq -r '.summary.medianRTFx' asr_results_streaming.json 2>/dev/null)
            STREAMING_AVG_CHUNK=$(jq -r '.summary.streaming.avgChunkProcessingTime' asr_results_streaming.json 2>/dev/null)
            STREAMING_MAX_CHUNK=$(jq -r '.summary.streaming.maxChunkProcessingTime' asr_results_streaming.json 2>/dev/null)
            STREAMING_CHUNKS=$(jq -r '.summary.streaming.totalChunksProcessed' asr_results_streaming.json 2>/dev/null)
            STREAMING_FIRST_TOKEN=$(jq -r '.summary.streaming.avgFirstTokenLatency // "N/A"' asr_results_streaming.json 2>/dev/null)
            # Format values only if they exist and are not null
            [ "$STREAMING_WER" != "null" ] && [ -n "$STREAMING_WER" ] && STREAMING_WER=$(printf "%.2f" "$STREAMING_WER") || STREAMING_WER="N/A"
            [ "$STREAMING_RTFx" != "null" ] && [ -n "$STREAMING_RTFx" ] && STREAMING_RTFx=$(printf "%.2f" "$STREAMING_RTFx") || STREAMING_RTFx="N/A"
            [ "$STREAMING_AVG_CHUNK" != "null" ] && [ -n "$STREAMING_AVG_CHUNK" ] && STREAMING_AVG_CHUNK=$(printf "%.3f" "$STREAMING_AVG_CHUNK") || STREAMING_AVG_CHUNK="N/A"
            [ "$STREAMING_MAX_CHUNK" != "null" ] && [ -n "$STREAMING_MAX_CHUNK" ] && STREAMING_MAX_CHUNK=$(printf "%.3f" "$STREAMING_MAX_CHUNK") || STREAMING_MAX_CHUNK="N/A"
            [ "$STREAMING_FIRST_TOKEN" != "null" ] && [ -n "$STREAMING_FIRST_TOKEN" ] && [ "$STREAMING_FIRST_TOKEN" != "N/A" ] && STREAMING_FIRST_TOKEN=$(printf "%.3f" "$STREAMING_FIRST_TOKEN")
          fi
          if [ -f asr_results_streaming_v2.json ]; then
            STREAMING_V2_WER=$(jq -r '.summary.averageWER * 100' asr_results_streaming_v2.json 2>/dev/null)
            STREAMING_V2_RTFx=$(jq -r '.summary.medianRTFx' asr_results_streaming_v2.json 2>/dev/null)
            STREAMING_V2_AVG_CHUNK=$(jq -r '.summary.streaming.avgChunkProcessingTime' asr_results_streaming_v2.json 2>/dev/null)
            STREAMING_V2_MAX_CHUNK=$(jq -r '.summary.streaming.maxChunkProcessingTime' asr_results_streaming_v2.json 2>/dev/null)
            STREAMING_V2_CHUNKS=$(jq -r '.summary.streaming.totalChunksProcessed' asr_results_streaming_v2.json 2>/dev/null)
            STREAMING_V2_FIRST_TOKEN=$(jq -r '.summary.streaming.avgFirstTokenLatency // "N/A"' asr_results_streaming_v2.json 2>/dev/null)
            [ "$STREAMING_V2_WER" != "null" ] && [ -n "$STREAMING_V2_WER" ] && STREAMING_V2_WER=$(printf "%.2f" "$STREAMING_V2_WER") || STREAMING_V2_WER="N/A"
            [ "$STREAMING_V2_RTFx" != "null" ] && [ -n "$STREAMING_V2_RTFx" ] && STREAMING_V2_RTFx=$(printf "%.2f" "$STREAMING_V2_RTFx") || STREAMING_V2_RTFx="N/A"
            [ "$STREAMING_V2_AVG_CHUNK" != "null" ] && [ -n "$STREAMING_V2_AVG_CHUNK" ] && STREAMING_V2_AVG_CHUNK=$(printf "%.3f" "$STREAMING_V2_AVG_CHUNK") || STREAMING_V2_AVG_CHUNK="N/A"
            [ "$STREAMING_V2_MAX_CHUNK" != "null" ] && [ -n "$STREAMING_V2_MAX_CHUNK" ] && STREAMING_V2_MAX_CHUNK=$(printf "%.3f" "$STREAMING_V2_MAX_CHUNK") || STREAMING_V2_MAX_CHUNK="N/A"
            [ "$STREAMING_V2_FIRST_TOKEN" != "null" ] && [ -n "$STREAMING_V2_FIRST_TOKEN" ] && [ "$STREAMING_V2_FIRST_TOKEN" != "N/A" ] && STREAMING_V2_FIRST_TOKEN=$(printf "%.3f" "$STREAMING_V2_FIRST_TOKEN")
          fi

          # Output metrics (defaults to N/A if a results file never materialized)
          echo "CLEAN_WER_AVG=${CLEAN_WER_AVG:-N/A}" >> "$GITHUB_OUTPUT"
          echo "CLEAN_WER_MED=${CLEAN_WER_MED:-N/A}" >> "$GITHUB_OUTPUT"
          echo "CLEAN_RTFx=${CLEAN_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          echo "CLEAN_V2_WER_AVG=${CLEAN_V2_WER_AVG:-N/A}" >> "$GITHUB_OUTPUT"
          echo "CLEAN_V2_WER_MED=${CLEAN_V2_WER_MED:-N/A}" >> "$GITHUB_OUTPUT"
          echo "CLEAN_V2_RTFx=${CLEAN_V2_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_WER_AVG=${OTHER_WER_AVG:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_WER_MED=${OTHER_WER_MED:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_RTFx=${OTHER_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_V2_WER_AVG=${OTHER_V2_WER_AVG:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_V2_WER_MED=${OTHER_V2_WER_MED:-N/A}" >> "$GITHUB_OUTPUT"
          echo "OTHER_V2_RTFx=${OTHER_V2_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          # Streaming metrics
          echo "STREAMING_WER=${STREAMING_WER:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_RTFx=${STREAMING_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_AVG_CHUNK=${STREAMING_AVG_CHUNK:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_MAX_CHUNK=${STREAMING_MAX_CHUNK:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_CHUNKS=${STREAMING_CHUNKS:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_FIRST_TOKEN=${STREAMING_FIRST_TOKEN:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_WER=${STREAMING_V2_WER:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_RTFx=${STREAMING_V2_RTFx:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_AVG_CHUNK=${STREAMING_V2_AVG_CHUNK:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_MAX_CHUNK=${STREAMING_V2_MAX_CHUNK:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_CHUNKS=${STREAMING_V2_CHUNKS:-N/A}" >> "$GITHUB_OUTPUT"
          echo "STREAMING_V2_FIRST_TOKEN=${STREAMING_V2_FIRST_TOKEN:-N/A}" >> "$GITHUB_OUTPUT"

          # Capture the end time once so minutes and seconds come from the same instant
          BENCHMARK_END=$(date +%s)
          ELAPSED=$(( BENCHMARK_END - BENCHMARK_START ))
          EXECUTION_TIME="$(( ELAPSED / 60 ))m$(( ELAPSED % 60 ))s"
          echo "EXECUTION_TIME=$EXECUTION_TIME" >> "$GITHUB_OUTPUT"
          echo "FILES_COUNT=$MAX_FILES" >> "$GITHUB_OUTPUT"

          # Validate RTFx values - 0 indicates benchmark failure.
          # ${VAR:-N/A} ensures an unset variable (results file never written)
          # is treated as a failure instead of slipping through.
          if [ "${CLEAN_RTFx:-N/A}" = "0.00" ] || [ "${CLEAN_RTFx:-N/A}" = "0.0" ] || [ "${CLEAN_RTFx:-N/A}" = "0" ] || [ "${CLEAN_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: test-clean RTFx is 0 or N/A - benchmark failed"
            CLEAN_RTFX_FAILED=1
          fi
          if [ "${CLEAN_V2_RTFx:-N/A}" = "0.00" ] || [ "${CLEAN_V2_RTFx:-N/A}" = "0.0" ] || [ "${CLEAN_V2_RTFx:-N/A}" = "0" ] || [ "${CLEAN_V2_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: test-clean (v2) RTFx is 0 or N/A - benchmark failed"
            CLEAN_V2_RTFX_FAILED=1
          fi
          if [ "${OTHER_RTFx:-N/A}" = "0.00" ] || [ "${OTHER_RTFx:-N/A}" = "0.0" ] || [ "${OTHER_RTFx:-N/A}" = "0" ] || [ "${OTHER_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: test-other RTFx is 0 or N/A - benchmark failed"
            OTHER_RTFX_FAILED=1
          fi
          if [ "${OTHER_V2_RTFx:-N/A}" = "0.00" ] || [ "${OTHER_V2_RTFx:-N/A}" = "0.0" ] || [ "${OTHER_V2_RTFx:-N/A}" = "0" ] || [ "${OTHER_V2_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: test-other (v2) RTFx is 0 or N/A - benchmark failed"
            OTHER_V2_RTFX_FAILED=1
          fi
          if [ "${STREAMING_RTFx:-N/A}" = "0.00" ] || [ "${STREAMING_RTFx:-N/A}" = "0.0" ] || [ "${STREAMING_RTFx:-N/A}" = "0" ] || [ "${STREAMING_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: streaming RTFx is 0 or N/A - benchmark failed"
            STREAMING_RTFX_FAILED=1
          fi
          if [ "${STREAMING_V2_RTFx:-N/A}" = "0.00" ] || [ "${STREAMING_V2_RTFx:-N/A}" = "0.0" ] || [ "${STREAMING_V2_RTFx:-N/A}" = "0" ] || [ "${STREAMING_V2_RTFx:-N/A}" = "N/A" ]; then
            echo "❌ CRITICAL: streaming (v2) RTFx is 0 or N/A - benchmark failed"
            STREAMING_V2_RTFX_FAILED=1
          fi

          # Report failures summary
          if [ -n "$CLEAN_FAILED" ] || [ -n "$OTHER_FAILED" ] || [ -n "$STREAMING_FAILED" ] || \
             [ -n "$CLEAN_V2_FAILED" ] || [ -n "$OTHER_V2_FAILED" ] || [ -n "$STREAMING_V2_FAILED" ] || \
             [ -n "$CLEAN_RTFX_FAILED" ] || [ -n "$CLEAN_V2_RTFX_FAILED" ] || \
             [ -n "$OTHER_RTFX_FAILED" ] || [ -n "$OTHER_V2_RTFX_FAILED" ] || \
             [ -n "$STREAMING_RTFX_FAILED" ] || [ -n "$STREAMING_V2_RTFX_FAILED" ]; then
            echo "BENCHMARK_STATUS=PARTIAL_FAILURE" >> "$GITHUB_OUTPUT"
            echo "⚠️ Some benchmarks failed:"
            [ -n "$CLEAN_FAILED" ] && echo " - test-clean benchmark failed"
            [ -n "$OTHER_FAILED" ] && echo " - test-other benchmark failed"
            [ -n "$STREAMING_FAILED" ] && echo " - streaming benchmark failed"
            [ -n "$CLEAN_V2_FAILED" ] && echo " - test-clean (v2) benchmark failed"
            [ -n "$OTHER_V2_FAILED" ] && echo " - test-other (v2) benchmark failed"
            [ -n "$STREAMING_V2_FAILED" ] && echo " - streaming (v2) benchmark failed"
            [ -n "$CLEAN_RTFX_FAILED" ] && echo " - test-clean RTFx is 0"
            [ -n "$CLEAN_V2_RTFX_FAILED" ] && echo " - test-clean (v2) RTFx is 0"
            [ -n "$OTHER_RTFX_FAILED" ] && echo " - test-other RTFx is 0"
            [ -n "$OTHER_V2_RTFX_FAILED" ] && echo " - test-other (v2) RTFx is 0"
            [ -n "$STREAMING_RTFX_FAILED" ] && echo " - streaming RTFx is 0"
            [ -n "$STREAMING_V2_RTFX_FAILED" ] && echo " - streaming (v2) RTFx is 0"
            exit 1
          else
            echo "BENCHMARK_STATUS=SUCCESS" >> "$GITHUB_OUTPUT"
            echo "✅ All benchmarks completed successfully"
          fi

      # Posts (or updates, via the hidden HTML marker) a sticky PR comment
      # with the benchmark tables. Template-literal lines are kept flush so
      # no leading whitespace leaks into the rendered markdown.
      - name: Comment PR
        if: always() && github.event_name == 'pull_request'
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
            const benchmarkStatus = '${{ steps.benchmark.outputs.BENCHMARK_STATUS }}';
            const statusEmoji = benchmarkStatus === 'SUCCESS' ? '✅' : '⚠️';
            const statusText = benchmarkStatus === 'SUCCESS' ? 'All benchmarks passed' : 'Some benchmarks failed (see logs)';
            const body = `## ASR Benchmark Results ${statusEmoji}
            **Status:** ${statusText}
            ### Parakeet v3 (multilingual)
            | Dataset | WER Avg | WER Med | RTFx | Status |
            |---------|---------|---------|------|--------|
            | test-clean | ${{ steps.benchmark.outputs.CLEAN_WER_AVG }}% | ${{ steps.benchmark.outputs.CLEAN_WER_MED }}% | ${{ steps.benchmark.outputs.CLEAN_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.CLEAN_WER_AVG }}') < 10 ? '✅' : '${{ steps.benchmark.outputs.CLEAN_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            | test-other | ${{ steps.benchmark.outputs.OTHER_WER_AVG }}% | ${{ steps.benchmark.outputs.OTHER_WER_MED }}% | ${{ steps.benchmark.outputs.OTHER_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.OTHER_WER_AVG }}') < 20 ? '✅' : '${{ steps.benchmark.outputs.OTHER_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            ### Parakeet v2 (English-optimized)
            | Dataset | WER Avg | WER Med | RTFx | Status |
            |---------|---------|---------|------|--------|
            | test-clean | ${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}% | ${{ steps.benchmark.outputs.CLEAN_V2_WER_MED }}% | ${{ steps.benchmark.outputs.CLEAN_V2_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}') < 10 ? '✅' : '${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            | test-other | ${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}% | ${{ steps.benchmark.outputs.OTHER_V2_WER_MED }}% | ${{ steps.benchmark.outputs.OTHER_V2_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}') < 20 ? '✅' : '${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            ### Streaming (v3)
            | Metric | Value | Description |
            |--------|-------|-------------|
            | WER | ${{ steps.benchmark.outputs.STREAMING_WER }}% | Word Error Rate in streaming mode |
            | RTFx | ${{ steps.benchmark.outputs.STREAMING_RTFx }}x | Streaming real-time factor |
            | Avg Chunk Time | ${{ steps.benchmark.outputs.STREAMING_AVG_CHUNK }}s | Average time to process each chunk |
            | Max Chunk Time | ${{ steps.benchmark.outputs.STREAMING_MAX_CHUNK }}s | Maximum chunk processing time |
            | First Token | ${{ steps.benchmark.outputs.STREAMING_FIRST_TOKEN }}s | Latency to first transcription token |
            | Total Chunks | ${{ steps.benchmark.outputs.STREAMING_CHUNKS }} | Number of chunks processed |
            ### Streaming (v2)
            | Metric | Value | Description |
            |--------|-------|-------------|
            | WER | ${{ steps.benchmark.outputs.STREAMING_V2_WER }}% | Word Error Rate in streaming mode |
            | RTFx | ${{ steps.benchmark.outputs.STREAMING_V2_RTFx }}x | Streaming real-time factor |
            | Avg Chunk Time | ${{ steps.benchmark.outputs.STREAMING_V2_AVG_CHUNK }}s | Average time to process each chunk |
            | Max Chunk Time | ${{ steps.benchmark.outputs.STREAMING_V2_MAX_CHUNK }}s | Maximum chunk processing time |
            | First Token | ${{ steps.benchmark.outputs.STREAMING_V2_FIRST_TOKEN }}s | Latency to first transcription token |
            | Total Chunks | ${{ steps.benchmark.outputs.STREAMING_V2_CHUNKS }} | Number of chunks processed |
            <sub>*Streaming tests use 5 files with 0.5s chunks to simulate real-time audio streaming*</sub>
            <sub>${{ steps.benchmark.outputs.FILES_COUNT }} files per dataset • Test runtime: ${{ steps.benchmark.outputs.EXECUTION_TIME }} • ${new Date().toLocaleString('en-US', { timeZone: 'America/New_York', year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: true })} EST</sub>
            <sub>**RTFx** = Real-Time Factor (higher is better) • Calculated as: Total audio duration ÷ Total processing time<br>Processing time includes: Model inference on Apple Neural Engine, audio preprocessing, state resets between files, token-to-text conversion, and file I/O<br>Example: RTFx of 2.0x means 10 seconds of audio processed in 5 seconds (2x faster than real-time)</sub>
            ### Expected RTFx Performance on Physical M1 Hardware:
            **• M1 Mac: ~28x (clean), ~25x (other)**
            **• CI shows ~0.5-3x due to virtualization limitations**
            <sub>Testing methodology follows [HuggingFace Open ASR Leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard)</sub>
            <!-- fluidaudio-benchmark-asr -->`;
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });
            const existing = comments.find(c =>
              c.body.includes('<!-- fluidaudio-benchmark-asr -->')
            );
            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: body
              });
            }

      - name: Upload Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: asr-results
          path: asr_results_*.json