Make parakeetTdtCtc110m folderName consistent with other Parakeet models #2014
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: ASR Benchmark

on:
  pull_request:
    branches: [main]
  workflow_dispatch:

jobs:
  asr-benchmark:
    name: ASR Benchmark
    runs-on: macos-15
    permissions:
      contents: read
      # Needed so the workflow can post/update the benchmark results comment.
      pull-requests: write
    steps:
      - uses: actions/checkout@v5

      - uses: swift-actions/setup-swift@v2
        with:
          swift-version: "6.1"

      # Cache build products, the downloaded Parakeet CoreML model folders,
      # the LibriSpeech dataset, and the Homebrew ffmpeg install.
      # The key hashes the model registry/name sources, so renaming a model
      # folder (e.g. the parakeet-tdt-* folders) invalidates stale caches.
      - name: Cache Dependencies
        uses: actions/cache@v4
        with:
          path: |
            .build
            ~/Library/Application Support/FluidAudio/Models/parakeet-tdt-0.6b-v3-coreml
            ~/Library/Application Support/FluidAudio/Models/parakeet-tdt-0.6b-v2-coreml
            ~/Library/Application Support/FluidAudio/Datasets/LibriSpeech
            ~/Library/Caches/Homebrew
            /usr/local/Cellar/ffmpeg
            /opt/homebrew/Cellar/ffmpeg
          key: ${{ runner.os }}-asr-${{ hashFiles('Package.resolved', 'Sources/FluidAudio/Frameworks/**', 'Sources/FluidAudio/ModelRegistry.swift', 'Sources/FluidAudio/ModelNames.swift') }}

      - name: Install ffmpeg
        run: |
          brew install ffmpeg || echo "ffmpeg may already be installed"
          ffmpeg -version || echo "ffmpeg not available"

      - name: Build
        run: swift build -c release

      - name: Run Benchmarks
        id: benchmark
        run: |
          MAX_FILES="25"
          BENCHMARK_START=$(date +%s)
          # Fail a pipeline when any stage fails (we do NOT use `set -e`:
          # individual benchmark failures are captured and reported together).
          set -o pipefail

          # Run one asr-benchmark invocation.
          #   $1 subset, $2 max files, $3 output json, $4 optional extra args
          # On failure, dumps the run's log and returns non-zero so the
          # caller can record the failure and continue with the other runs.
          run_benchmark() {
            local SUBSET=$1
            local MAX=$2
            local OUTPUT=$3
            local EXTRA_ARGS="${4:-}"
            # Per-run log file so earlier failure logs are not clobbered.
            local LOG="${OUTPUT%.json}_log.txt"
            echo "========================================="
            echo "Running ASR benchmark: $SUBSET (max $MAX files)"
            echo "Output: $OUTPUT"
            echo "Extra args: $EXTRA_ARGS"
            echo "========================================="
            if swift run fluidaudiocli asr-benchmark \
                --subset "$SUBSET" --max-files "$MAX" \
                --auto-download --output "$OUTPUT" $EXTRA_ARGS > "$LOG" 2>&1; then
              echo "✅ Benchmark $SUBSET completed successfully"
              return 0
            else
              echo "❌ Benchmark $SUBSET FAILED with exit code $?"
              echo "Full output:"
              cat "$LOG"
              # Continue with other benchmarks even if one fails
              return 1
            fi
          }

          # Read a jq expression ($1) from a results file ($2); empty on error.
          metric() {
            jq -r "$1" "$2" 2>/dev/null
          }

          # Format $1 to $2 decimal places; echoes N/A when the value is
          # empty, null, unparsable, or already the literal N/A.
          fmt_num() {
            local VAL="$1" PREC="$2" OUT
            if [ -n "$VAL" ] && [ "$VAL" != "null" ] && [ "$VAL" != "N/A" ]; then
              if OUT=$(printf "%.${PREC}f" "$VAL" 2>/dev/null); then
                echo "$OUT"
              else
                echo "N/A"
              fi
            else
              echo "N/A"
            fi
          }

          # Flag an RTFx of 0 (or N/A) — that indicates a failed benchmark.
          #   $1 label for the message, $2 formatted RTFx value
          check_rtfx() {
            case "$2" in
              0|0.0|0.00|N/A)
                echo "❌ CRITICAL: $1 RTFx is 0 or N/A - benchmark failed"
                return 1
                ;;
            esac
            return 0
          }

          # Run benchmarks with error capture. v3 is the multilingual default
          # model; v2 is English-optimized. Streaming runs use 5 files with
          # 0.5s chunks to simulate real-time audio.
          run_benchmark "test-clean" "$MAX_FILES" "asr_results_clean.json" || CLEAN_FAILED=1
          run_benchmark "test-other" "$MAX_FILES" "asr_results_other.json" || OTHER_FAILED=1
          run_benchmark "test-clean" "5" "asr_results_streaming.json" "--test-streaming --chunk-duration 0.5" || STREAMING_FAILED=1
          # English-optimized (v2) runs
          run_benchmark "test-clean" "$MAX_FILES" "asr_results_clean_v2.json" "--model-version v2" || CLEAN_V2_FAILED=1
          run_benchmark "test-other" "$MAX_FILES" "asr_results_other_v2.json" "--model-version v2" || OTHER_V2_FAILED=1
          run_benchmark "test-clean" "5" "asr_results_streaming_v2.json" "--test-streaming --chunk-duration 0.5 --model-version v2" || STREAMING_V2_FAILED=1

          # Extract and format metrics from every results file that exists.
          if [ -f asr_results_clean.json ]; then
            CLEAN_WER_AVG=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_clean.json)" 2)
            CLEAN_WER_MED=$(fmt_num "$(metric '.summary.medianWER * 100' asr_results_clean.json)" 2)
            CLEAN_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_clean.json)" 2)
          fi
          if [ -f asr_results_clean_v2.json ]; then
            CLEAN_V2_WER_AVG=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_clean_v2.json)" 2)
            CLEAN_V2_WER_MED=$(fmt_num "$(metric '.summary.medianWER * 100' asr_results_clean_v2.json)" 2)
            CLEAN_V2_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_clean_v2.json)" 2)
          fi
          if [ -f asr_results_other.json ]; then
            OTHER_WER_AVG=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_other.json)" 2)
            OTHER_WER_MED=$(fmt_num "$(metric '.summary.medianWER * 100' asr_results_other.json)" 2)
            OTHER_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_other.json)" 2)
          fi
          if [ -f asr_results_other_v2.json ]; then
            OTHER_V2_WER_AVG=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_other_v2.json)" 2)
            OTHER_V2_WER_MED=$(fmt_num "$(metric '.summary.medianWER * 100' asr_results_other_v2.json)" 2)
            OTHER_V2_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_other_v2.json)" 2)
          fi
          if [ -f asr_results_streaming.json ]; then
            STREAMING_WER=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_streaming.json)" 2)
            STREAMING_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_streaming.json)" 2)
            STREAMING_AVG_CHUNK=$(fmt_num "$(metric '.summary.streaming.avgChunkProcessingTime' asr_results_streaming.json)" 3)
            STREAMING_MAX_CHUNK=$(fmt_num "$(metric '.summary.streaming.maxChunkProcessingTime' asr_results_streaming.json)" 3)
            STREAMING_CHUNKS=$(metric '.summary.streaming.totalChunksProcessed' asr_results_streaming.json)
            STREAMING_FIRST_TOKEN=$(fmt_num "$(metric '.summary.streaming.avgFirstTokenLatency // "N/A"' asr_results_streaming.json)" 3)
          fi
          if [ -f asr_results_streaming_v2.json ]; then
            STREAMING_V2_WER=$(fmt_num "$(metric '.summary.averageWER * 100' asr_results_streaming_v2.json)" 2)
            STREAMING_V2_RTFx=$(fmt_num "$(metric '.summary.medianRTFx' asr_results_streaming_v2.json)" 2)
            STREAMING_V2_AVG_CHUNK=$(fmt_num "$(metric '.summary.streaming.avgChunkProcessingTime' asr_results_streaming_v2.json)" 3)
            STREAMING_V2_MAX_CHUNK=$(fmt_num "$(metric '.summary.streaming.maxChunkProcessingTime' asr_results_streaming_v2.json)" 3)
            STREAMING_V2_CHUNKS=$(metric '.summary.streaming.totalChunksProcessed' asr_results_streaming_v2.json)
            STREAMING_V2_FIRST_TOKEN=$(fmt_num "$(metric '.summary.streaming.avgFirstTokenLatency // "N/A"' asr_results_streaming_v2.json)" 3)
          fi

          # Capture the end time once so minutes/seconds agree.
          ELAPSED=$(( $(date +%s) - BENCHMARK_START ))
          EXECUTION_TIME="$(( ELAPSED / 60 ))m$(( ELAPSED % 60 ))s"

          # Publish all metrics for the PR-comment step.
          {
            echo "CLEAN_WER_AVG=${CLEAN_WER_AVG:-N/A}"
            echo "CLEAN_WER_MED=${CLEAN_WER_MED:-N/A}"
            echo "CLEAN_RTFx=${CLEAN_RTFx:-N/A}"
            echo "CLEAN_V2_WER_AVG=${CLEAN_V2_WER_AVG:-N/A}"
            echo "CLEAN_V2_WER_MED=${CLEAN_V2_WER_MED:-N/A}"
            echo "CLEAN_V2_RTFx=${CLEAN_V2_RTFx:-N/A}"
            echo "OTHER_WER_AVG=${OTHER_WER_AVG:-N/A}"
            echo "OTHER_WER_MED=${OTHER_WER_MED:-N/A}"
            echo "OTHER_RTFx=${OTHER_RTFx:-N/A}"
            echo "OTHER_V2_WER_AVG=${OTHER_V2_WER_AVG:-N/A}"
            echo "OTHER_V2_WER_MED=${OTHER_V2_WER_MED:-N/A}"
            echo "OTHER_V2_RTFx=${OTHER_V2_RTFx:-N/A}"
            # Streaming metrics
            echo "STREAMING_WER=${STREAMING_WER:-N/A}"
            echo "STREAMING_RTFx=${STREAMING_RTFx:-N/A}"
            echo "STREAMING_AVG_CHUNK=${STREAMING_AVG_CHUNK:-N/A}"
            echo "STREAMING_MAX_CHUNK=${STREAMING_MAX_CHUNK:-N/A}"
            echo "STREAMING_CHUNKS=${STREAMING_CHUNKS:-N/A}"
            echo "STREAMING_FIRST_TOKEN=${STREAMING_FIRST_TOKEN:-N/A}"
            echo "STREAMING_V2_WER=${STREAMING_V2_WER:-N/A}"
            echo "STREAMING_V2_RTFx=${STREAMING_V2_RTFx:-N/A}"
            echo "STREAMING_V2_AVG_CHUNK=${STREAMING_V2_AVG_CHUNK:-N/A}"
            echo "STREAMING_V2_MAX_CHUNK=${STREAMING_V2_MAX_CHUNK:-N/A}"
            echo "STREAMING_V2_CHUNKS=${STREAMING_V2_CHUNKS:-N/A}"
            echo "STREAMING_V2_FIRST_TOKEN=${STREAMING_V2_FIRST_TOKEN:-N/A}"
            echo "EXECUTION_TIME=$EXECUTION_TIME"
            echo "FILES_COUNT=$MAX_FILES"
          } >> "$GITHUB_OUTPUT"

          # Validate RTFx values - 0 indicates benchmark failure
          check_rtfx "test-clean" "$CLEAN_RTFx" || CLEAN_RTFX_FAILED=1
          check_rtfx "test-clean (v2)" "$CLEAN_V2_RTFx" || CLEAN_V2_RTFX_FAILED=1
          check_rtfx "test-other" "$OTHER_RTFx" || OTHER_RTFX_FAILED=1
          check_rtfx "test-other (v2)" "$OTHER_V2_RTFx" || OTHER_V2_RTFX_FAILED=1
          check_rtfx "streaming" "$STREAMING_RTFx" || STREAMING_RTFX_FAILED=1
          check_rtfx "streaming (v2)" "$STREAMING_V2_RTFx" || STREAMING_V2_RTFX_FAILED=1

          # Report failures summary; exit non-zero if anything failed so the
          # job is marked failed (the comment step still runs via always()).
          if [ -n "$CLEAN_FAILED" ] || [ -n "$OTHER_FAILED" ] || [ -n "$STREAMING_FAILED" ] || \
             [ -n "$CLEAN_V2_FAILED" ] || [ -n "$OTHER_V2_FAILED" ] || [ -n "$STREAMING_V2_FAILED" ] || \
             [ -n "$CLEAN_RTFX_FAILED" ] || [ -n "$CLEAN_V2_RTFX_FAILED" ] || \
             [ -n "$OTHER_RTFX_FAILED" ] || [ -n "$OTHER_V2_RTFX_FAILED" ] || \
             [ -n "$STREAMING_RTFX_FAILED" ] || [ -n "$STREAMING_V2_RTFX_FAILED" ]; then
            echo "BENCHMARK_STATUS=PARTIAL_FAILURE" >> "$GITHUB_OUTPUT"
            echo "⚠️ Some benchmarks failed:"
            [ -n "$CLEAN_FAILED" ] && echo "  - test-clean benchmark failed"
            [ -n "$OTHER_FAILED" ] && echo "  - test-other benchmark failed"
            [ -n "$STREAMING_FAILED" ] && echo "  - streaming benchmark failed"
            [ -n "$CLEAN_V2_FAILED" ] && echo "  - test-clean (v2) benchmark failed"
            [ -n "$OTHER_V2_FAILED" ] && echo "  - test-other (v2) benchmark failed"
            [ -n "$STREAMING_V2_FAILED" ] && echo "  - streaming (v2) benchmark failed"
            [ -n "$CLEAN_RTFX_FAILED" ] && echo "  - test-clean RTFx is 0"
            [ -n "$CLEAN_V2_RTFX_FAILED" ] && echo "  - test-clean (v2) RTFx is 0"
            [ -n "$OTHER_RTFX_FAILED" ] && echo "  - test-other RTFx is 0"
            [ -n "$OTHER_V2_RTFX_FAILED" ] && echo "  - test-other (v2) RTFx is 0"
            [ -n "$STREAMING_RTFX_FAILED" ] && echo "  - streaming RTFx is 0"
            [ -n "$STREAMING_V2_RTFX_FAILED" ] && echo "  - streaming (v2) RTFx is 0"
            exit 1
          else
            echo "BENCHMARK_STATUS=SUCCESS" >> "$GITHUB_OUTPUT"
            echo "✅ All benchmarks completed successfully"
          fi

      # Post (or update in place, keyed by the hidden marker) a results
      # comment on the PR. Runs even when the benchmark step failed.
      - name: Comment PR
        if: always() && github.event_name == 'pull_request'
        continue-on-error: true
        uses: actions/github-script@v7
        with:
          script: |
            const benchmarkStatus = '${{ steps.benchmark.outputs.BENCHMARK_STATUS }}';
            const statusEmoji = benchmarkStatus === 'SUCCESS' ? '✅' : '⚠️';
            const statusText = benchmarkStatus === 'SUCCESS' ? 'All benchmarks passed' : 'Some benchmarks failed (see logs)';
            const body = `## ASR Benchmark Results ${statusEmoji}
            **Status:** ${statusText}
            ### Parakeet v3 (multilingual)
            | Dataset | WER Avg | WER Med | RTFx | Status |
            |---------|---------|---------|------|--------|
            | test-clean | ${{ steps.benchmark.outputs.CLEAN_WER_AVG }}% | ${{ steps.benchmark.outputs.CLEAN_WER_MED }}% | ${{ steps.benchmark.outputs.CLEAN_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.CLEAN_WER_AVG }}') < 10 ? '✅' : '${{ steps.benchmark.outputs.CLEAN_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            | test-other | ${{ steps.benchmark.outputs.OTHER_WER_AVG }}% | ${{ steps.benchmark.outputs.OTHER_WER_MED }}% | ${{ steps.benchmark.outputs.OTHER_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.OTHER_WER_AVG }}') < 20 ? '✅' : '${{ steps.benchmark.outputs.OTHER_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            ### Parakeet v2 (English-optimized)
            | Dataset | WER Avg | WER Med | RTFx | Status |
            |---------|---------|---------|------|--------|
            | test-clean | ${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}% | ${{ steps.benchmark.outputs.CLEAN_V2_WER_MED }}% | ${{ steps.benchmark.outputs.CLEAN_V2_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}') < 10 ? '✅' : '${{ steps.benchmark.outputs.CLEAN_V2_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            | test-other | ${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}% | ${{ steps.benchmark.outputs.OTHER_V2_WER_MED }}% | ${{ steps.benchmark.outputs.OTHER_V2_RTFx }}x | ${parseFloat('${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}') < 20 ? '✅' : '${{ steps.benchmark.outputs.OTHER_V2_WER_AVG }}' === 'N/A' ? '❌' : '⚠️'} |
            ### Streaming (v3)
            | Metric | Value | Description |
            |--------|-------|-------------|
            | WER | ${{ steps.benchmark.outputs.STREAMING_WER }}% | Word Error Rate in streaming mode |
            | RTFx | ${{ steps.benchmark.outputs.STREAMING_RTFx }}x | Streaming real-time factor |
            | Avg Chunk Time | ${{ steps.benchmark.outputs.STREAMING_AVG_CHUNK }}s | Average time to process each chunk |
            | Max Chunk Time | ${{ steps.benchmark.outputs.STREAMING_MAX_CHUNK }}s | Maximum chunk processing time |
            | First Token | ${{ steps.benchmark.outputs.STREAMING_FIRST_TOKEN }}s | Latency to first transcription token |
            | Total Chunks | ${{ steps.benchmark.outputs.STREAMING_CHUNKS }} | Number of chunks processed |
            ### Streaming (v2)
            | Metric | Value | Description |
            |--------|-------|-------------|
            | WER | ${{ steps.benchmark.outputs.STREAMING_V2_WER }}% | Word Error Rate in streaming mode |
            | RTFx | ${{ steps.benchmark.outputs.STREAMING_V2_RTFx }}x | Streaming real-time factor |
            | Avg Chunk Time | ${{ steps.benchmark.outputs.STREAMING_V2_AVG_CHUNK }}s | Average time to process each chunk |
            | Max Chunk Time | ${{ steps.benchmark.outputs.STREAMING_V2_MAX_CHUNK }}s | Maximum chunk processing time |
            | First Token | ${{ steps.benchmark.outputs.STREAMING_V2_FIRST_TOKEN }}s | Latency to first transcription token |
            | Total Chunks | ${{ steps.benchmark.outputs.STREAMING_V2_CHUNKS }} | Number of chunks processed |
            <sub>*Streaming tests use 5 files with 0.5s chunks to simulate real-time audio streaming*</sub>
            <sub>${{ steps.benchmark.outputs.FILES_COUNT }} files per dataset • Test runtime: ${{ steps.benchmark.outputs.EXECUTION_TIME }} • ${new Date().toLocaleString('en-US', { timeZone: 'America/New_York', year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: true })} EST</sub>
            <sub>**RTFx** = Real-Time Factor (higher is better) • Calculated as: Total audio duration ÷ Total processing time<br>Processing time includes: Model inference on Apple Neural Engine, audio preprocessing, state resets between files, token-to-text conversion, and file I/O<br>Example: RTFx of 2.0x means 10 seconds of audio processed in 5 seconds (2x faster than real-time)</sub>
            ### Expected RTFx Performance on Physical M1 Hardware:
            **• M1 Mac: ~28x (clean), ~25x (other)**
            **• CI shows ~0.5-3x due to virtualization limitations**
            <sub>Testing methodology follows [HuggingFace Open ASR Leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard)</sub>
            <!-- fluidaudio-benchmark-asr -->`;
            // Update the existing benchmark comment (found via the marker)
            // instead of posting a duplicate on every push.
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });
            const existing = comments.find(c =>
              c.body.includes('<!-- fluidaudio-benchmark-asr -->')
            );
            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: body
              });
            }

      # Keep the raw result JSON and per-run logs for post-mortem debugging.
      - name: Upload Results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: asr-results
          path: |
            asr_results_*.json
            asr_results_*_log.txt