diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml index 41e6d6c3..a17c9228 100644 --- a/.github/workflows/build-and-run.yml +++ b/.github/workflows/build-and-run.yml @@ -4,7 +4,7 @@ on: push: branches: [ main ] pull_request: - branches: [ main ] + branches: [ main ] types: [opened, synchronize, reopened] env: @@ -13,6 +13,8 @@ env: LLAMA_ROOT: ${{ github.workspace }} GRAAL_JARS: /opt/graalJars MODELS_DIR: /opt/models + # History file committed back to the repo on push to main + PERF_HISTORY_FILE: docs/perf-history.jsonl jobs: code-quality: @@ -51,20 +53,22 @@ jobs: git clone --depth 1 --branch master \ https://github.com/beehive-lab/TornadoVM.git \ $TORNADO_ROOT + - name: Set up Python venv for TornadoVM run: | python3 -m venv $TORNADO_ROOT/venv source $TORNADO_ROOT/venv/bin/activate python --version + - name: Build TornadoVM run: | cd $TORNADO_ROOT mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/ source venv/bin/activate echo "=== Building TornadoVM ===" - + make BACKEND=${{ matrix.backend.name }} - + echo "=== Searching for TornadoVM SDK directory ===" SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1) if [ -z "$SDK_DIR" ]; then @@ -74,7 +78,7 @@ jobs: fi FULL_SDK="${PWD}/${SDK_DIR}" echo "Detected TornadoVM SDK: $FULL_SDK" - + # Export for current shell session export TORNADOVM_HOME="$FULL_SDK" export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH" @@ -82,10 +86,11 @@ jobs: # Save for subsequent steps echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV echo "PATH=$PATH" >> $GITHUB_ENV - + echo "=== Checking tornado CLI ===" which tornado || { echo "::error::tornado not in PATH"; exit 1; } tornado --devices + - name: Build GPULlama3.java run: | cd ${{ github.workspace }} @@ -93,14 +98,36 @@ jobs: export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" tornado --version ./mvnw clean package -DskipTests + - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard + env: + 
JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-F16.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" @@ -108,7 +135,23 @@ jobs: --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ --with-prefill-decode + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-F16.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=F16 \ + configuration=prefill-decode \ + "flags=--with-prefill-decode" \ + prompt="Say hello" + - name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json run: | cd ${{ github.workspace }} export 
PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" @@ -116,8 +159,25 @@ jobs: --model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \ --prompt "Say hello" \ --with-prefill-decode --batch-prefill-size 32 - - name: PTX- FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-F16.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=F16 \ + configuration=batch-prefill-decode \ + "flags=--with-prefill-decode --batch-prefill-size 32" \ + prompt="Say hello" + + # ── PTX-only: CUDA-graph variants ──────────────────────────────────────── + - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs if: matrix.backend.name == 'ptx' + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" @@ -126,8 +186,24 @@ jobs: --prompt "Say hello" \ --with-prefill-decode \ --cuda-graphs + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.meta.json" \ + backend=ptx \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-F16.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=F16 \ + configuration=prefill-decode-cuda-graphs \ + "flags=--with-prefill-decode --cuda-graphs" \ + prompt="Say hello" + - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs if: matrix.backend.name == 'ptx' + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp 
}}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" @@ -136,95 +212,371 @@ jobs: --prompt "Say hello" \ --with-prefill-decode --batch-prefill-size 32 \ --cuda-graphs + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.meta.json" \ + backend=ptx \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-F16.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=F16 \ + configuration=batch-prefill-decode-cuda-graphs \ + "flags=--with-prefill-decode --batch-prefill-size 32 --cuda-graphs" \ + prompt="Say hello" + + # ── Additional models — standard inference, all backends ───────────────── - name: FP16 - Run Qwen3-4B-f16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Qwen3-4B-f16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Qwen3-4B-f16.gguf \ + model=Qwen3-4B \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model 
$MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Mistral-7B-Instruct-v0.3.fp16.gguf \ + model=Mistral-7B-Instruct-v0.3 \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=qwen2.5-1.5b-instruct-fp16.gguf \ + model=Qwen2.5-1.5B-Instruct \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model /$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \ + --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.meta.json" \ + backend="${{ 
matrix.backend.name }}" \ + task=llama-inference \ + model_file=Phi-3-mini-4k-instruct-fp16.gguf \ + model=Phi-3-mini-4k-instruct \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model /$MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \ + --model $MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=granite-3.2-2b-instruct-f16.gguf \ + model=Granite-3.2-2B-Instruct \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: FP16 - Run Granite-4.0-1b-F16.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model /$MODELS_DIR/granite-4.0-1b-F16.gguf \ + --model $MODELS_DIR/granite-4.0-1b-F16.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=granite-4.0-1b-F16.gguf \ + model=Granite-4.0-1B \ + quantization=F16 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - 
name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Llama-3.2-1B-Instruct-Q8_0.gguf \ + model=Llama-3.2-1B-Instruct \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: Q8 - Run Qwen3-0.6B-Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Qwen3-0.6B-Q8_0.gguf \ + model=Qwen3-0.6B \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ 
matrix.backend.name }} \ --model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=Phi-3-mini-4k-instruct-Q8_0.gguf \ + model=Phi-3-mini-4k-instruct \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=qwen2.5-1.5b-instruct-q8_0.gguf \ + model=Qwen2.5-1.5B-Instruct \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ --model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \ --prompt "Say hello" - - name: Q8 - Run Granite-3.2-2b-instruct-Q8.gguf + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.meta.json" \ + backend="${{ 
matrix.backend.name }}" \ + task=llama-inference \ + model_file=Mistral-7B-Instruct-v0.3.Q8_0.gguf \ + model=Mistral-7B-Instruct-v0.3 \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + + - name: Q8 - Run Granite-3.2-2b-instruct-Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model /$MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \ + --model $MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=granite-3.2-2b-instruct-Q8_0.gguf \ + model=Granite-3.2-2B-Instruct \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say hello" + - name: Q8 - Run Granite-4.0-1b-Q8_0.gguf + env: + JAVA_TOOL_OPTIONS: >- + -Dllama.metrics.format=json + -Dllama.metrics.output=file + -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json run: | cd ${{ github.workspace }} export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH" ./llama-tornado --gpu --${{ matrix.backend.name }} \ - --model /$MODELS_DIR/granite-4.0-1b-Q8_0.gguf \ + --model $MODELS_DIR/granite-4.0-1b-Q8_0.gguf \ --prompt "Say hello" + python3 scripts/write_metrics_sidecar.py \ + --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.meta.json" \ + backend="${{ matrix.backend.name }}" \ + task=llama-inference \ + model_file=granite-4.0-1b-Q8_0.gguf \ + model=Granite-4.0-1B \ + quantization=Q8_0 \ + configuration=standard \ + flags="" \ + prompt="Say 
hello"
+
+      # ── Upload metrics for the publish job ────────────────────────────────────
+      - name: Upload metrics artifacts
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: metrics-${{ matrix.backend.name }}-${{ github.run_id }}
+          path: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-*.json
+          if-no-files-found: warn
+
+  # ── Separate job: collect all matrix metrics and update history ───────────────
+  publish-performance-history:
+    # Guard: only commit history on real pushes to main of the upstream repo; PR runs would add duplicate entries and forks cannot push.
+    if: >-
+      github.repository == 'beehive-lab/GPULlama3.java' &&
+      github.event_name == 'push' &&
+      github.ref == 'refs/heads/main'
+    runs-on: [self-hosted]
+    needs: build-and-run
+    timeout-minutes: 15
+    permissions:
+      contents: write  # the commit step pushes with GITHUB_TOKEN, which may default to read-only
+    steps:
+      - name: Checkout GPULlama3
+        uses: actions/checkout@v4
+      - name: Download metrics artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: metrics-*-${{ github.run_id }}
+          path: ${{ runner.temp }}/metrics-artifacts
+          merge-multiple: true
+
+      - name: Append to performance history
+        run: |
+          python3 scripts/process_metrics.py \
+            --metrics-dir "${{ runner.temp }}/metrics-artifacts" \
+            --commit "${{ github.sha }}" \
+            --branch "${{ github.ref_name }}" \
+            --run-id "${{ github.run_id }}" \
+            --run-number "${{ github.run_number }}" \
+            --run-attempt "${{ github.run_attempt }}" \
+            --workflow "${{ github.workflow }}" \
+            --history "$PERF_HISTORY_FILE"
+
+      - name: Commit performance history
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add "$PERF_HISTORY_FILE"
+          git diff --cached --quiet && echo "No history changes to commit" && exit 0
+          git commit -m "perf: record run #${{ github.run_number }} @ ${GITHUB_SHA::8}"
+          for attempt in 1 2 3; do
+            git pull --rebase origin main && git push origin HEAD:main && exit 0  # explicit target: no reliance on upstream tracking
+            git rebase --abort 2>/dev/null || true  # clear a conflicted rebase so the next attempt starts from a clean tree
+            [ "$attempt" -ge 3 ] || { echo "Attempt $attempt failed, retrying in $((attempt * 5))s..."; sleep $((attempt * 5)); }
+          done
+          echo "::error::Failed to push after 3 attempts"; exit 1
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 00000000..cdc2e32b
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,268 @@
+
+
+
+
+
+ GPULlama3 Performance History
+
+
+
+
+

GPULlama3 — Performance History

+ +
+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ +
+
No performance history available yet.
+ + + + diff --git a/docs/perf-history.jsonl b/docs/perf-history.jsonl new file mode 100644 index 00000000..93011f25 --- /dev/null +++ b/docs/perf-history.jsonl @@ -0,0 +1,298 @@ +{"timestamp": "2026-02-25T13:02:33.4265625Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.51, "prompt_eval_rate": 0.0, "total_rate": 31.51, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 700000000, "prompt_eval_duration": 0, "total_duration": 700000000} +{"timestamp": "2026-02-25T13:04:03.7604236Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.6, "prompt_eval_rate": 0.0, "total_rate": 14.6, "eval_count": 131, "prompt_eval_count": 0, "total_count": 131, "eval_duration": 8970000000, "prompt_eval_duration": 0, "total_duration": 8970000000} +{"timestamp": "2026-02-25T13:06:03.0069983Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.61, "prompt_eval_rate": 0.0, "total_rate": 7.61, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6050000000, "prompt_eval_duration": 0, "total_duration": 6050000000} +{"timestamp": "2026-02-25T13:06:40.6181214Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", 
"short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.03, "prompt_eval_rate": 0.0, "total_rate": 14.03, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 780000000, "prompt_eval_duration": 0, "total_duration": 780000000} +{"timestamp": "2026-02-25T13:07:44.6833071Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.69, "prompt_eval_rate": 0.0, "total_rate": 12.69, "eval_count": 71, "prompt_eval_count": 0, "total_count": 71, "eval_duration": 5590000000, "prompt_eval_duration": 0, "total_duration": 5590000000} +{"timestamp": "2026-02-25T13:08:49.0141617Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.74, "prompt_eval_rate": 0.0, "total_rate": 16.74, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-02-25T13:09:39.0976245Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, 
"eval_rate": 18.42, "prompt_eval_rate": 0.0, "total_rate": 18.42, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000} +{"timestamp": "2026-02-25T13:09:58.5864455Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 35.9, "prompt_eval_rate": 0.0, "total_rate": 35.9, "eval_count": 49, "prompt_eval_count": 0, "total_count": 49, "eval_duration": 1370000000, "prompt_eval_duration": 0, "total_duration": 1370000000} +{"timestamp": "2026-02-25T13:10:23.1699553Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.81, "prompt_eval_rate": 0.0, "total_rate": 28.81, "eval_count": 107, "prompt_eval_count": 0, "total_count": 107, "eval_duration": 3710000000, "prompt_eval_duration": 0, "total_duration": 3710000000} +{"timestamp": "2026-02-25T13:11:04.1905737Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.32, "prompt_eval_rate": 0.0, "total_rate": 12.32, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000} +{"timestamp": "2026-02-25T13:11:32.6446371Z", "commit": 
"3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.14, "prompt_eval_rate": 0.0, "total_rate": 18.14, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000} +{"timestamp": "2026-02-25T13:12:33.1492263Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.37, "prompt_eval_rate": 0.0, "total_rate": 7.37, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6510000000, "prompt_eval_duration": 0, "total_duration": 6510000000} +{"timestamp": "2026-02-25T13:13:17.4567596Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.24, "prompt_eval_rate": 0.0, "total_rate": 16.24, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000} +{"timestamp": "2026-02-25T13:13:54.4757130Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", 
"configuration": "standard", "load_duration": 0, "eval_rate": 19.17, "prompt_eval_rate": 0.0, "total_rate": 19.17, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000} +{"timestamp": "2026-02-25T12:48:55.9103978Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.69, "prompt_eval_rate": 0.0, "total_rate": 31.69, "eval_count": 33, "prompt_eval_count": 0, "total_count": 33, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000} +{"timestamp": "2026-02-25T12:50:47.7822546Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 5.83, "prompt_eval_rate": 0.0, "total_rate": 5.83, "eval_count": 229, "prompt_eval_count": 0, "total_count": 229, "eval_duration": 39260000000, "prompt_eval_duration": 0, "total_duration": 39260000000} +{"timestamp": "2026-02-25T12:52:46.9423058Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.08, "prompt_eval_rate": 0.0, "total_rate": 7.08, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6780000000, "prompt_eval_duration": 0, "total_duration": 6780000000} +{"timestamp": 
"2026-02-25T12:53:24.3415702Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.71, "prompt_eval_rate": 0.0, "total_rate": 11.71, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1620000000, "prompt_eval_duration": 0, "total_duration": 1620000000} +{"timestamp": "2026-02-25T12:54:26.9375923Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.14, "prompt_eval_rate": 0.0, "total_rate": 10.14, "eval_count": 18, "prompt_eval_count": 0, "total_count": 18, "eval_duration": 1780000000, "prompt_eval_duration": 0, "total_duration": 1780000000} +{"timestamp": "2026-02-25T12:55:23.6364082Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.01, "prompt_eval_rate": 0.0, "total_rate": 14.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1430000000, "prompt_eval_duration": 0, "total_duration": 1430000000} +{"timestamp": "2026-02-25T12:56:03.6760343Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", 
"quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.28, "prompt_eval_rate": 0.0, "total_rate": 15.28, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000} +{"timestamp": "2026-02-25T12:56:21.1566819Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.02, "prompt_eval_rate": 0.0, "total_rate": 34.02, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000} +{"timestamp": "2026-02-25T12:56:42.3970318Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 8080000000, "prompt_eval_duration": 0, "total_duration": 8080000000} +{"timestamp": "2026-02-25T12:57:19.3770422Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.73, "prompt_eval_rate": 0.0, "total_rate": 12.73, "eval_count": 45, "prompt_eval_count": 0, "total_count": 45, "eval_duration": 3540000000, "prompt_eval_duration": 0, "total_duration": 
3540000000} +{"timestamp": "2026-02-25T12:57:41.8384742Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.46, "prompt_eval_rate": 0.0, "total_rate": 15.46, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000} +{"timestamp": "2026-02-25T12:58:41.9804243Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.08, "prompt_eval_rate": 0.0, "total_rate": 8.08, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5940000000, "prompt_eval_duration": 0, "total_duration": 5940000000} +{"timestamp": "2026-02-25T12:59:19.3721089Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.82, "prompt_eval_rate": 0.0, "total_rate": 15.82, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000} +{"timestamp": "2026-02-25T12:59:46.6532904Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", 
"model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.15, "prompt_eval_rate": 0.0, "total_rate": 16.15, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000} +{"timestamp": "2026-03-11T14:37:50.3727490Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 34.55, "prompt_eval_rate": 0.0, "total_rate": 34.55, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 840000000, "prompt_eval_duration": 0, "total_duration": 840000000} +{"timestamp": "2026-03-11T14:38:27.6168660Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.65, "prompt_eval_rate": 0.0, "total_rate": 14.65, "eval_count": 131, "prompt_eval_count": 0, "total_count": 131, "eval_duration": 8940000000, "prompt_eval_duration": 0, "total_duration": 8940000000} +{"timestamp": "2026-03-11T14:38:48.7062204Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.27, "prompt_eval_rate": 0.0, "total_rate": 9.27, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5180000000, "prompt_eval_duration": 0, 
"total_duration": 5180000000} +{"timestamp": "2026-03-11T14:38:55.8504332Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 22.39, "prompt_eval_rate": 0.0, "total_rate": 22.39, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 850000000, "prompt_eval_duration": 0, "total_duration": 850000000} +{"timestamp": "2026-03-11T14:39:37.6499442Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.92, "prompt_eval_rate": 0.0, "total_rate": 15.92, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 32159999999, "prompt_eval_duration": 0, "total_duration": 32159999999} +{"timestamp": "2026-03-11T14:39:47.3172710Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.51, "prompt_eval_rate": 0.0, "total_rate": 17.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1140000000, "prompt_eval_duration": 0, "total_duration": 1140000000} +{"timestamp": "2026-03-11T14:40:19.6581229Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", 
"backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.42, "prompt_eval_rate": 0.0, "total_rate": 19.42, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 570000000, "prompt_eval_duration": 0, "total_duration": 570000000} +{"timestamp": "2026-03-11T14:40:32.2523670Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.22, "prompt_eval_rate": 0.0, "total_rate": 34.22, "eval_count": 35, "prompt_eval_count": 0, "total_count": 35, "eval_duration": 1020000000, "prompt_eval_duration": 0, "total_duration": 1020000000} +{"timestamp": "2026-03-11T14:40:46.2367918Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.72, "prompt_eval_rate": 0.0, "total_rate": 30.72, "eval_count": 121, "prompt_eval_count": 0, "total_count": 121, "eval_duration": 3940000000, "prompt_eval_duration": 0, "total_duration": 3940000000} +{"timestamp": "2026-03-11T14:41:11.4739328Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.77, "prompt_eval_rate": 0.0, "total_rate": 15.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 950000000, 
"prompt_eval_duration": 0, "total_duration": 950000000} +{"timestamp": "2026-03-11T14:41:28.9736950Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.4, "prompt_eval_rate": 0.0, "total_rate": 17.4, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000} +{"timestamp": "2026-03-11T14:42:18.6426816Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.09, "prompt_eval_rate": 0.0, "total_rate": 9.09, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5280000000, "prompt_eval_duration": 0, "total_duration": 5280000000} +{"timestamp": "2026-03-11T14:42:49.7737882Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.82, "prompt_eval_rate": 0.0, "total_rate": 16.82, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-03-11T14:43:11.2237568Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": 
"GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.45, "prompt_eval_rate": 0.0, "total_rate": 19.45, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000} +{"timestamp": "2026-03-11T14:28:32.1201281Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.61, "prompt_eval_rate": 0.0, "total_rate": 30.61, "eval_count": 18, "prompt_eval_count": 0, "total_count": 18, "eval_duration": 590000000, "prompt_eval_duration": 0, "total_duration": 590000000} +{"timestamp": "2026-03-11T14:30:01.4683147Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.33, "prompt_eval_rate": 0.0, "total_rate": 7.33, "eval_count": 147, "prompt_eval_count": 0, "total_count": 147, "eval_duration": 20070000000, "prompt_eval_duration": 0, "total_duration": 20070000000} +{"timestamp": "2026-03-11T14:31:55.4129847Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.56, "prompt_eval_rate": 0.0, "total_rate": 7.56, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 
6090000000, "prompt_eval_duration": 0, "total_duration": 6090000000} +{"timestamp": "2026-03-11T14:32:21.0910851Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.73, "prompt_eval_rate": 0.0, "total_rate": 14.73, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000} +{"timestamp": "2026-03-11T14:33:06.7383934Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.88, "prompt_eval_rate": 0.0, "total_rate": 9.88, "eval_count": 24, "prompt_eval_count": 0, "total_count": 24, "eval_duration": 2430000000, "prompt_eval_duration": 0, "total_duration": 2430000000} +{"timestamp": "2026-03-11T14:33:36.5002447Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.3, "prompt_eval_rate": 0.0, "total_rate": 13.3, "eval_count": 33, "prompt_eval_count": 0, "total_count": 33, "eval_duration": 2480000000, "prompt_eval_duration": 0, "total_duration": 2480000000} +{"timestamp": "2026-03-11T14:33:45.0985750Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": 
"GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.06, "prompt_eval_rate": 0.0, "total_rate": 15.06, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000} +{"timestamp": "2026-03-11T14:33:50.9317339Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.92, "prompt_eval_rate": 0.0, "total_rate": 30.92, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000} +{"timestamp": "2026-03-11T14:34:04.2851760Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.72, "prompt_eval_rate": 0.0, "total_rate": 14.72, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 7740000000, "prompt_eval_duration": 0, "total_duration": 7740000000} +{"timestamp": "2026-03-11T14:34:21.5045809Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.66, "prompt_eval_rate": 0.0, "total_rate": 12.66, "eval_count": 69, "prompt_eval_count": 0, "total_count": 69, "eval_duration": 
5450000000, "prompt_eval_duration": 0, "total_duration": 5450000000} +{"timestamp": "2026-03-11T14:34:33.6742781Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 21.02, "prompt_eval_rate": 0.0, "total_rate": 21.02, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 900000000, "prompt_eval_duration": 0, "total_duration": 900000000} +{"timestamp": "2026-03-11T14:35:03.9071483Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.34, "prompt_eval_rate": 0.0, "total_rate": 8.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5750000000, "prompt_eval_duration": 0, "total_duration": 5750000000} +{"timestamp": "2026-03-11T14:35:23.5037681Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.19, "prompt_eval_rate": 0.0, "total_rate": 16.19, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000} +{"timestamp": "2026-03-11T14:35:40.4748520Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", 
"workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.13, "prompt_eval_rate": 0.0, "total_rate": 16.13, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000} +{"timestamp": "2026-03-27T11:11:46.8464278Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.32, "prompt_eval_rate": 0.0, "total_rate": 31.32, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 480000000, "prompt_eval_duration": 0, "total_duration": 480000000} +{"timestamp": "2026-03-27T11:13:18.2558727Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.4, "prompt_eval_rate": 0.0, "total_rate": 12.4, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 9270000000, "prompt_eval_duration": 0, "total_duration": 9270000000} +{"timestamp": "2026-03-27T11:15:28.6588440Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.55, "prompt_eval_rate": 0.0, "total_rate": 8.55, "eval_count": 48, "prompt_eval_count": 0, "total_count": 
48, "eval_duration": 5620000000, "prompt_eval_duration": 0, "total_duration": 5620000000} +{"timestamp": "2026-03-27T11:16:05.3132154Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.03, "prompt_eval_rate": 0.0, "total_rate": 19.03, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1000000000, "prompt_eval_duration": 0, "total_duration": 1000000000} +{"timestamp": "2026-03-27T11:17:01.3215670Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.97, "prompt_eval_rate": 0.0, "total_rate": 12.97, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1460000000, "prompt_eval_duration": 0, "total_duration": 1460000000} +{"timestamp": "2026-03-27T11:17:49.3334417Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.28, "prompt_eval_rate": 0.0, "total_rate": 17.28, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000} +{"timestamp": "2026-03-27T11:19:05.4734469Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", 
"run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.4, "prompt_eval_rate": 0.0, "total_rate": 18.4, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 27830000000, "prompt_eval_duration": 0, "total_duration": 27830000000} +{"timestamp": "2026-03-27T11:19:23.4233286Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.99, "prompt_eval_rate": 0.0, "total_rate": 33.99, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 850000000, "prompt_eval_duration": 0, "total_duration": 850000000} +{"timestamp": "2026-03-27T11:19:47.7509434Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.32, "prompt_eval_rate": 0.0, "total_rate": 29.32, "eval_count": 112, "prompt_eval_count": 0, "total_count": 112, "eval_duration": 3820000000, "prompt_eval_duration": 0, "total_duration": 3820000000} +{"timestamp": "2026-03-27T11:20:32.9738436Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 10.94, "prompt_eval_rate": 0.0, "total_rate": 10.94, 
"eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 2100000000, "prompt_eval_duration": 0, "total_duration": 2100000000} +{"timestamp": "2026-03-27T11:21:01.8644361Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.55, "prompt_eval_rate": 0.0, "total_rate": 17.55, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-03-27T11:21:48.7553878Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.51, "prompt_eval_rate": 0.0, "total_rate": 9.51, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5050000000, "prompt_eval_duration": 0, "total_duration": 5050000000} +{"timestamp": "2026-03-27T11:22:09.8806775Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.12, "prompt_eval_rate": 0.0, "total_rate": 17.12, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1170000000, "prompt_eval_duration": 0, "total_duration": 1170000000} +{"timestamp": "2026-03-27T11:22:32.6596801Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", 
"short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.3, "prompt_eval_rate": 0.0, "total_rate": 19.3, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000} +{"timestamp": "2026-03-27T10:59:21.2222696Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.72, "prompt_eval_rate": 0.0, "total_rate": 32.72, "eval_count": 312, "prompt_eval_count": 0, "total_count": 312, "eval_duration": 9540000000, "prompt_eval_duration": 0, "total_duration": 9540000000} +{"timestamp": "2026-03-27T11:00:51.8298313Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.08, "prompt_eval_rate": 0.0, "total_rate": 7.08, "eval_count": 157, "prompt_eval_count": 0, "total_count": 157, "eval_duration": 22170000000, "prompt_eval_duration": 0, "total_duration": 22170000000} +{"timestamp": "2026-03-27T11:02:53.6971774Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.92, 
"prompt_eval_rate": 0.0, "total_rate": 6.92, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6640000000, "prompt_eval_duration": 0, "total_duration": 6640000000} +{"timestamp": "2026-03-27T11:03:21.9312020Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.9, "prompt_eval_rate": 0.0, "total_rate": 14.9, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1270000000, "prompt_eval_duration": 0, "total_duration": 1270000000} +{"timestamp": "2026-03-27T11:04:12.1422822Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.67, "prompt_eval_rate": 0.0, "total_rate": 11.67, "eval_count": 24, "prompt_eval_count": 0, "total_count": 24, "eval_duration": 2060000000, "prompt_eval_duration": 0, "total_duration": 2060000000} +{"timestamp": "2026-03-27T11:05:01.6786403Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.42, "prompt_eval_rate": 0.0, "total_rate": 15.42, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000} +{"timestamp": "2026-03-27T11:05:42.5168894Z", "commit": 
"d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.79, "prompt_eval_rate": 0.0, "total_rate": 14.79, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000} +{"timestamp": "2026-03-27T11:05:58.1450126Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.96, "prompt_eval_rate": 0.0, "total_rate": 31.96, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 690000000, "prompt_eval_duration": 0, "total_duration": 690000000} +{"timestamp": "2026-03-27T11:06:19.0563521Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 13.18, "prompt_eval_rate": 0.0, "total_rate": 13.18, "eval_count": 110, "prompt_eval_count": 0, "total_count": 110, "eval_duration": 8340000000, "prompt_eval_duration": 0, "total_duration": 8340000000} +{"timestamp": "2026-03-27T11:06:52.3261536Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", 
"load_duration": 0, "eval_rate": 12.8, "prompt_eval_rate": 0.0, "total_rate": 12.8, "eval_count": 38, "prompt_eval_count": 0, "total_count": 38, "eval_duration": 2970000000, "prompt_eval_duration": 0, "total_duration": 2970000000} +{"timestamp": "2026-03-27T11:07:15.0983514Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.38, "prompt_eval_rate": 0.0, "total_rate": 16.38, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000} +{"timestamp": "2026-03-27T11:08:19.7915544Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.34, "prompt_eval_rate": 0.0, "total_rate": 7.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6540000000, "prompt_eval_duration": 0, "total_duration": 6540000000} +{"timestamp": "2026-03-27T11:08:57.6893685Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.65, "prompt_eval_rate": 0.0, "total_rate": 15.65, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": 
"2026-03-27T11:09:24.2799821Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.74, "prompt_eval_rate": 0.0, "total_rate": 15.74, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1270000000, "prompt_eval_duration": 0, "total_duration": 1270000000} +{"timestamp": "2026-03-31T13:40:47.9467296Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 35.17, "prompt_eval_rate": 0.0, "total_rate": 35.17, "eval_count": 26, "prompt_eval_count": 0, "total_count": 26, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000} +{"timestamp": "2026-03-31T13:41:37.6514747Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.63, "prompt_eval_rate": 0.0, "total_rate": 14.63, "eval_count": 129, "prompt_eval_count": 0, "total_count": 129, "eval_duration": 8820000000, "prompt_eval_duration": 0, "total_duration": 8820000000} +{"timestamp": "2026-03-31T13:42:22.5163227Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", 
"quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.45, "prompt_eval_rate": 0.0, "total_rate": 9.45, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 4870000000, "prompt_eval_duration": 0, "total_duration": 4870000000} +{"timestamp": "2026-03-31T13:42:44.1337911Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 21.62, "prompt_eval_rate": 0.0, "total_rate": 21.62, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 510000000, "prompt_eval_duration": 0, "total_duration": 510000000} +{"timestamp": "2026-03-31T13:43:56.0520326Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.11, "prompt_eval_rate": 0.0, "total_rate": 15.11, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 33880000000, "prompt_eval_duration": 0, "total_duration": 33880000000} +{"timestamp": "2026-03-31T13:44:46.2989757Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.79, "prompt_eval_rate": 0.0, "total_rate": 12.79, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1560000000, "prompt_eval_duration": 0, 
"total_duration": 1560000000} +{"timestamp": "2026-03-31T13:45:19.2147696Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 20.59, "prompt_eval_rate": 0.0, "total_rate": 20.59, "eval_count": 73, "prompt_eval_count": 0, "total_count": 73, "eval_duration": 3550000000, "prompt_eval_duration": 0, "total_duration": 3550000000} +{"timestamp": "2026-03-31T13:45:37.4442519Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.44, "prompt_eval_rate": 0.0, "total_rate": 33.44, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 930000000, "prompt_eval_duration": 0, "total_duration": 930000000} +{"timestamp": "2026-03-31T13:45:59.8044745Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.9, "prompt_eval_rate": 0.0, "total_rate": 31.9, "eval_count": 113, "prompt_eval_count": 0, "total_count": 113, "eval_duration": 3540000000, "prompt_eval_duration": 0, "total_duration": 3540000000} +{"timestamp": "2026-03-31T13:46:32.3629413Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", 
"model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.64, "prompt_eval_rate": 0.0, "total_rate": 12.64, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-03-31T13:46:59.7393895Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.49, "prompt_eval_rate": 0.0, "total_rate": 18.49, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000} +{"timestamp": "2026-03-31T13:47:19.7101038Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.55, "prompt_eval_rate": 0.0, "total_rate": 9.55, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 4820000000, "prompt_eval_duration": 0, "total_duration": 4820000000} +{"timestamp": "2026-03-31T13:47:28.7660332Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.04, "prompt_eval_rate": 0.0, "total_rate": 17.04, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1170000000, 
"prompt_eval_duration": 0, "total_duration": 1170000000} +{"timestamp": "2026-03-31T13:47:37.2736957Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.48, "prompt_eval_rate": 0.0, "total_rate": 19.48, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000} +{"timestamp": "2026-03-31T13:30:27.1035853Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 34.1, "prompt_eval_rate": 0.0, "total_rate": 34.1, "eval_count": 80, "prompt_eval_count": 0, "total_count": 80, "eval_duration": 2350000000, "prompt_eval_duration": 0, "total_duration": 2350000000} +{"timestamp": "2026-03-31T13:32:00.3570128Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.81, "prompt_eval_rate": 0.0, "total_rate": 6.81, "eval_count": 171, "prompt_eval_count": 0, "total_count": 171, "eval_duration": 25110000000, "prompt_eval_duration": 0, "total_duration": 25110000000} +{"timestamp": "2026-03-31T13:33:58.9035812Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", 
"backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.42, "prompt_eval_rate": 0.0, "total_rate": 7.42, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6470000000, "prompt_eval_duration": 0, "total_duration": 6470000000} +{"timestamp": "2026-03-31T13:34:25.2549424Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.08, "prompt_eval_rate": 0.0, "total_rate": 14.08, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000} +{"timestamp": "2026-03-31T13:34:51.9147239Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.65, "prompt_eval_rate": 0.0, "total_rate": 12.65, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-03-31T13:35:12.1648742Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.49, "prompt_eval_rate": 0.0, "total_rate": 15.49, "eval_count": 13, "prompt_eval_count": 0, "total_count": 13, "eval_duration": 
840000000, "prompt_eval_duration": 0, "total_duration": 840000000} +{"timestamp": "2026-03-31T13:35:33.5012364Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.43, "prompt_eval_rate": 0.0, "total_rate": 15.43, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000} +{"timestamp": "2026-03-31T13:35:42.2492971Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.17, "prompt_eval_rate": 0.0, "total_rate": 33.17, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000} +{"timestamp": "2026-03-31T13:36:00.9055728Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.0, "prompt_eval_rate": 0.0, "total_rate": 15.0, "eval_count": 103, "prompt_eval_count": 0, "total_count": 103, "eval_duration": 6870000000, "prompt_eval_duration": 0, "total_duration": 6870000000} +{"timestamp": "2026-03-31T13:36:18.8889526Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & 
Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.23, "prompt_eval_rate": 0.0, "total_rate": 15.23, "eval_count": 14, "prompt_eval_count": 0, "total_count": 14, "eval_duration": 920000000, "prompt_eval_duration": 0, "total_duration": 920000000} +{"timestamp": "2026-03-31T13:36:32.2594672Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.96, "prompt_eval_rate": 0.0, "total_rate": 19.96, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 950000000, "prompt_eval_duration": 0, "total_duration": 950000000} +{"timestamp": "2026-03-31T13:37:32.8805648Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.14, "prompt_eval_rate": 0.0, "total_rate": 8.14, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5650000000, "prompt_eval_duration": 0, "total_duration": 5650000000} +{"timestamp": "2026-03-31T13:38:05.1048826Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.39, "prompt_eval_rate": 0.0, "total_rate": 16.39, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 
1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000} +{"timestamp": "2026-03-31T13:38:30.7513374Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.2, "prompt_eval_rate": 0.0, "total_rate": 16.2, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000} +{"timestamp": "2026-04-07T21:50:40.0153279Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 39.09, "prompt_eval_rate": 0.0, "total_rate": 39.09, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 13100000000, "prompt_eval_duration": 0, "total_duration": 13100000000} +{"timestamp": "2026-04-07T21:51:54.1917549Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.52, "prompt_eval_rate": 0.0, "total_rate": 14.52, "eval_count": 124, "prompt_eval_count": 0, "total_count": 124, "eval_duration": 8539999999, "prompt_eval_duration": 0, "total_duration": 8539999999} +{"timestamp": "2026-04-07T21:53:54.0895254Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": 
"GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.77, "prompt_eval_rate": 0.0, "total_rate": 7.77, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6180000000, "prompt_eval_duration": 0, "total_duration": 6180000000} +{"timestamp": "2026-04-07T21:54:21.0741894Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.08, "prompt_eval_rate": 0.0, "total_rate": 18.08, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000} +{"timestamp": "2026-04-07T21:55:14.0069332Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.66, "prompt_eval_rate": 0.0, "total_rate": 15.66, "eval_count": 113, "prompt_eval_count": 0, "total_count": 113, "eval_duration": 7220000000, "prompt_eval_duration": 0, "total_duration": 7220000000} +{"timestamp": "2026-04-07T21:56:06.7107943Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.24, "prompt_eval_rate": 0.0, "total_rate": 17.24, "eval_count": 20, "prompt_eval_count": 0, 
"total_count": 20, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000} +{"timestamp": "2026-04-07T21:56:58.6567482Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 20.01, "prompt_eval_rate": 0.0, "total_rate": 20.01, "eval_count": 27, "prompt_eval_count": 0, "total_count": 27, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000} +{"timestamp": "2026-04-07T21:57:12.9824348Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.45, "prompt_eval_rate": 0.0, "total_rate": 31.45, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000} +{"timestamp": "2026-04-07T21:57:29.5667820Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.57, "prompt_eval_rate": 0.0, "total_rate": 28.57, "eval_count": 141, "prompt_eval_count": 0, "total_count": 141, "eval_duration": 4930000000, "prompt_eval_duration": 0, "total_duration": 4930000000} +{"timestamp": "2026-04-07T21:58:01.1473144Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": 
"", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 30, "prompt_eval_count": 0, "total_count": 30, "eval_duration": 2109999999, "prompt_eval_duration": 0, "total_duration": 2109999999} +{"timestamp": "2026-04-07T21:58:20.2671557Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.18, "prompt_eval_rate": 0.0, "total_rate": 20.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 940000000, "prompt_eval_duration": 0, "total_duration": 940000000} +{"timestamp": "2026-04-07T21:59:13.6367856Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.98, "prompt_eval_rate": 0.0, "total_rate": 8.98, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5120000000, "prompt_eval_duration": 0, "total_duration": 5120000000} +{"timestamp": "2026-04-07T21:59:45.6744924Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.48, "prompt_eval_rate": 0.0, "total_rate": 16.48, 
"eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000} +{"timestamp": "2026-04-07T22:00:08.3321365Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.26, "prompt_eval_rate": 0.0, "total_rate": 19.26, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000} +{"timestamp": "2026-04-07T21:35:39.4302411Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.55, "prompt_eval_rate": 0.0, "total_rate": 30.55, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16760000000, "prompt_eval_duration": 0, "total_duration": 16760000000} +{"timestamp": "2026-04-07T21:37:01.0657004Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.53, "prompt_eval_rate": 0.0, "total_rate": 8.53, "eval_count": 98, "prompt_eval_count": 0, "total_count": 98, "eval_duration": 11490000000, "prompt_eval_duration": 0, "total_duration": 11490000000} +{"timestamp": "2026-04-07T21:39:01.8455952Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": 
"pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.86, "prompt_eval_rate": 0.0, "total_rate": 6.86, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6710000000, "prompt_eval_duration": 0, "total_duration": 6710000000} +{"timestamp": "2026-04-07T21:39:28.2730624Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.68, "prompt_eval_rate": 0.0, "total_rate": 14.68, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000} +{"timestamp": "2026-04-07T21:41:56.2790832Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 4.7, "prompt_eval_rate": 0.0, "total_rate": 4.7, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 108980000000, "prompt_eval_duration": 0, "total_duration": 108980000000} +{"timestamp": "2026-04-07T21:42:47.7767757Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.22, "prompt_eval_rate": 0.0, 
"total_rate": 15.22, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000} +{"timestamp": "2026-04-07T21:43:24.2629361Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.32, "prompt_eval_rate": 0.0, "total_rate": 15.32, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000} +{"timestamp": "2026-04-07T21:43:38.4329675Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.29, "prompt_eval_rate": 0.0, "total_rate": 33.29, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 660000000, "prompt_eval_duration": 0, "total_duration": 660000000} +{"timestamp": "2026-04-07T21:43:56.0594025Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.27, "prompt_eval_rate": 0.0, "total_rate": 14.27, "eval_count": 111, "prompt_eval_count": 0, "total_count": 111, "eval_duration": 7780000000, "prompt_eval_duration": 0, "total_duration": 7780000000} +{"timestamp": "2026-04-07T21:46:03.7170133Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": 
"4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 4.88, "prompt_eval_rate": 0.0, "total_rate": 4.88, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 104890000000, "prompt_eval_duration": 0, "total_duration": 104890000000} +{"timestamp": "2026-04-07T21:46:25.1585713Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.33, "prompt_eval_rate": 0.0, "total_rate": 16.33, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000} +{"timestamp": "2026-04-07T21:47:19.3815600Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.26, "prompt_eval_rate": 0.0, "total_rate": 8.26, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5570000000, "prompt_eval_duration": 0, "total_duration": 5570000000} +{"timestamp": "2026-04-07T21:47:51.6762573Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 
16.01, "prompt_eval_rate": 0.0, "total_rate": 16.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000} +{"timestamp": "2026-04-07T21:48:13.8548868Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.94, "prompt_eval_rate": 0.0, "total_rate": 15.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000} +{"timestamp": "2026-04-19T12:08:45.2526989Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.55, "prompt_eval_rate": 0.0, "total_rate": 30.55, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16760000000, "prompt_eval_duration": 0, "total_duration": 16760000000} +{"timestamp": "2026-04-19T12:10:30.3677200Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.02, "prompt_eval_rate": 0.0, "total_rate": 6.02, "eval_count": 217, "prompt_eval_count": 0, "total_count": 217, "eval_duration": 36060000000, "prompt_eval_duration": 0, "total_duration": 36060000000} +{"timestamp": "2026-04-19T12:12:26.8304801Z", "commit": 
"e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.55, "prompt_eval_rate": 0.0, "total_rate": 7.55, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6090000000, "prompt_eval_duration": 0, "total_duration": 6090000000} +{"timestamp": "2026-04-19T12:12:54.4888402Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.17, "prompt_eval_rate": 0.0, "total_rate": 15.17, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000} +{"timestamp": "2026-04-19T12:13:47.5007598Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.76, "prompt_eval_rate": 0.0, "total_rate": 10.76, "eval_count": 8, "prompt_eval_count": 0, "total_count": 8, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000} +{"timestamp": "2026-04-19T12:14:42.3432806Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": 
"standard", "load_duration": 0, "eval_rate": 13.99, "prompt_eval_rate": 0.0, "total_rate": 13.99, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1430000000, "prompt_eval_duration": 0, "total_duration": 1430000000} +{"timestamp": "2026-04-19T12:15:22.2105996Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.94, "prompt_eval_rate": 0.0, "total_rate": 14.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1340000000, "prompt_eval_duration": 0, "total_duration": 1340000000} +{"timestamp": "2026-04-19T12:15:39.7347922Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.92, "prompt_eval_rate": 0.0, "total_rate": 30.92, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 710000000, "prompt_eval_duration": 0, "total_duration": 710000000} +{"timestamp": "2026-04-19T12:16:02.0621914Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.2, "prompt_eval_rate": 0.0, "total_rate": 14.2, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 8100000000, "prompt_eval_duration": 0, "total_duration": 8100000000} +{"timestamp": "2026-04-19T12:16:38.7345701Z", 
"commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.07, "prompt_eval_rate": 0.0, "total_rate": 11.07, "eval_count": 40, "prompt_eval_count": 0, "total_count": 40, "eval_duration": 3610000000, "prompt_eval_duration": 0, "total_duration": 3610000000} +{"timestamp": "2026-04-19T12:17:01.6352498Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.34, "prompt_eval_rate": 0.0, "total_rate": 16.34, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000} +{"timestamp": "2026-04-19T12:17:58.1960572Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.34, "prompt_eval_rate": 0.0, "total_rate": 8.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5750000000, "prompt_eval_duration": 0, "total_duration": 5750000000} +{"timestamp": "2026-04-19T12:18:30.1108522Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", 
"configuration": "standard", "load_duration": 0, "eval_rate": 16.58, "prompt_eval_rate": 0.0, "total_rate": 16.58, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000} +{"timestamp": "2026-04-19T12:18:50.3891829Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.59, "prompt_eval_rate": 0.0, "total_rate": 15.59, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": "2026-04-19T11:54:10.5516743Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 33.99, "prompt_eval_rate": 0.0, "total_rate": 33.99, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000} +{"timestamp": "2026-04-19T11:55:44.0668466Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.94, "prompt_eval_rate": 0.0, "total_rate": 13.94, "eval_count": 181, "prompt_eval_count": 0, "total_count": 181, "eval_duration": 12990000000, "prompt_eval_duration": 0, "total_duration": 12990000000} +{"timestamp": 
"2026-04-19T11:57:42.3173371Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.94, "prompt_eval_rate": 0.0, "total_rate": 7.94, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5790000000, "prompt_eval_duration": 0, "total_duration": 5790000000} +{"timestamp": "2026-04-19T11:58:18.2369368Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.55, "prompt_eval_rate": 0.0, "total_rate": 17.55, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-04-19T11:59:16.9495285Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.12, "prompt_eval_rate": 0.0, "total_rate": 13.12, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1140000000, "prompt_eval_duration": 0, "total_duration": 1140000000} +{"timestamp": "2026-04-19T12:00:21.6221524Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": 
"granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.51, "prompt_eval_rate": 0.0, "total_rate": 16.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000} +{"timestamp": "2026-04-19T12:01:12.3020518Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.52, "prompt_eval_rate": 0.0, "total_rate": 18.52, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-04-19T12:01:29.7165722Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.77, "prompt_eval_rate": 0.0, "total_rate": 30.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000} +{"timestamp": "2026-04-19T12:01:55.3802804Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.11, "prompt_eval_rate": 0.0, "total_rate": 28.11, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 4380000000, "prompt_eval_duration": 0, 
"total_duration": 4380000000} +{"timestamp": "2026-04-19T12:02:36.8992211Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 39, "prompt_eval_count": 0, "total_count": 39, "eval_duration": 2740000000, "prompt_eval_duration": 0, "total_duration": 2740000000} +{"timestamp": "2026-04-19T12:03:06.7547391Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.7, "prompt_eval_rate": 0.0, "total_rate": 17.7, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000} +{"timestamp": "2026-04-19T12:04:11.6043612Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.51, "prompt_eval_rate": 0.0, "total_rate": 7.51, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6120000000, "prompt_eval_duration": 0, "total_duration": 6120000000} +{"timestamp": "2026-04-19T12:04:54.7698813Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & 
Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.31, "prompt_eval_rate": 0.0, "total_rate": 15.31, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000} +{"timestamp": "2026-04-19T12:05:32.1672558Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.01, "prompt_eval_rate": 0.0, "total_rate": 18.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1110000000, "prompt_eval_duration": 0, "total_duration": 1110000000} +{"timestamp": "2026-04-30T11:30:12.7846142Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.58, "prompt_eval_rate": 0.0, "total_rate": 30.58, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000} +{"timestamp": "2026-04-30T11:31:34.0573231Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.15, "prompt_eval_rate": 0.0, "total_rate": 8.15, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 13990000000, 
"prompt_eval_duration": 0, "total_duration": 13990000000} +{"timestamp": "2026-04-30T11:33:22.0569754Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.56, "prompt_eval_rate": 0.0, "total_rate": 7.56, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6080000000, "prompt_eval_duration": 0, "total_duration": 6080000000} +{"timestamp": "2026-04-30T11:33:48.4026201Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.81, "prompt_eval_rate": 0.0, "total_rate": 14.81, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": "2026-04-30T11:34:37.7320049Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.64, "prompt_eval_rate": 0.0, "total_rate": 10.64, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1410000000, "prompt_eval_duration": 0, "total_duration": 1410000000} +{"timestamp": "2026-04-30T11:35:27.1637007Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": 
"GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.4, "prompt_eval_rate": 0.0, "total_rate": 15.4, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000} +{"timestamp": "2026-04-30T11:35:45.1818190Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.81, "prompt_eval_rate": 0.0, "total_rate": 15.81, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000} +{"timestamp": "2026-04-30T11:35:54.2323889Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.36, "prompt_eval_rate": 0.0, "total_rate": 34.36, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-04-30T11:36:09.8633388Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.81, "prompt_eval_rate": 0.0, "total_rate": 14.81, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 
7700000000, "prompt_eval_duration": 0, "total_duration": 7700000000} +{"timestamp": "2026-04-30T11:36:34.0269765Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 10.19, "prompt_eval_rate": 0.0, "total_rate": 10.19, "eval_count": 146, "prompt_eval_count": 0, "total_count": 146, "eval_duration": 14330000000, "prompt_eval_duration": 0, "total_duration": 14330000000} +{"timestamp": "2026-04-30T11:36:45.0461352Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.91, "prompt_eval_rate": 0.0, "total_rate": 20.91, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000} +{"timestamp": "2026-04-30T11:37:45.0756437Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 6.27, "prompt_eval_rate": 0.0, "total_rate": 6.27, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 7650000000, "prompt_eval_duration": 0, "total_duration": 7650000000} +{"timestamp": "2026-04-30T11:38:11.1457940Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", 
"workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.64, "prompt_eval_rate": 0.0, "total_rate": 16.64, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000} +{"timestamp": "2026-04-30T11:38:18.8758446Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.6, "prompt_eval_rate": 0.0, "total_rate": 15.6, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": "2026-04-30T11:20:06.7015234Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.19, "prompt_eval_rate": 0.0, "total_rate": 32.19, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000} +{"timestamp": "2026-04-30T11:21:28.3593079Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.06, "prompt_eval_rate": 0.0, "total_rate": 14.06, "eval_count": 126, "prompt_eval_count": 0, "total_count": 126, 
"eval_duration": 8960000000, "prompt_eval_duration": 0, "total_duration": 8960000000} +{"timestamp": "2026-04-30T11:23:19.3816934Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.75, "prompt_eval_rate": 0.0, "total_rate": 8.75, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5260000000, "prompt_eval_duration": 0, "total_duration": 5260000000} +{"timestamp": "2026-04-30T11:23:45.3178566Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.93, "prompt_eval_rate": 0.0, "total_rate": 15.93, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-04-30T11:24:35.0610505Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.66, "prompt_eval_rate": 0.0, "total_rate": 12.66, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1180000000, "prompt_eval_duration": 0, "total_duration": 1180000000} +{"timestamp": "2026-04-30T11:25:28.5461910Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", 
"run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.63, "prompt_eval_rate": 0.0, "total_rate": 11.63, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1720000000, "prompt_eval_duration": 0, "total_duration": 1720000000} +{"timestamp": "2026-04-30T11:26:04.0544522Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.39, "prompt_eval_rate": 0.0, "total_rate": 17.39, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1150000000, "prompt_eval_duration": 0, "total_duration": 1150000000} +{"timestamp": "2026-04-30T11:26:17.6947503Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.83, "prompt_eval_rate": 0.0, "total_rate": 29.83, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000} +{"timestamp": "2026-04-30T11:26:32.5245738Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 27.65, "prompt_eval_rate": 0.0, "total_rate": 27.65, "eval_count": 123, 
"prompt_eval_count": 0, "total_count": 123, "eval_duration": 4450000000, "prompt_eval_duration": 0, "total_duration": 4450000000} +{"timestamp": "2026-04-30T11:27:08.0695669Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.09, "prompt_eval_rate": 0.0, "total_rate": 15.09, "eval_count": 75, "prompt_eval_count": 0, "total_count": 75, "eval_duration": 4970000000, "prompt_eval_duration": 0, "total_duration": 4970000000} +{"timestamp": "2026-04-30T11:27:26.7421162Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.59, "prompt_eval_rate": 0.0, "total_rate": 17.59, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-04-30T11:27:49.7773682Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.5, "prompt_eval_rate": 0.0, "total_rate": 8.5, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5640000000, "prompt_eval_duration": 0, "total_duration": 5640000000} +{"timestamp": "2026-04-30T11:28:22.4684079Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": 
"main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.59, "prompt_eval_rate": 0.0, "total_rate": 12.59, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1590000000, "prompt_eval_duration": 0, "total_duration": 1590000000} +{"timestamp": "2026-04-30T11:28:45.7117408Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.78, "prompt_eval_rate": 0.0, "total_rate": 17.78, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000} +{"timestamp": "2026-05-04T12:04:21.5679563Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.67, "prompt_eval_rate": 0.0, "total_rate": 32.67, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 670000000, "prompt_eval_duration": 0, "total_duration": 670000000} +{"timestamp": "2026-05-04T12:04:27.7946305Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 
36.05, "prompt_eval_rate": 0.0, "total_rate": 36.05, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000} +{"timestamp": "2026-05-04T12:04:43.3776216Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 36.8, "prompt_eval_rate": 0.0, "total_rate": 36.8, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000} +{"timestamp": "2026-05-04T12:05:12.4732202Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 8100000000, "prompt_eval_duration": 0, "total_duration": 8100000000} +{"timestamp": "2026-05-04T12:06:09.3772021Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.03, "prompt_eval_rate": 0.0, "total_rate": 9.03, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5310000000, "prompt_eval_duration": 0, "total_duration": 5310000000} +{"timestamp": 
"2026-05-04T12:06:24.7665297Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 23.44, "prompt_eval_rate": 0.0, "total_rate": 23.44, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 810000000, "prompt_eval_duration": 0, "total_duration": 810000000} +{"timestamp": "2026-05-04T12:07:04.5121651Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.78, "prompt_eval_rate": 0.0, "total_rate": 14.78, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 1560000000, "prompt_eval_duration": 0, "total_duration": 1560000000} +{"timestamp": "2026-05-04T12:07:19.4971109Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.32, "prompt_eval_rate": 0.0, "total_rate": 17.32, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 1670000000, "prompt_eval_duration": 0, "total_duration": 1670000000} +{"timestamp": "2026-05-04T12:07:41.6423432Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & 
Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.83, "prompt_eval_rate": 0.0, "total_rate": 18.83, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1060000000, "prompt_eval_duration": 0, "total_duration": 1060000000} +{"timestamp": "2026-05-04T12:07:55.0935791Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 27.04, "prompt_eval_rate": 0.0, "total_rate": 27.04, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 550000000, "prompt_eval_duration": 0, "total_duration": 550000000} +{"timestamp": "2026-05-04T12:08:06.0831655Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.03, "prompt_eval_rate": 0.0, "total_rate": 31.03, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 3710000000, "prompt_eval_duration": 0, "total_duration": 3710000000} +{"timestamp": "2026-05-04T12:08:23.4854098Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.56, "prompt_eval_rate": 0.0, "total_rate": 11.56, "eval_count": 13, "prompt_eval_count": 0, 
"total_count": 13, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000} +{"timestamp": "2026-05-04T12:08:30.7136989Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 22.99, "prompt_eval_rate": 0.0, "total_rate": 22.99, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 830000000, "prompt_eval_duration": 0, "total_duration": 830000000} +{"timestamp": "2026-05-04T12:08:56.3614351Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.84, "prompt_eval_rate": 0.0, "total_rate": 8.84, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5430000000, "prompt_eval_duration": 0, "total_duration": 5430000000} +{"timestamp": "2026-05-04T12:09:04.6537885Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.51, "prompt_eval_rate": 0.0, "total_rate": 15.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000} +{"timestamp": "2026-05-04T12:09:12.5752000Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": 
"26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.06, "prompt_eval_rate": 0.0, "total_rate": 18.06, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1110000000, "prompt_eval_duration": 0, "total_duration": 1110000000} +{"timestamp": "2026-05-04T11:55:24.7608406Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.71, "prompt_eval_rate": 0.0, "total_rate": 30.71, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16670000000, "prompt_eval_duration": 0, "total_duration": 16670000000} +{"timestamp": "2026-05-04T11:55:29.9460956Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 32.15, "prompt_eval_rate": 0.0, "total_rate": 32.15, "eval_count": 14, "prompt_eval_count": 0, "total_count": 14, "eval_duration": 440000000, "prompt_eval_duration": 0, "total_duration": 440000000} +{"timestamp": "2026-05-04T11:55:40.0185725Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": 
"batch-prefill-decode", "load_duration": 0, "eval_rate": 32.56, "prompt_eval_rate": 0.0, "total_rate": 32.56, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000} +{"timestamp": "2026-05-04T11:55:45.8422318Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 40.98, "prompt_eval_rate": 0.0, "total_rate": 40.98, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 760000000, "prompt_eval_duration": 0, "total_duration": 760000000} +{"timestamp": "2026-05-04T11:55:52.8736744Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 33.48, "prompt_eval_rate": 0.0, "total_rate": 33.48, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 450000000, "prompt_eval_duration": 0, "total_duration": 450000000} +{"timestamp": "2026-05-04T11:57:01.4292312Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.13, "prompt_eval_rate": 0.0, "total_rate": 7.13, "eval_count": 156, "prompt_eval_count": 0, "total_count": 156, "eval_duration": 21870000000, "prompt_eval_duration": 0, 
"total_duration": 21870000000} +{"timestamp": "2026-05-04T11:57:55.1774975Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.02, "prompt_eval_rate": 0.0, "total_rate": 8.02, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5740000000, "prompt_eval_duration": 0, "total_duration": 5740000000} +{"timestamp": "2026-05-04T11:58:07.3571474Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.81, "prompt_eval_rate": 0.0, "total_rate": 17.81, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000} +{"timestamp": "2026-05-04T11:58:22.1862545Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.54, "prompt_eval_rate": 0.0, "total_rate": 12.54, "eval_count": 17, "prompt_eval_count": 0, "total_count": 17, "eval_duration": 1360000000, "prompt_eval_duration": 0, "total_duration": 1360000000} +{"timestamp": "2026-05-04T11:59:02.9692119Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", 
"run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.0, "prompt_eval_rate": 0.0, "total_rate": 16.0, "eval_count": 34, "prompt_eval_count": 0, "total_count": 34, "eval_duration": 2130000000, "prompt_eval_duration": 0, "total_duration": 2130000000} +{"timestamp": "2026-05-04T11:59:14.9062592Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.54, "prompt_eval_rate": 0.0, "total_rate": 15.54, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000} +{"timestamp": "2026-05-04T11:59:23.8101354Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.97, "prompt_eval_rate": 0.0, "total_rate": 33.97, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000} +{"timestamp": "2026-05-04T11:59:37.8817756Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.49, "prompt_eval_rate": 0.0, "total_rate": 15.49, 
"eval_count": 103, "prompt_eval_count": 0, "total_count": 103, "eval_duration": 6650000000, "prompt_eval_duration": 0, "total_duration": 6650000000} +{"timestamp": "2026-05-04T11:59:59.8121966Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1060000000, "prompt_eval_duration": 0, "total_duration": 1060000000} +{"timestamp": "2026-05-04T12:00:10.7970585Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 21.25, "prompt_eval_rate": 0.0, "total_rate": 21.25, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 890000000, "prompt_eval_duration": 0, "total_duration": 890000000} +{"timestamp": "2026-05-04T12:00:40.4307689Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.68, "prompt_eval_rate": 0.0, "total_rate": 8.68, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5300000000, "prompt_eval_duration": 0, "total_duration": 5300000000} +{"timestamp": "2026-05-04T12:00:57.1961185Z", "commit": 
"26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.69, "prompt_eval_rate": 0.0, "total_rate": 16.69, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000} +{"timestamp": "2026-05-04T12:01:13.9655300Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.85, "prompt_eval_rate": 0.0, "total_rate": 15.85, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000} +{"timestamp": "2026-05-04T12:11:07.9765458Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.57, "prompt_eval_rate": 0.0, "total_rate": 31.57, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 480000000, "prompt_eval_duration": 0, "total_duration": 480000000} +{"timestamp": "2026-05-04T12:11:13.4862167Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": 
"Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.12, "prompt_eval_rate": 0.0, "total_rate": 35.12, "eval_count": 21, "prompt_eval_count": 0, "total_count": 21, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000} +{"timestamp": "2026-05-04T12:11:21.1983246Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 37.27, "prompt_eval_rate": 0.0, "total_rate": 37.27, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000} +{"timestamp": "2026-05-04T12:12:20.1272735Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 137, "prompt_eval_count": 0, "total_count": 137, "eval_duration": 9700000000, "prompt_eval_duration": 0, "total_duration": 9700000000} +{"timestamp": "2026-05-04T12:13:37.1853298Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.43, "prompt_eval_rate": 0.0, "total_rate": 9.43, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, 
"eval_duration": 4880000000, "prompt_eval_duration": 0, "total_duration": 4880000000} +{"timestamp": "2026-05-04T12:14:00.8454739Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.18, "prompt_eval_rate": 0.0, "total_rate": 19.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000} +{"timestamp": "2026-05-04T12:14:28.1360263Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.27, "prompt_eval_rate": 0.0, "total_rate": 13.27, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 1730000000, "prompt_eval_duration": 0, "total_duration": 1730000000} +{"timestamp": "2026-05-04T12:14:52.6172460Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.84, "prompt_eval_rate": 0.0, "total_rate": 16.84, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000} +{"timestamp": "2026-05-04T12:15:24.1245352Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": 
"main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.23, "prompt_eval_rate": 0.0, "total_rate": 18.23, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1100000000, "prompt_eval_duration": 0, "total_duration": 1100000000} +{"timestamp": "2026-05-04T12:15:36.7690104Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.88, "prompt_eval_rate": 0.0, "total_rate": 29.88, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000} +{"timestamp": "2026-05-04T12:15:58.7383276Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.74, "prompt_eval_rate": 0.0, "total_rate": 29.74, "eval_count": 110, "prompt_eval_count": 0, "total_count": 110, "eval_duration": 3700000000, "prompt_eval_duration": 0, "total_duration": 3700000000} +{"timestamp": "2026-05-04T12:16:51.3131788Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, 
"eval_rate": 14.5, "prompt_eval_rate": 0.0, "total_rate": 14.5, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 35310000000, "prompt_eval_duration": 0, "total_duration": 35310000000} +{"timestamp": "2026-05-04T12:17:09.0073182Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.73, "prompt_eval_rate": 0.0, "total_rate": 17.73, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000} +{"timestamp": "2026-05-04T12:17:52.9209199Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.55, "prompt_eval_rate": 0.0, "total_rate": 8.55, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5610000000, "prompt_eval_duration": 0, "total_duration": 5610000000} +{"timestamp": "2026-05-04T12:18:24.7812117Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.87, "prompt_eval_rate": 0.0, "total_rate": 15.87, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000} 
+{"timestamp": "2026-05-04T12:18:43.6490300Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.83, "prompt_eval_rate": 0.0, "total_rate": 17.83, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000} +{"timestamp": "2026-05-04T12:20:16.6547715Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.77, "prompt_eval_rate": 0.0, "total_rate": 30.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000} +{"timestamp": "2026-05-04T12:20:21.9810122Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 33.31, "prompt_eval_rate": 0.0, "total_rate": 33.31, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000} +{"timestamp": "2026-05-04T12:20:29.3048624Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 
Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 30.49, "prompt_eval_rate": 0.0, "total_rate": 30.49, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 720000000, "prompt_eval_duration": 0, "total_duration": 720000000} +{"timestamp": "2026-05-04T12:20:35.0154804Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 40.77, "prompt_eval_rate": 0.0, "total_rate": 40.77, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000} +{"timestamp": "2026-05-04T12:20:42.2659296Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 35.66, "prompt_eval_rate": 0.0, "total_rate": 35.66, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 620000000, "prompt_eval_duration": 0, "total_duration": 620000000} +{"timestamp": "2026-05-04T12:21:50.0194380Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.14, "prompt_eval_rate": 0.0, "total_rate": 7.14, "eval_count": 156, 
"prompt_eval_count": 0, "total_count": 156, "eval_duration": 21860000000, "prompt_eval_duration": 0, "total_duration": 21860000000} +{"timestamp": "2026-05-04T12:22:50.8726674Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.01, "prompt_eval_rate": 0.0, "total_rate": 8.01, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6000000000, "prompt_eval_duration": 0, "total_duration": 6000000000} +{"timestamp": "2026-05-04T12:23:04.2490589Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.92, "prompt_eval_rate": 0.0, "total_rate": 16.92, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000} +{"timestamp": "2026-05-04T12:25:09.6454017Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 4.65, "prompt_eval_rate": 0.0, "total_rate": 4.65, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 110030000000, "prompt_eval_duration": 0, "total_duration": 110030000000} +{"timestamp": "2026-05-04T12:25:48.0000402Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", 
"short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.68, "prompt_eval_rate": 0.0, "total_rate": 15.68, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": "2026-05-04T12:25:56.3108580Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.67, "prompt_eval_rate": 0.0, "total_rate": 15.67, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000} +{"timestamp": "2026-05-04T12:26:04.7426553Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 32.16, "prompt_eval_rate": 0.0, "total_rate": 32.16, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 620000000, "prompt_eval_duration": 0, "total_duration": 620000000} +{"timestamp": "2026-05-04T12:26:21.6057067Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", 
"load_duration": 0, "eval_rate": 14.48, "prompt_eval_rate": 0.0, "total_rate": 14.48, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 7940000000, "prompt_eval_duration": 0, "total_duration": 7940000000} +{"timestamp": "2026-05-04T12:26:45.4045892Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.19, "prompt_eval_rate": 0.0, "total_rate": 14.19, "eval_count": 13, "prompt_eval_count": 0, "total_count": 13, "eval_duration": 920000000, "prompt_eval_duration": 0, "total_duration": 920000000} +{"timestamp": "2026-05-04T12:26:58.6663276Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.96, "prompt_eval_rate": 0.0, "total_rate": 20.96, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000} +{"timestamp": "2026-05-04T12:27:33.1874529Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.62, "prompt_eval_rate": 0.0, "total_rate": 8.62, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5570000000, "prompt_eval_duration": 0, "total_duration": 5570000000} 
+{"timestamp": "2026-05-04T12:27:56.7492385Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.51, "prompt_eval_rate": 0.0, "total_rate": 16.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000} +{"timestamp": "2026-05-04T12:28:13.8129306Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.94, "prompt_eval_rate": 0.0, "total_rate": 15.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000} +{"timestamp": "2026-05-06T11:18:20.8850298Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 33.74, "prompt_eval_rate": 0.0, "total_rate": 33.74, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000} +{"timestamp": "2026-05-06T11:18:30.4028640Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 
Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.3, "prompt_eval_rate": 39.16, "total_rate": 36.4, "eval_count": 27, "prompt_eval_count": 12, "total_count": 39, "eval_duration": 760000000, "prompt_eval_duration": 310000000, "total_duration": 1070000000} +{"timestamp": "2026-05-06T11:18:38.1428213Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 17.83, "prompt_eval_rate": 44.43, "total_rate": 34.22, "eval_count": 3, "prompt_eval_count": 12, "total_count": 15, "eval_duration": 170000000, "prompt_eval_duration": 270000000, "total_duration": 440000000} +{"timestamp": "2026-05-06T11:19:54.8336127Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.82, "prompt_eval_rate": 0.0, "total_rate": 11.82, "eval_count": 124, "prompt_eval_count": 0, "total_count": 124, "eval_duration": 10490000000, "prompt_eval_duration": 0, "total_duration": 10490000000} +{"timestamp": "2026-05-06T11:21:14.2633753Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.85, "prompt_eval_rate": 0.0, "total_rate": 
8.85, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5200000000, "prompt_eval_duration": 0, "total_duration": 5200000000} +{"timestamp": "2026-05-06T11:21:37.7443900Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.15, "prompt_eval_rate": 0.0, "total_rate": 19.15, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000} +{"timestamp": "2026-05-06T11:22:19.7584207Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.09, "prompt_eval_rate": 0.0, "total_rate": 14.09, "eval_count": 25, "prompt_eval_count": 0, "total_count": 25, "eval_duration": 1770000000, "prompt_eval_duration": 0, "total_duration": 1770000000} +{"timestamp": "2026-05-06T11:23:09.5080876Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.67, "prompt_eval_rate": 0.0, "total_rate": 16.67, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000} +{"timestamp": "2026-05-06T11:23:57.1947828Z", "commit": 
"0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.52, "prompt_eval_rate": 0.0, "total_rate": 18.52, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000} +{"timestamp": "2026-05-06T11:24:10.2080614Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.78, "prompt_eval_rate": 0.0, "total_rate": 29.78, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 500000000, "prompt_eval_duration": 0, "total_duration": 500000000} +{"timestamp": "2026-05-06T11:24:24.2488086Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.28, "prompt_eval_rate": 0.0, "total_rate": 29.28, "eval_count": 112, "prompt_eval_count": 0, "total_count": 112, "eval_duration": 3830000000, "prompt_eval_duration": 0, "total_duration": 3830000000} +{"timestamp": "2026-05-06T11:24:53.6566294Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": 
"Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.47, "prompt_eval_rate": 0.0, "total_rate": 11.47, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000} +{"timestamp": "2026-05-06T11:25:11.8539184Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.33, "prompt_eval_rate": 0.0, "total_rate": 18.33, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000} +{"timestamp": "2026-05-06T11:25:54.3531897Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.39, "prompt_eval_rate": 0.0, "total_rate": 8.39, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5720000000, "prompt_eval_duration": 0, "total_duration": 5720000000} +{"timestamp": "2026-05-06T11:26:33.5663299Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.33, "prompt_eval_rate": 0.0, "total_rate": 15.33, "eval_count": 20, "prompt_eval_count": 0, "total_count": 
20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000} +{"timestamp": "2026-05-06T11:26:53.0868149Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.41, "prompt_eval_rate": 0.0, "total_rate": 16.41, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000} +{"timestamp": "2026-05-06T11:09:15.4770696Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.75, "prompt_eval_rate": 0.0, "total_rate": 30.75, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16649999999, "prompt_eval_duration": 0, "total_duration": 16649999999} +{"timestamp": "2026-05-06T11:09:37.0148114Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 30.6, "prompt_eval_rate": 37.08, "total_rate": 30.73, "eval_count": 500, "prompt_eval_count": 12, "total_count": 512, "eval_duration": 16340000000, "prompt_eval_duration": 320000000, "total_duration": 16660000000} +{"timestamp": "2026-05-06T11:09:44.7350508Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": 
"0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 32.1, "prompt_eval_rate": 36.14, "total_rate": 33.31, "eval_count": 25, "prompt_eval_count": 12, "total_count": 37, "eval_duration": 780000000, "prompt_eval_duration": 330000000, "total_duration": 1110000000} +{"timestamp": "2026-05-06T11:09:50.2611626Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.93, "prompt_eval_rate": 48.35, "total_rate": 39.9, "eval_count": 19, "prompt_eval_count": 12, "total_count": 31, "eval_duration": 530000000, "prompt_eval_duration": 250000000, "total_duration": 780000000} +{"timestamp": "2026-05-06T11:10:00.3706462Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 34.4, "prompt_eval_rate": 36.63, "total_rate": 35.58, "eval_count": 10, "prompt_eval_count": 12, "total_count": 22, "eval_duration": 290000000, "prompt_eval_duration": 330000000, "total_duration": 620000000} +{"timestamp": "2026-05-06T11:10:40.6784157Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", 
"quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.91, "prompt_eval_rate": 0.0, "total_rate": 7.91, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 15550000000, "prompt_eval_duration": 0, "total_duration": 15550000000} +{"timestamp": "2026-05-06T11:11:12.6627139Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.59, "prompt_eval_rate": 0.0, "total_rate": 7.59, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6060000000, "prompt_eval_duration": 0, "total_duration": 6060000000} +{"timestamp": "2026-05-06T11:11:34.7510955Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.35, "prompt_eval_rate": 0.0, "total_rate": 15.35, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000} +{"timestamp": "2026-05-06T11:12:16.1973421Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.38, "prompt_eval_rate": 0.0, "total_rate": 10.38, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1440000000, 
"prompt_eval_duration": 0, "total_duration": 1440000000} +{"timestamp": "2026-05-06T11:13:03.3584541Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.03, "prompt_eval_rate": 0.0, "total_rate": 15.03, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000} +{"timestamp": "2026-05-06T11:13:36.3050454Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.7, "prompt_eval_rate": 0.0, "total_rate": 14.7, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1360000000, "prompt_eval_duration": 0, "total_duration": 1360000000} +{"timestamp": "2026-05-06T11:13:44.4973845Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.06, "prompt_eval_rate": 0.0, "total_rate": 33.06, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 670000000, "prompt_eval_duration": 0, "total_duration": 670000000} +{"timestamp": "2026-05-06T11:14:05.6560088Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": 
"316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 13.9, "prompt_eval_rate": 0.0, "total_rate": 13.9, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 8850000000, "prompt_eval_duration": 0, "total_duration": 8850000000} +{"timestamp": "2026-05-06T11:14:41.9066315Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.11, "prompt_eval_rate": 0.0, "total_rate": 11.11, "eval_count": 107, "prompt_eval_count": 0, "total_count": 107, "eval_duration": 9630000000, "prompt_eval_duration": 0, "total_duration": 9630000000} +{"timestamp": "2026-05-06T11:14:59.2856984Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.18, "prompt_eval_rate": 0.0, "total_rate": 18.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000} +{"timestamp": "2026-05-06T11:15:40.1511897Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.16, "prompt_eval_rate": 0.0, 
"total_rate": 8.16, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5880000000, "prompt_eval_duration": 0, "total_duration": 5880000000} +{"timestamp": "2026-05-06T11:16:11.8078059Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.89, "prompt_eval_rate": 0.0, "total_rate": 16.89, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1180000000, "prompt_eval_duration": 0, "total_duration": 1180000000} +{"timestamp": "2026-05-06T11:16:30.5879933Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.56, "prompt_eval_rate": 0.0, "total_rate": 15.56, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000} diff --git a/scripts/process_metrics.py b/scripts/process_metrics.py new file mode 100644 index 00000000..98242668 --- /dev/null +++ b/scripts/process_metrics.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Append benchmark JSON metrics to the performance history JSONL file. + +Scans --metrics-dir recursively for *.json files. Each metrics file must have a +companion *.meta.json sidecar (same stem) written by the CI step that produced it. +The sidecar schema is open-ended — this script does not assume any fixed fields. 
# Usage:
#     python3 scripts/process_metrics.py \
#         --metrics-dir /path/to/artifacts \
#         --commit $GITHUB_SHA --branch main \
#         --run-id $GITHUB_RUN_ID --run-number $GITHUB_RUN_NUMBER \
#         --run-attempt 1 --workflow "GPULlama3 Build & Run" \
#         --history docs/perf-history.jsonl

import argparse
import json
import sys
from datetime import datetime, timezone
from pathlib import Path


def parse_args():
    """Parse the command-line options identifying the CI run and the file locations.

    Returns:
        argparse.Namespace with the attributes ``metrics_dir``, ``history``,
        ``commit``, ``branch``, ``run_id``, ``run_number`` (defaults to ""),
        ``run_attempt`` and ``workflow``.
    """
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--metrics-dir", required=True, dest="metrics_dir",
                   help="Directory to search recursively for *.json + *.meta.json pairs")
    p.add_argument("--history", required=True,
                   help="JSONL history file to append rows to")
    p.add_argument("--commit", required=True)
    p.add_argument("--branch", required=True)
    p.add_argument("--run-id", required=True, dest="run_id")
    p.add_argument("--run-number", default="", dest="run_number")
    p.add_argument("--run-attempt", required=True, dest="run_attempt")
    p.add_argument("--workflow", required=True)
    return p.parse_args()


def load_json(path):
    """Read and decode one JSON file.

    Args:
        path: File to read (str or Path).

    Returns:
        The decoded JSON value, or None after printing an ``ERROR:`` line to
        stderr when the file cannot be read, is not valid UTF-8, or is not
        valid JSON.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # runner's locale.
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError, so a
    # single badly encoded file is reported and skipped instead of aborting.
    except (ValueError, OSError) as e:
        print(f"ERROR: {path}: {e}", file=sys.stderr)
        return None


def discover_pairs(metrics_dir):
    """Yield (metrics_path, meta_path) for every non-sidecar JSON found recursively.

    Files are visited in sorted path order. The sidecar path is the metrics
    path with its final ``.json`` suffix replaced by ``.meta.json``; it is not
    checked for existence here.
    """
    for path in sorted(Path(metrics_dir).rglob("*.json")):
        if path.name.endswith(".meta.json"):
            continue
        yield path, path.with_suffix(".meta.json")


def build_row(m, meta, args):
    """Build one history row from a metrics dict, its sidecar dict and the CLI args.

    Args:
        m: Decoded metrics JSON object.
        meta: Decoded sidecar JSON object.
        args: Namespace returned by ``parse_args``.

    Returns:
        A dict holding the run identification, selected sidecar and metric
        fields promoted to the top level (None when absent), and the full
        ``meta`` / ``m`` objects under ``benchmark`` / ``metrics``.
    """
    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "commit": args.commit,
        "short_commit": args.commit[:8],
        "branch": args.branch,
        "run_id": args.run_id,
        "run_number": args.run_number or "",
        "run_attempt": args.run_attempt,
        "workflow": args.workflow,
        # Flat compat fields — sourced from sidecar; null when absent
        "backend": meta.get("backend"),
        "model": meta.get("model"),
        "quantization": meta.get("quantization"),
        "configuration": meta.get("configuration"),
        # Key metrics promoted to top level — null when absent in the metrics file
        "eval_rate": m.get("eval_rate"),
        "prompt_eval_rate": m.get("prompt_eval_rate"),
        "total_rate": m.get("total_rate"),
        "eval_count": m.get("eval_count"),
        "prompt_eval_count": m.get("prompt_eval_count"),
        "total_count": m.get("total_count"),
        "total_duration": m.get("total_duration"),
        "load_duration": m.get("load_duration"),
        "prompt_eval_duration": m.get("prompt_eval_duration"),
        "eval_duration": m.get("eval_duration"),
        "has_prefill_phase": m.get("has_prefill_phase"),
        "tornadovm": m.get("tornadovm"),
        # Nested full objects — open-ended; schema is whatever the benchmark step writes
        "benchmark": meta,
        "metrics": m,
    }


def _row_label(meta):
    """Return a 'backend / model / quantization / configuration' progress label.

    Missing or empty parts are dropped. Every part is passed through ``str``
    because sidecar values may be numbers or booleans rather than strings.
    """
    parts = [
        meta.get("backend"),
        meta.get("model"),
        meta.get("quantization"),
        meta.get("configuration") or meta.get("task"),
    ]
    return " / ".join(str(part) for part in parts if part)


def main():
    """Collect every metrics/sidecar pair and append one JSONL row per pair.

    Pairs whose metrics file or sidecar is missing, unreadable or not a JSON
    object are skipped with a warning on stderr. When nothing could be loaded
    the history file is left untouched.
    """
    args = parse_args()
    rows = []

    for metrics_path, meta_path in discover_pairs(args.metrics_dir):
        m = load_json(metrics_path)
        if not isinstance(m, dict):
            print(f"WARNING: {metrics_path.name}: not a JSON object, skipping", file=sys.stderr)
            continue

        if not meta_path.exists():
            print(f"WARNING: no sidecar for {metrics_path.name}, skipping", file=sys.stderr)
            continue
        meta = load_json(meta_path)
        if not isinstance(meta, dict):
            print(f"WARNING: {meta_path.name}: not a JSON object, skipping", file=sys.stderr)
            continue

        rows.append(build_row(m, meta, args))
        print(f"  {_row_label(meta) or metrics_path.name}", file=sys.stderr)

    if not rows:
        print("WARNING: no metrics loaded, nothing written", file=sys.stderr)
        return

    history = Path(args.history)
    history.parent.mkdir(parents=True, exist_ok=True)
    with open(history, "a", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row) + "\n")
    print(f"Appended {len(rows)} row(s) to {history}", file=sys.stderr)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# File: scripts/write_metrics_sidecar.py
"""
Write a benchmark metadata sidecar JSON file from KEY=VALUE arguments.

Values are stored as strings unless they parse as a valid JSON literal
(true, false, null, or a number), making it easy to pass typed metadata
from shell without quoting gymnastics.

Usage:
    python3 scripts/write_metrics_sidecar.py --out /path/to/file.meta.json \\
        backend=opencl \\
        task=llama-inference \\
        model_file=Llama-3.2-1B-Instruct-F16.gguf \\
        configuration=standard \\
        flags="" \\
        prompt="Say hello"
"""

import argparse
import json
import math
import sys
from pathlib import Path


def coerce(value):
    """Return value as a JSON-native scalar when unambiguous, otherwise keep it as a string.

    Only ``true``, ``false``, ``null`` and finite numbers are converted.
    Text that happens to be a JSON string, array or object (for example a
    prompt such as ``[1, 2]``) stays a string, and so do ``NaN``,
    ``Infinity`` and overflowing floats, because they cannot be written back
    as standard JSON.

    Args:
        value: Raw text to the right of ``=`` in a KEY=VALUE argument.

    Returns:
        None, bool, int, finite float, or the original string.
    """
    try:
        parsed = json.loads(value)
    # JSONDecodeError is a ValueError; RecursionError guards against absurdly
    # nested input such as thousands of "[" characters.
    except (ValueError, RecursionError):
        return value

    if parsed is None or isinstance(parsed, (bool, int)):
        return parsed
    # Check finiteness on floats only: math.isfinite() overflows on huge ints.
    if isinstance(parsed, float) and math.isfinite(parsed):
        return parsed
    return value


def parse_args():
    """Parse ``--out`` plus any number of positional KEY=VALUE fields."""
    p = argparse.ArgumentParser(description=__doc__,
                                formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--out", required=True, help="Output .meta.json path")
    p.add_argument("fields", nargs="*", metavar="KEY=VALUE")
    return p.parse_args()


def main():
    """Build the metadata dict from the KEY=VALUE fields and write it to ``--out``.

    Exits with status 1 when a field has no ``=`` or an empty key. Only the
    first ``=`` separates key from value, so values may themselves contain
    ``=``. A key given more than once keeps its last value.
    """
    args = parse_args()
    meta = {}
    for field in args.fields:
        key, sep, value = field.partition("=")
        if not sep or not key:
            print(f"ERROR: expected KEY=VALUE, got: {field!r}", file=sys.stderr)
            sys.exit(1)
        meta[key] = coerce(value)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    with open(out, "w", encoding="utf-8") as f:
        # allow_nan=False makes a non-standard NaN/Infinity token a hard error
        # rather than silently producing a file strict parsers reject.
        json.dump(meta, f, allow_nan=False)
    print(f"Wrote sidecar: {out}", file=sys.stderr)


if __name__ == "__main__":
    main()