diff --git a/.github/workflows/build-and-run.yml b/.github/workflows/build-and-run.yml
index 41e6d6c3..a17c9228 100644
--- a/.github/workflows/build-and-run.yml
+++ b/.github/workflows/build-and-run.yml
@@ -4,7 +4,7 @@ on:
push:
branches: [ main ]
pull_request:
- branches: [ main ]
+ branches: [ main ]
types: [opened, synchronize, reopened]
env:
@@ -13,6 +13,8 @@ env:
LLAMA_ROOT: ${{ github.workspace }}
GRAAL_JARS: /opt/graalJars
MODELS_DIR: /opt/models
+ # JSONL performance history (path relative to the repo root); the publish-performance-history job updates it and commits it back on pushes to main
+ PERF_HISTORY_FILE: docs/perf-history.jsonl
jobs:
code-quality:
@@ -51,20 +53,22 @@ jobs:
git clone --depth 1 --branch master \
https://github.com/beehive-lab/TornadoVM.git \
$TORNADO_ROOT
+
- name: Set up Python venv for TornadoVM
run: |
python3 -m venv $TORNADO_ROOT/venv
source $TORNADO_ROOT/venv/bin/activate
python --version
+
- name: Build TornadoVM
run: |
cd $TORNADO_ROOT
mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
source venv/bin/activate
echo "=== Building TornadoVM ==="
-
+
make BACKEND=${{ matrix.backend.name }}
-
+
echo "=== Searching for TornadoVM SDK directory ==="
SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1)
if [ -z "$SDK_DIR" ]; then
@@ -74,7 +78,7 @@ jobs:
fi
FULL_SDK="${PWD}/${SDK_DIR}"
echo "Detected TornadoVM SDK: $FULL_SDK"
-
+
# Export for current shell session
export TORNADOVM_HOME="$FULL_SDK"
export PATH="$FULL_SDK/bin:$JAVA_HOME/bin:$PATH"
@@ -82,10 +86,11 @@ jobs:
# Save for subsequent steps
echo "TORNADOVM_HOME=$FULL_SDK" >> $GITHUB_ENV
echo "PATH=$PATH" >> $GITHUB_ENV
-
+
echo "=== Checking tornado CLI ==="
which tornado || { echo "::error::tornado not in PATH"; exit 1; }
tornado --devices
+
- name: Build GPULlama3.java
run: |
cd ${{ github.workspace }}
@@ -93,14 +98,36 @@ jobs:
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
tornado --version
./mvnw clean package -DskipTests
+
- name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Standard
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-F16.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
@@ -108,7 +135,23 @@ jobs:
--model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
--prompt "Say hello" \
--with-prefill-decode
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-prefill-decode.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-F16.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=F16 \
+ configuration=prefill-decode \
+ "flags=--with-prefill-decode" \
+ prompt="Say hello"
+
- name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
@@ -116,8 +159,25 @@ jobs:
--model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
--prompt "Say hello" \
--with-prefill-decode --batch-prefill-size 32
- - name: PTX- FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-f16-batch-prefill-decode.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-F16.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=F16 \
+ configuration=batch-prefill-decode \
+ "flags=--with-prefill-decode --batch-prefill-size 32" \
+ prompt="Say hello"
+
+ # ── PTX-only: CUDA-graph variants ────────────────────────────────────────
+ - name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Prefill-Decode-CUDA-Graphs
if: matrix.backend.name == 'ptx'
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
@@ -126,8 +186,24 @@ jobs:
--prompt "Say hello" \
--with-prefill-decode \
--cuda-graphs
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-prefill-decode-cuda-graphs.meta.json" \
+ backend=ptx \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-F16.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=F16 \
+ configuration=prefill-decode-cuda-graphs \
+ "flags=--with-prefill-decode --cuda-graphs" \
+ prompt="Say hello"
+
- name: PTX - FP16 - Run Llama-3.2-1B-Instruct-F16.gguf - Batch-Prefill-Decode-CUDA-Graphs
if: matrix.backend.name == 'ptx'
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
@@ -136,95 +212,371 @@ jobs:
--prompt "Say hello" \
--with-prefill-decode --batch-prefill-size 32 \
--cuda-graphs
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-ptx-llama-1b-f16-batch-prefill-decode-cuda-graphs.meta.json" \
+ backend=ptx \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-F16.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=F16 \
+ configuration=batch-prefill-decode-cuda-graphs \
+ "flags=--with-prefill-decode --batch-prefill-size 32 --cuda-graphs" \
+ prompt="Say hello"
+
+ # ── Additional models — standard inference, all backends ─────────────────
- name: FP16 - Run Qwen3-4B-f16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Qwen3-4B-f16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-4b-f16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Qwen3-4B-f16.gguf \
+ model=Qwen3-4B \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-fp16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Mistral-7B-Instruct-v0.3.fp16.gguf \
+ model=Mistral-7B-Instruct-v0.3 \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-fp16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=qwen2.5-1.5b-instruct-fp16.gguf \
+ model=Qwen2.5-1.5B-Instruct \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
- --model /$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
+ --model $MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-fp16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Phi-3-mini-4k-instruct-fp16.gguf \
+ model=Phi-3-mini-4k-instruct \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Granite-3.2-2b-instruct-f16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
- --model /$MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
+ --model $MODELS_DIR/granite-3.2-2b-instruct-f16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-f16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=granite-3.2-2b-instruct-f16.gguf \
+ model=Granite-3.2-2B-Instruct \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: FP16 - Run Granite-4.0-1b-F16.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
- --model /$MODELS_DIR/granite-4.0-1b-F16.gguf \
+ --model $MODELS_DIR/granite-4.0-1b-F16.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-f16-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=granite-4.0-1b-F16.gguf \
+ model=Granite-4.0-1B \
+ quantization=F16 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-llama-1b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Llama-3.2-1B-Instruct-Q8_0.gguf \
+ model=Llama-3.2-1B-Instruct \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Qwen3-0.6B-Q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen3-0-6b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Qwen3-0.6B-Q8_0.gguf \
+ model=Qwen3-0.6B \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-phi3-mini-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Phi-3-mini-4k-instruct-Q8_0.gguf \
+ model=Phi-3-mini-4k-instruct \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-qwen2-5-1-5b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=qwen2.5-1.5b-instruct-q8_0.gguf \
+ model=Qwen2.5-1.5B-Instruct \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Mistral-7B-Instruct-v0.3.Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
--model $MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf \
--prompt "Say hello"
- - name: Q8 - Run Granite-3.2-2b-instruct-Q8.gguf
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-mistral-7b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=Mistral-7B-Instruct-v0.3.Q8_0.gguf \
+ model=Mistral-7B-Instruct-v0.3 \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
+ - name: Q8 - Run Granite-3.2-2b-instruct-Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
- --model /$MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
+ --model $MODELS_DIR/granite-3.2-2b-instruct-Q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-3-2-2b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=granite-3.2-2b-instruct-Q8_0.gguf \
+ model=Granite-3.2-2B-Instruct \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
- name: Q8 - Run Granite-4.0-1b-Q8_0.gguf
+ env:
+ JAVA_TOOL_OPTIONS: >-
+ -Dllama.metrics.format=json
+ -Dllama.metrics.output=file
+ -Dllama.metrics.file=${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.json
run: |
cd ${{ github.workspace }}
export PATH="$TORNADOVM_HOME/bin:$JAVA_HOME/bin:$PATH"
./llama-tornado --gpu --${{ matrix.backend.name }} \
- --model /$MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
+ --model $MODELS_DIR/granite-4.0-1b-Q8_0.gguf \
--prompt "Say hello"
+ python3 scripts/write_metrics_sidecar.py \
+ --out "${{ runner.temp }}/metrics-${{ matrix.backend.name }}-granite-4-0-1b-q8-standard.meta.json" \
+ backend="${{ matrix.backend.name }}" \
+ task=llama-inference \
+ model_file=granite-4.0-1b-Q8_0.gguf \
+ model=Granite-4.0-1B \
+ quantization=Q8_0 \
+ configuration=standard \
+ flags="" \
+ prompt="Say hello"
+
+ # ── Upload metrics for the publish job (runs even when an earlier step failed) ──
+ - name: Upload metrics artifacts
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: metrics-${{ matrix.backend.name }}-${{ github.run_id }}
+ path: ${{ runner.temp }}/metrics-${{ matrix.backend.name }}-*.json
+ if-no-files-found: warn
+
+  # ── Separate job: collect all matrix metrics and update history ───────────────
+  publish-performance-history:
+    # Guard: only commit history on real pushes to main, not on PRs or forks.
+    # Prevents duplicate entries from PR runs and avoids push-permission errors on forks.
+    if: >-
+      github.repository == 'beehive-lab/GPULlama3.java' &&
+      github.event_name == 'push' &&
+      github.ref == 'refs/heads/main'
+    runs-on: [self-hosted]
+    needs: build-and-run
+    timeout-minutes: 15
+    # The commit step pushes with GITHUB_TOKEN; grant it write access to repository contents.
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout GPULlama3
+        uses: actions/checkout@v4
+      - name: Download metrics artifacts
+        uses: actions/download-artifact@v4
+        with:
+          pattern: metrics-*-${{ github.run_id }}
+          path: ${{ runner.temp }}/metrics-artifacts
+          merge-multiple: true
+      - name: Append to performance history
+        run: |
+          python3 scripts/process_metrics.py \
+            --metrics-dir "${{ runner.temp }}/metrics-artifacts" \
+            --commit "${{ github.sha }}" \
+            --branch "${{ github.ref_name }}" \
+            --run-id "${{ github.run_id }}" \
+            --run-number "${{ github.run_number }}" \
+            --run-attempt "${{ github.run_attempt }}" \
+            --workflow "${{ github.workflow }}" \
+            --history "$PERF_HISTORY_FILE"
+      - name: Commit performance history
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          git add "$PERF_HISTORY_FILE"
+          git diff --cached --quiet && echo "No history changes to commit" && exit 0
+          git commit -m "perf: record run #${{ github.run_number }} @ ${GITHUB_SHA::8}"
+          for attempt in 1 2 3; do
+            if git pull --rebase origin main && git push origin HEAD:main; then exit 0; fi
+            git rebase --abort 2>/dev/null || true  # clear a half-finished rebase so the retry starts clean
+            if [ "$attempt" -lt 3 ]; then echo "Attempt $attempt failed, retrying in $((attempt * 5))s..."; sleep $((attempt * 5)); fi
+          done
+          echo "::error::Failed to push after 3 attempts"; exit 1
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 00000000..cdc2e32b
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,268 @@
+
+
+
+
+
+ GPULlama3 Performance History
+
+
+
+
+ GPULlama3 — Performance History
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ No performance history available yet.
+
+
+
+
diff --git a/docs/perf-history.jsonl b/docs/perf-history.jsonl
new file mode 100644
index 00000000..93011f25
--- /dev/null
+++ b/docs/perf-history.jsonl
@@ -0,0 +1,298 @@
+{"timestamp": "2026-02-25T13:02:33.4265625Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.51, "prompt_eval_rate": 0.0, "total_rate": 31.51, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 700000000, "prompt_eval_duration": 0, "total_duration": 700000000}
+{"timestamp": "2026-02-25T13:04:03.7604236Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.6, "prompt_eval_rate": 0.0, "total_rate": 14.6, "eval_count": 131, "prompt_eval_count": 0, "total_count": 131, "eval_duration": 8970000000, "prompt_eval_duration": 0, "total_duration": 8970000000}
+{"timestamp": "2026-02-25T13:06:03.0069983Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.61, "prompt_eval_rate": 0.0, "total_rate": 7.61, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6050000000, "prompt_eval_duration": 0, "total_duration": 6050000000}
+{"timestamp": "2026-02-25T13:06:40.6181214Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.03, "prompt_eval_rate": 0.0, "total_rate": 14.03, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 780000000, "prompt_eval_duration": 0, "total_duration": 780000000}
+{"timestamp": "2026-02-25T13:07:44.6833071Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.69, "prompt_eval_rate": 0.0, "total_rate": 12.69, "eval_count": 71, "prompt_eval_count": 0, "total_count": 71, "eval_duration": 5590000000, "prompt_eval_duration": 0, "total_duration": 5590000000}
+{"timestamp": "2026-02-25T13:08:49.0141617Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.74, "prompt_eval_rate": 0.0, "total_rate": 16.74, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-02-25T13:09:39.0976245Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.42, "prompt_eval_rate": 0.0, "total_rate": 18.42, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000}
+{"timestamp": "2026-02-25T13:09:58.5864455Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 35.9, "prompt_eval_rate": 0.0, "total_rate": 35.9, "eval_count": 49, "prompt_eval_count": 0, "total_count": 49, "eval_duration": 1370000000, "prompt_eval_duration": 0, "total_duration": 1370000000}
+{"timestamp": "2026-02-25T13:10:23.1699553Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.81, "prompt_eval_rate": 0.0, "total_rate": 28.81, "eval_count": 107, "prompt_eval_count": 0, "total_count": 107, "eval_duration": 3710000000, "prompt_eval_duration": 0, "total_duration": 3710000000}
+{"timestamp": "2026-02-25T13:11:04.1905737Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.32, "prompt_eval_rate": 0.0, "total_rate": 12.32, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000}
+{"timestamp": "2026-02-25T13:11:32.6446371Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.14, "prompt_eval_rate": 0.0, "total_rate": 18.14, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000}
+{"timestamp": "2026-02-25T13:12:33.1492263Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.37, "prompt_eval_rate": 0.0, "total_rate": 7.37, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6510000000, "prompt_eval_duration": 0, "total_duration": 6510000000}
+{"timestamp": "2026-02-25T13:13:17.4567596Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.24, "prompt_eval_rate": 0.0, "total_rate": 16.24, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000}
+{"timestamp": "2026-02-25T13:13:54.4757130Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.17, "prompt_eval_rate": 0.0, "total_rate": 19.17, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000}
+{"timestamp": "2026-02-25T12:48:55.9103978Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.69, "prompt_eval_rate": 0.0, "total_rate": 31.69, "eval_count": 33, "prompt_eval_count": 0, "total_count": 33, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000}
+{"timestamp": "2026-02-25T12:50:47.7822546Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 5.83, "prompt_eval_rate": 0.0, "total_rate": 5.83, "eval_count": 229, "prompt_eval_count": 0, "total_count": 229, "eval_duration": 39260000000, "prompt_eval_duration": 0, "total_duration": 39260000000}
+{"timestamp": "2026-02-25T12:52:46.9423058Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.08, "prompt_eval_rate": 0.0, "total_rate": 7.08, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6780000000, "prompt_eval_duration": 0, "total_duration": 6780000000}
+{"timestamp": "2026-02-25T12:53:24.3415702Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.71, "prompt_eval_rate": 0.0, "total_rate": 11.71, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1620000000, "prompt_eval_duration": 0, "total_duration": 1620000000}
+{"timestamp": "2026-02-25T12:54:26.9375923Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.14, "prompt_eval_rate": 0.0, "total_rate": 10.14, "eval_count": 18, "prompt_eval_count": 0, "total_count": 18, "eval_duration": 1780000000, "prompt_eval_duration": 0, "total_duration": 1780000000}
+{"timestamp": "2026-02-25T12:55:23.6364082Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.01, "prompt_eval_rate": 0.0, "total_rate": 14.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1430000000, "prompt_eval_duration": 0, "total_duration": 1430000000}
+{"timestamp": "2026-02-25T12:56:03.6760343Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.28, "prompt_eval_rate": 0.0, "total_rate": 15.28, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000}
+{"timestamp": "2026-02-25T12:56:21.1566819Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.02, "prompt_eval_rate": 0.0, "total_rate": 34.02, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000}
+{"timestamp": "2026-02-25T12:56:42.3970318Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 8080000000, "prompt_eval_duration": 0, "total_duration": 8080000000}
+{"timestamp": "2026-02-25T12:57:19.3770422Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.73, "prompt_eval_rate": 0.0, "total_rate": 12.73, "eval_count": 45, "prompt_eval_count": 0, "total_count": 45, "eval_duration": 3540000000, "prompt_eval_duration": 0, "total_duration": 3540000000}
+{"timestamp": "2026-02-25T12:57:41.8384742Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.46, "prompt_eval_rate": 0.0, "total_rate": 15.46, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000}
+{"timestamp": "2026-02-25T12:58:41.9804243Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.08, "prompt_eval_rate": 0.0, "total_rate": 8.08, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5940000000, "prompt_eval_duration": 0, "total_duration": 5940000000}
+{"timestamp": "2026-02-25T12:59:19.3721089Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.82, "prompt_eval_rate": 0.0, "total_rate": 15.82, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000}
+{"timestamp": "2026-02-25T12:59:46.6532904Z", "commit": "3059800b13e899ae26a4f4876e104db8c9f48cd5", "short_commit": "3059800b", "branch": "pr-97", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.15, "prompt_eval_rate": 0.0, "total_rate": 16.15, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000}
+{"timestamp": "2026-03-11T14:37:50.3727490Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 34.55, "prompt_eval_rate": 0.0, "total_rate": 34.55, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 840000000, "prompt_eval_duration": 0, "total_duration": 840000000}
+{"timestamp": "2026-03-11T14:38:27.6168660Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.65, "prompt_eval_rate": 0.0, "total_rate": 14.65, "eval_count": 131, "prompt_eval_count": 0, "total_count": 131, "eval_duration": 8940000000, "prompt_eval_duration": 0, "total_duration": 8940000000}
+{"timestamp": "2026-03-11T14:38:48.7062204Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.27, "prompt_eval_rate": 0.0, "total_rate": 9.27, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5180000000, "prompt_eval_duration": 0, "total_duration": 5180000000}
+{"timestamp": "2026-03-11T14:38:55.8504332Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 22.39, "prompt_eval_rate": 0.0, "total_rate": 22.39, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 850000000, "prompt_eval_duration": 0, "total_duration": 850000000}
+{"timestamp": "2026-03-11T14:39:37.6499442Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.92, "prompt_eval_rate": 0.0, "total_rate": 15.92, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 32159999999, "prompt_eval_duration": 0, "total_duration": 32159999999}
+{"timestamp": "2026-03-11T14:39:47.3172710Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.51, "prompt_eval_rate": 0.0, "total_rate": 17.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1140000000, "prompt_eval_duration": 0, "total_duration": 1140000000}
+{"timestamp": "2026-03-11T14:40:19.6581229Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.42, "prompt_eval_rate": 0.0, "total_rate": 19.42, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 570000000, "prompt_eval_duration": 0, "total_duration": 570000000}
+{"timestamp": "2026-03-11T14:40:32.2523670Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.22, "prompt_eval_rate": 0.0, "total_rate": 34.22, "eval_count": 35, "prompt_eval_count": 0, "total_count": 35, "eval_duration": 1020000000, "prompt_eval_duration": 0, "total_duration": 1020000000}
+{"timestamp": "2026-03-11T14:40:46.2367918Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.72, "prompt_eval_rate": 0.0, "total_rate": 30.72, "eval_count": 121, "prompt_eval_count": 0, "total_count": 121, "eval_duration": 3940000000, "prompt_eval_duration": 0, "total_duration": 3940000000}
+{"timestamp": "2026-03-11T14:41:11.4739328Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.77, "prompt_eval_rate": 0.0, "total_rate": 15.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 950000000, "prompt_eval_duration": 0, "total_duration": 950000000}
+{"timestamp": "2026-03-11T14:41:28.9736950Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.4, "prompt_eval_rate": 0.0, "total_rate": 17.4, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000}
+{"timestamp": "2026-03-11T14:42:18.6426816Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.09, "prompt_eval_rate": 0.0, "total_rate": 9.09, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5280000000, "prompt_eval_duration": 0, "total_duration": 5280000000}
+{"timestamp": "2026-03-11T14:42:49.7737882Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.82, "prompt_eval_rate": 0.0, "total_rate": 16.82, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-03-11T14:43:11.2237568Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.45, "prompt_eval_rate": 0.0, "total_rate": 19.45, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000}
+{"timestamp": "2026-03-11T14:28:32.1201281Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.61, "prompt_eval_rate": 0.0, "total_rate": 30.61, "eval_count": 18, "prompt_eval_count": 0, "total_count": 18, "eval_duration": 590000000, "prompt_eval_duration": 0, "total_duration": 590000000}
+{"timestamp": "2026-03-11T14:30:01.4683147Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.33, "prompt_eval_rate": 0.0, "total_rate": 7.33, "eval_count": 147, "prompt_eval_count": 0, "total_count": 147, "eval_duration": 20070000000, "prompt_eval_duration": 0, "total_duration": 20070000000}
+{"timestamp": "2026-03-11T14:31:55.4129847Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.56, "prompt_eval_rate": 0.0, "total_rate": 7.56, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6090000000, "prompt_eval_duration": 0, "total_duration": 6090000000}
+{"timestamp": "2026-03-11T14:32:21.0910851Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.73, "prompt_eval_rate": 0.0, "total_rate": 14.73, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000}
+{"timestamp": "2026-03-11T14:33:06.7383934Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.88, "prompt_eval_rate": 0.0, "total_rate": 9.88, "eval_count": 24, "prompt_eval_count": 0, "total_count": 24, "eval_duration": 2430000000, "prompt_eval_duration": 0, "total_duration": 2430000000}
+{"timestamp": "2026-03-11T14:33:36.5002447Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.3, "prompt_eval_rate": 0.0, "total_rate": 13.3, "eval_count": 33, "prompt_eval_count": 0, "total_count": 33, "eval_duration": 2480000000, "prompt_eval_duration": 0, "total_duration": 2480000000}
+{"timestamp": "2026-03-11T14:33:45.0985750Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.06, "prompt_eval_rate": 0.0, "total_rate": 15.06, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000}
+{"timestamp": "2026-03-11T14:33:50.9317339Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.92, "prompt_eval_rate": 0.0, "total_rate": 30.92, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000}
+{"timestamp": "2026-03-11T14:34:04.2851760Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.72, "prompt_eval_rate": 0.0, "total_rate": 14.72, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 7740000000, "prompt_eval_duration": 0, "total_duration": 7740000000}
+{"timestamp": "2026-03-11T14:34:21.5045809Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.66, "prompt_eval_rate": 0.0, "total_rate": 12.66, "eval_count": 69, "prompt_eval_count": 0, "total_count": 69, "eval_duration": 5450000000, "prompt_eval_duration": 0, "total_duration": 5450000000}
+{"timestamp": "2026-03-11T14:34:33.6742781Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 21.02, "prompt_eval_rate": 0.0, "total_rate": 21.02, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 900000000, "prompt_eval_duration": 0, "total_duration": 900000000}
+{"timestamp": "2026-03-11T14:35:03.9071483Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.34, "prompt_eval_rate": 0.0, "total_rate": 8.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5750000000, "prompt_eval_duration": 0, "total_duration": 5750000000}
+{"timestamp": "2026-03-11T14:35:23.5037681Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.19, "prompt_eval_rate": 0.0, "total_rate": 16.19, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000}
+{"timestamp": "2026-03-11T14:35:40.4748520Z", "commit": "e3b4b5e69e0ef7990a77f2be293cfe04addefab8", "short_commit": "e3b4b5e6", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.13, "prompt_eval_rate": 0.0, "total_rate": 16.13, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000}
+{"timestamp": "2026-03-27T11:11:46.8464278Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.32, "prompt_eval_rate": 0.0, "total_rate": 31.32, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 480000000, "prompt_eval_duration": 0, "total_duration": 480000000}
+{"timestamp": "2026-03-27T11:13:18.2558727Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.4, "prompt_eval_rate": 0.0, "total_rate": 12.4, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 9270000000, "prompt_eval_duration": 0, "total_duration": 9270000000}
+{"timestamp": "2026-03-27T11:15:28.6588440Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.55, "prompt_eval_rate": 0.0, "total_rate": 8.55, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5620000000, "prompt_eval_duration": 0, "total_duration": 5620000000}
+{"timestamp": "2026-03-27T11:16:05.3132154Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.03, "prompt_eval_rate": 0.0, "total_rate": 19.03, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1000000000, "prompt_eval_duration": 0, "total_duration": 1000000000}
+{"timestamp": "2026-03-27T11:17:01.3215670Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.97, "prompt_eval_rate": 0.0, "total_rate": 12.97, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1460000000, "prompt_eval_duration": 0, "total_duration": 1460000000}
+{"timestamp": "2026-03-27T11:17:49.3334417Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.28, "prompt_eval_rate": 0.0, "total_rate": 17.28, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000}
+{"timestamp": "2026-03-27T11:19:05.4734469Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.4, "prompt_eval_rate": 0.0, "total_rate": 18.4, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 27830000000, "prompt_eval_duration": 0, "total_duration": 27830000000}
+{"timestamp": "2026-03-27T11:19:23.4233286Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.99, "prompt_eval_rate": 0.0, "total_rate": 33.99, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 850000000, "prompt_eval_duration": 0, "total_duration": 850000000}
+{"timestamp": "2026-03-27T11:19:47.7509434Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.32, "prompt_eval_rate": 0.0, "total_rate": 29.32, "eval_count": 112, "prompt_eval_count": 0, "total_count": 112, "eval_duration": 3820000000, "prompt_eval_duration": 0, "total_duration": 3820000000}
+{"timestamp": "2026-03-27T11:20:32.9738436Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 10.94, "prompt_eval_rate": 0.0, "total_rate": 10.94, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 2100000000, "prompt_eval_duration": 0, "total_duration": 2100000000}
+{"timestamp": "2026-03-27T11:21:01.8644361Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.55, "prompt_eval_rate": 0.0, "total_rate": 17.55, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-03-27T11:21:48.7553878Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.51, "prompt_eval_rate": 0.0, "total_rate": 9.51, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5050000000, "prompt_eval_duration": 0, "total_duration": 5050000000}
+{"timestamp": "2026-03-27T11:22:09.8806775Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.12, "prompt_eval_rate": 0.0, "total_rate": 17.12, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1170000000, "prompt_eval_duration": 0, "total_duration": 1170000000}
+{"timestamp": "2026-03-27T11:22:32.6596801Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.3, "prompt_eval_rate": 0.0, "total_rate": 19.3, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000}
+{"timestamp": "2026-03-27T10:59:21.2222696Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.72, "prompt_eval_rate": 0.0, "total_rate": 32.72, "eval_count": 312, "prompt_eval_count": 0, "total_count": 312, "eval_duration": 9540000000, "prompt_eval_duration": 0, "total_duration": 9540000000}
+{"timestamp": "2026-03-27T11:00:51.8298313Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.08, "prompt_eval_rate": 0.0, "total_rate": 7.08, "eval_count": 157, "prompt_eval_count": 0, "total_count": 157, "eval_duration": 22170000000, "prompt_eval_duration": 0, "total_duration": 22170000000}
+{"timestamp": "2026-03-27T11:02:53.6971774Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.92, "prompt_eval_rate": 0.0, "total_rate": 6.92, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6640000000, "prompt_eval_duration": 0, "total_duration": 6640000000}
+{"timestamp": "2026-03-27T11:03:21.9312020Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.9, "prompt_eval_rate": 0.0, "total_rate": 14.9, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1270000000, "prompt_eval_duration": 0, "total_duration": 1270000000}
+{"timestamp": "2026-03-27T11:04:12.1422822Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.67, "prompt_eval_rate": 0.0, "total_rate": 11.67, "eval_count": 24, "prompt_eval_count": 0, "total_count": 24, "eval_duration": 2060000000, "prompt_eval_duration": 0, "total_duration": 2060000000}
+{"timestamp": "2026-03-27T11:05:01.6786403Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.42, "prompt_eval_rate": 0.0, "total_rate": 15.42, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000}
+{"timestamp": "2026-03-27T11:05:42.5168894Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.79, "prompt_eval_rate": 0.0, "total_rate": 14.79, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000}
+{"timestamp": "2026-03-27T11:05:58.1450126Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.96, "prompt_eval_rate": 0.0, "total_rate": 31.96, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 690000000, "prompt_eval_duration": 0, "total_duration": 690000000}
+{"timestamp": "2026-03-27T11:06:19.0563521Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 13.18, "prompt_eval_rate": 0.0, "total_rate": 13.18, "eval_count": 110, "prompt_eval_count": 0, "total_count": 110, "eval_duration": 8340000000, "prompt_eval_duration": 0, "total_duration": 8340000000}
+{"timestamp": "2026-03-27T11:06:52.3261536Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.8, "prompt_eval_rate": 0.0, "total_rate": 12.8, "eval_count": 38, "prompt_eval_count": 0, "total_count": 38, "eval_duration": 2970000000, "prompt_eval_duration": 0, "total_duration": 2970000000}
+{"timestamp": "2026-03-27T11:07:15.0983514Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.38, "prompt_eval_rate": 0.0, "total_rate": 16.38, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000}
+{"timestamp": "2026-03-27T11:08:19.7915544Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.34, "prompt_eval_rate": 0.0, "total_rate": 7.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6540000000, "prompt_eval_duration": 0, "total_duration": 6540000000}
+{"timestamp": "2026-03-27T11:08:57.6893685Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.65, "prompt_eval_rate": 0.0, "total_rate": 15.65, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-03-27T11:09:24.2799821Z", "commit": "d86dbaef410d08e777936f621107256a8fee3485", "short_commit": "d86dbaef", "branch": "pr-101", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.74, "prompt_eval_rate": 0.0, "total_rate": 15.74, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1270000000, "prompt_eval_duration": 0, "total_duration": 1270000000}
+{"timestamp": "2026-03-31T13:40:47.9467296Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 35.17, "prompt_eval_rate": 0.0, "total_rate": 35.17, "eval_count": 26, "prompt_eval_count": 0, "total_count": 26, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000}
+{"timestamp": "2026-03-31T13:41:37.6514747Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.63, "prompt_eval_rate": 0.0, "total_rate": 14.63, "eval_count": 129, "prompt_eval_count": 0, "total_count": 129, "eval_duration": 8820000000, "prompt_eval_duration": 0, "total_duration": 8820000000}
+{"timestamp": "2026-03-31T13:42:22.5163227Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.45, "prompt_eval_rate": 0.0, "total_rate": 9.45, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 4870000000, "prompt_eval_duration": 0, "total_duration": 4870000000}
+{"timestamp": "2026-03-31T13:42:44.1337911Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 21.62, "prompt_eval_rate": 0.0, "total_rate": 21.62, "eval_count": 11, "prompt_eval_count": 0, "total_count": 11, "eval_duration": 510000000, "prompt_eval_duration": 0, "total_duration": 510000000}
+{"timestamp": "2026-03-31T13:43:56.0520326Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.11, "prompt_eval_rate": 0.0, "total_rate": 15.11, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 33880000000, "prompt_eval_duration": 0, "total_duration": 33880000000}
+{"timestamp": "2026-03-31T13:44:46.2989757Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.79, "prompt_eval_rate": 0.0, "total_rate": 12.79, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1560000000, "prompt_eval_duration": 0, "total_duration": 1560000000}
+{"timestamp": "2026-03-31T13:45:19.2147696Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 20.59, "prompt_eval_rate": 0.0, "total_rate": 20.59, "eval_count": 73, "prompt_eval_count": 0, "total_count": 73, "eval_duration": 3550000000, "prompt_eval_duration": 0, "total_duration": 3550000000}
+{"timestamp": "2026-03-31T13:45:37.4442519Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.44, "prompt_eval_rate": 0.0, "total_rate": 33.44, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 930000000, "prompt_eval_duration": 0, "total_duration": 930000000}
+{"timestamp": "2026-03-31T13:45:59.8044745Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.9, "prompt_eval_rate": 0.0, "total_rate": 31.9, "eval_count": 113, "prompt_eval_count": 0, "total_count": 113, "eval_duration": 3540000000, "prompt_eval_duration": 0, "total_duration": 3540000000}
+{"timestamp": "2026-03-31T13:46:32.3629413Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.64, "prompt_eval_rate": 0.0, "total_rate": 12.64, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-03-31T13:46:59.7393895Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.49, "prompt_eval_rate": 0.0, "total_rate": 18.49, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000}
+{"timestamp": "2026-03-31T13:47:19.7101038Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 9.55, "prompt_eval_rate": 0.0, "total_rate": 9.55, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 4820000000, "prompt_eval_duration": 0, "total_duration": 4820000000}
+{"timestamp": "2026-03-31T13:47:28.7660332Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.04, "prompt_eval_rate": 0.0, "total_rate": 17.04, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1170000000, "prompt_eval_duration": 0, "total_duration": 1170000000}
+{"timestamp": "2026-03-31T13:47:37.2736957Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.48, "prompt_eval_rate": 0.0, "total_rate": 19.48, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1030000000, "prompt_eval_duration": 0, "total_duration": 1030000000}
+{"timestamp": "2026-03-31T13:30:27.1035853Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 34.1, "prompt_eval_rate": 0.0, "total_rate": 34.1, "eval_count": 80, "prompt_eval_count": 0, "total_count": 80, "eval_duration": 2350000000, "prompt_eval_duration": 0, "total_duration": 2350000000}
+{"timestamp": "2026-03-31T13:32:00.3570128Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.81, "prompt_eval_rate": 0.0, "total_rate": 6.81, "eval_count": 171, "prompt_eval_count": 0, "total_count": 171, "eval_duration": 25110000000, "prompt_eval_duration": 0, "total_duration": 25110000000}
+{"timestamp": "2026-03-31T13:33:58.9035812Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.42, "prompt_eval_rate": 0.0, "total_rate": 7.42, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6470000000, "prompt_eval_duration": 0, "total_duration": 6470000000}
+{"timestamp": "2026-03-31T13:34:25.2549424Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.08, "prompt_eval_rate": 0.0, "total_rate": 14.08, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000}
+{"timestamp": "2026-03-31T13:34:51.9147239Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.65, "prompt_eval_rate": 0.0, "total_rate": 12.65, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-03-31T13:35:12.1648742Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.49, "prompt_eval_rate": 0.0, "total_rate": 15.49, "eval_count": 13, "prompt_eval_count": 0, "total_count": 13, "eval_duration": 840000000, "prompt_eval_duration": 0, "total_duration": 840000000}
+{"timestamp": "2026-03-31T13:35:33.5012364Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.43, "prompt_eval_rate": 0.0, "total_rate": 15.43, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000}
+{"timestamp": "2026-03-31T13:35:42.2492971Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.17, "prompt_eval_rate": 0.0, "total_rate": 33.17, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000}
+{"timestamp": "2026-03-31T13:36:00.9055728Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.0, "prompt_eval_rate": 0.0, "total_rate": 15.0, "eval_count": 103, "prompt_eval_count": 0, "total_count": 103, "eval_duration": 6870000000, "prompt_eval_duration": 0, "total_duration": 6870000000}
+{"timestamp": "2026-03-31T13:36:18.8889526Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.23, "prompt_eval_rate": 0.0, "total_rate": 15.23, "eval_count": 14, "prompt_eval_count": 0, "total_count": 14, "eval_duration": 920000000, "prompt_eval_duration": 0, "total_duration": 920000000}
+{"timestamp": "2026-03-31T13:36:32.2594672Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.96, "prompt_eval_rate": 0.0, "total_rate": 19.96, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 950000000, "prompt_eval_duration": 0, "total_duration": 950000000}
+{"timestamp": "2026-03-31T13:37:32.8805648Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.14, "prompt_eval_rate": 0.0, "total_rate": 8.14, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5650000000, "prompt_eval_duration": 0, "total_duration": 5650000000}
+{"timestamp": "2026-03-31T13:38:05.1048826Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.39, "prompt_eval_rate": 0.0, "total_rate": 16.39, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000}
+{"timestamp": "2026-03-31T13:38:30.7513374Z", "commit": "8a00dedb4c29b5e198546b493660cc13765ab91a", "short_commit": "8a00dedb", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.2, "prompt_eval_rate": 0.0, "total_rate": 16.2, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1230000000, "prompt_eval_duration": 0, "total_duration": 1230000000}
+{"timestamp": "2026-04-07T21:50:40.0153279Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 39.09, "prompt_eval_rate": 0.0, "total_rate": 39.09, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 13100000000, "prompt_eval_duration": 0, "total_duration": 13100000000}
+{"timestamp": "2026-04-07T21:51:54.1917549Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.52, "prompt_eval_rate": 0.0, "total_rate": 14.52, "eval_count": 124, "prompt_eval_count": 0, "total_count": 124, "eval_duration": 8539999999, "prompt_eval_duration": 0, "total_duration": 8539999999}
+{"timestamp": "2026-04-07T21:53:54.0895254Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.77, "prompt_eval_rate": 0.0, "total_rate": 7.77, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6180000000, "prompt_eval_duration": 0, "total_duration": 6180000000}
+{"timestamp": "2026-04-07T21:54:21.0741894Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.08, "prompt_eval_rate": 0.0, "total_rate": 18.08, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000}
+{"timestamp": "2026-04-07T21:55:14.0069332Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.66, "prompt_eval_rate": 0.0, "total_rate": 15.66, "eval_count": 113, "prompt_eval_count": 0, "total_count": 113, "eval_duration": 7220000000, "prompt_eval_duration": 0, "total_duration": 7220000000}
+{"timestamp": "2026-04-07T21:56:06.7107943Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.24, "prompt_eval_rate": 0.0, "total_rate": 17.24, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000}
+{"timestamp": "2026-04-07T21:56:58.6567482Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 20.01, "prompt_eval_rate": 0.0, "total_rate": 20.01, "eval_count": 27, "prompt_eval_count": 0, "total_count": 27, "eval_duration": 1350000000, "prompt_eval_duration": 0, "total_duration": 1350000000}
+{"timestamp": "2026-04-07T21:57:12.9824348Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.45, "prompt_eval_rate": 0.0, "total_rate": 31.45, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000}
+{"timestamp": "2026-04-07T21:57:29.5667820Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.57, "prompt_eval_rate": 0.0, "total_rate": 28.57, "eval_count": 141, "prompt_eval_count": 0, "total_count": 141, "eval_duration": 4930000000, "prompt_eval_duration": 0, "total_duration": 4930000000}
+{"timestamp": "2026-04-07T21:58:01.1473144Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 30, "prompt_eval_count": 0, "total_count": 30, "eval_duration": 2109999999, "prompt_eval_duration": 0, "total_duration": 2109999999}
+{"timestamp": "2026-04-07T21:58:20.2671557Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.18, "prompt_eval_rate": 0.0, "total_rate": 20.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 940000000, "prompt_eval_duration": 0, "total_duration": 940000000}
+{"timestamp": "2026-04-07T21:59:13.6367856Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.98, "prompt_eval_rate": 0.0, "total_rate": 8.98, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5120000000, "prompt_eval_duration": 0, "total_duration": 5120000000}
+{"timestamp": "2026-04-07T21:59:45.6744924Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.48, "prompt_eval_rate": 0.0, "total_rate": 16.48, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000}
+{"timestamp": "2026-04-07T22:00:08.3321365Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 19.26, "prompt_eval_rate": 0.0, "total_rate": 19.26, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000}
+{"timestamp": "2026-04-07T21:35:39.4302411Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.55, "prompt_eval_rate": 0.0, "total_rate": 30.55, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16760000000, "prompt_eval_duration": 0, "total_duration": 16760000000}
+{"timestamp": "2026-04-07T21:37:01.0657004Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.53, "prompt_eval_rate": 0.0, "total_rate": 8.53, "eval_count": 98, "prompt_eval_count": 0, "total_count": 98, "eval_duration": 11490000000, "prompt_eval_duration": 0, "total_duration": 11490000000}
+{"timestamp": "2026-04-07T21:39:01.8455952Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.86, "prompt_eval_rate": 0.0, "total_rate": 6.86, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6710000000, "prompt_eval_duration": 0, "total_duration": 6710000000}
+{"timestamp": "2026-04-07T21:39:28.2730624Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.68, "prompt_eval_rate": 0.0, "total_rate": 14.68, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000}
+{"timestamp": "2026-04-07T21:41:56.2790832Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 4.7, "prompt_eval_rate": 0.0, "total_rate": 4.7, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 108980000000, "prompt_eval_duration": 0, "total_duration": 108980000000}
+{"timestamp": "2026-04-07T21:42:47.7767757Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.22, "prompt_eval_rate": 0.0, "total_rate": 15.22, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000}
+{"timestamp": "2026-04-07T21:43:24.2629361Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.32, "prompt_eval_rate": 0.0, "total_rate": 15.32, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000}
+{"timestamp": "2026-04-07T21:43:38.4329675Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.29, "prompt_eval_rate": 0.0, "total_rate": 33.29, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 660000000, "prompt_eval_duration": 0, "total_duration": 660000000}
+{"timestamp": "2026-04-07T21:43:56.0594025Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.27, "prompt_eval_rate": 0.0, "total_rate": 14.27, "eval_count": 111, "prompt_eval_count": 0, "total_count": 111, "eval_duration": 7780000000, "prompt_eval_duration": 0, "total_duration": 7780000000}
+{"timestamp": "2026-04-07T21:46:03.7170133Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 4.88, "prompt_eval_rate": 0.0, "total_rate": 4.88, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 104890000000, "prompt_eval_duration": 0, "total_duration": 104890000000}
+{"timestamp": "2026-04-07T21:46:25.1585713Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.33, "prompt_eval_rate": 0.0, "total_rate": 16.33, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000}
+{"timestamp": "2026-04-07T21:47:19.3815600Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.26, "prompt_eval_rate": 0.0, "total_rate": 8.26, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5570000000, "prompt_eval_duration": 0, "total_duration": 5570000000}
+{"timestamp": "2026-04-07T21:47:51.6762573Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.01, "prompt_eval_rate": 0.0, "total_rate": 16.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000}
+{"timestamp": "2026-04-07T21:48:13.8548868Z", "commit": "4e385b812334065b79a42541109beb05763f0497", "short_commit": "4e385b81", "branch": "pr-103", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.94, "prompt_eval_rate": 0.0, "total_rate": 15.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000}
+{"timestamp": "2026-04-19T12:08:45.2526989Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.55, "prompt_eval_rate": 0.0, "total_rate": 30.55, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16760000000, "prompt_eval_duration": 0, "total_duration": 16760000000}
+{"timestamp": "2026-04-19T12:10:30.3677200Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 6.02, "prompt_eval_rate": 0.0, "total_rate": 6.02, "eval_count": 217, "prompt_eval_count": 0, "total_count": 217, "eval_duration": 36060000000, "prompt_eval_duration": 0, "total_duration": 36060000000}
+{"timestamp": "2026-04-19T12:12:26.8304801Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.55, "prompt_eval_rate": 0.0, "total_rate": 7.55, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6090000000, "prompt_eval_duration": 0, "total_duration": 6090000000}
+{"timestamp": "2026-04-19T12:12:54.4888402Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.17, "prompt_eval_rate": 0.0, "total_rate": 15.17, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000}
+{"timestamp": "2026-04-19T12:13:47.5007598Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.76, "prompt_eval_rate": 0.0, "total_rate": 10.76, "eval_count": 8, "prompt_eval_count": 0, "total_count": 8, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000}
+{"timestamp": "2026-04-19T12:14:42.3432806Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.99, "prompt_eval_rate": 0.0, "total_rate": 13.99, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1430000000, "prompt_eval_duration": 0, "total_duration": 1430000000}
+{"timestamp": "2026-04-19T12:15:22.2105996Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.94, "prompt_eval_rate": 0.0, "total_rate": 14.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1340000000, "prompt_eval_duration": 0, "total_duration": 1340000000}
+{"timestamp": "2026-04-19T12:15:39.7347922Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.92, "prompt_eval_rate": 0.0, "total_rate": 30.92, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 710000000, "prompt_eval_duration": 0, "total_duration": 710000000}
+{"timestamp": "2026-04-19T12:16:02.0621914Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.2, "prompt_eval_rate": 0.0, "total_rate": 14.2, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 8100000000, "prompt_eval_duration": 0, "total_duration": 8100000000}
+{"timestamp": "2026-04-19T12:16:38.7345701Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.07, "prompt_eval_rate": 0.0, "total_rate": 11.07, "eval_count": 40, "prompt_eval_count": 0, "total_count": 40, "eval_duration": 3610000000, "prompt_eval_duration": 0, "total_duration": 3610000000}
+{"timestamp": "2026-04-19T12:17:01.6352498Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.34, "prompt_eval_rate": 0.0, "total_rate": 16.34, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1160000000, "prompt_eval_duration": 0, "total_duration": 1160000000}
+{"timestamp": "2026-04-19T12:17:58.1960572Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.34, "prompt_eval_rate": 0.0, "total_rate": 8.34, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5750000000, "prompt_eval_duration": 0, "total_duration": 5750000000}
+{"timestamp": "2026-04-19T12:18:30.1108522Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.58, "prompt_eval_rate": 0.0, "total_rate": 16.58, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000}
+{"timestamp": "2026-04-19T12:18:50.3891829Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.59, "prompt_eval_rate": 0.0, "total_rate": 15.59, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-04-19T11:54:10.5516743Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 33.99, "prompt_eval_rate": 0.0, "total_rate": 33.99, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1090000000, "prompt_eval_duration": 0, "total_duration": 1090000000}
+{"timestamp": "2026-04-19T11:55:44.0668466Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.94, "prompt_eval_rate": 0.0, "total_rate": 13.94, "eval_count": 181, "prompt_eval_count": 0, "total_count": 181, "eval_duration": 12990000000, "prompt_eval_duration": 0, "total_duration": 12990000000}
+{"timestamp": "2026-04-19T11:57:42.3173371Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.94, "prompt_eval_rate": 0.0, "total_rate": 7.94, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5790000000, "prompt_eval_duration": 0, "total_duration": 5790000000}
+{"timestamp": "2026-04-19T11:58:18.2369368Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.55, "prompt_eval_rate": 0.0, "total_rate": 17.55, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-04-19T11:59:16.9495285Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.12, "prompt_eval_rate": 0.0, "total_rate": 13.12, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1140000000, "prompt_eval_duration": 0, "total_duration": 1140000000}
+{"timestamp": "2026-04-19T12:00:21.6221524Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.51, "prompt_eval_rate": 0.0, "total_rate": 16.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000}
+{"timestamp": "2026-04-19T12:01:12.3020518Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.52, "prompt_eval_rate": 0.0, "total_rate": 18.52, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-04-19T12:01:29.7165722Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 30.77, "prompt_eval_rate": 0.0, "total_rate": 30.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000}
+{"timestamp": "2026-04-19T12:01:55.3802804Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 28.11, "prompt_eval_rate": 0.0, "total_rate": 28.11, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 4380000000, "prompt_eval_duration": 0, "total_duration": 4380000000}
+{"timestamp": "2026-04-19T12:02:36.8992211Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 39, "prompt_eval_count": 0, "total_count": 39, "eval_duration": 2740000000, "prompt_eval_duration": 0, "total_duration": 2740000000}
+{"timestamp": "2026-04-19T12:03:06.7547391Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.7, "prompt_eval_rate": 0.0, "total_rate": 17.7, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000}
+{"timestamp": "2026-04-19T12:04:11.6043612Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 7.51, "prompt_eval_rate": 0.0, "total_rate": 7.51, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6120000000, "prompt_eval_duration": 0, "total_duration": 6120000000}
+{"timestamp": "2026-04-19T12:04:54.7698813Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.31, "prompt_eval_rate": 0.0, "total_rate": 15.31, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000}
+{"timestamp": "2026-04-19T12:05:32.1672558Z", "commit": "e95c5a2f553a1f87bf876ca827687cb47add1c82", "short_commit": "e95c5a2f", "branch": "pr-108", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.01, "prompt_eval_rate": 0.0, "total_rate": 18.01, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1110000000, "prompt_eval_duration": 0, "total_duration": 1110000000}
+{"timestamp": "2026-04-30T11:30:12.7846142Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.58, "prompt_eval_rate": 0.0, "total_rate": 30.58, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000}
+{"timestamp": "2026-04-30T11:31:34.0573231Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.15, "prompt_eval_rate": 0.0, "total_rate": 8.15, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 13990000000, "prompt_eval_duration": 0, "total_duration": 13990000000}
+{"timestamp": "2026-04-30T11:33:22.0569754Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.56, "prompt_eval_rate": 0.0, "total_rate": 7.56, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6080000000, "prompt_eval_duration": 0, "total_duration": 6080000000}
+{"timestamp": "2026-04-30T11:33:48.4026201Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.81, "prompt_eval_rate": 0.0, "total_rate": 14.81, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-04-30T11:34:37.7320049Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.64, "prompt_eval_rate": 0.0, "total_rate": 10.64, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1410000000, "prompt_eval_duration": 0, "total_duration": 1410000000}
+{"timestamp": "2026-04-30T11:35:27.1637007Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.4, "prompt_eval_rate": 0.0, "total_rate": 15.4, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000}
+{"timestamp": "2026-04-30T11:35:45.1818190Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.81, "prompt_eval_rate": 0.0, "total_rate": 15.81, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000}
+{"timestamp": "2026-04-30T11:35:54.2323889Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 34.36, "prompt_eval_rate": 0.0, "total_rate": 34.36, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-04-30T11:36:09.8633388Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.81, "prompt_eval_rate": 0.0, "total_rate": 14.81, "eval_count": 114, "prompt_eval_count": 0, "total_count": 114, "eval_duration": 7700000000, "prompt_eval_duration": 0, "total_duration": 7700000000}
+{"timestamp": "2026-04-30T11:36:34.0269765Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 10.19, "prompt_eval_rate": 0.0, "total_rate": 10.19, "eval_count": 146, "prompt_eval_count": 0, "total_count": 146, "eval_duration": 14330000000, "prompt_eval_duration": 0, "total_duration": 14330000000}
+{"timestamp": "2026-04-30T11:36:45.0461352Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.91, "prompt_eval_rate": 0.0, "total_rate": 20.91, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000}
+{"timestamp": "2026-04-30T11:37:45.0756437Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 6.27, "prompt_eval_rate": 0.0, "total_rate": 6.27, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 7650000000, "prompt_eval_duration": 0, "total_duration": 7650000000}
+{"timestamp": "2026-04-30T11:38:11.1457940Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.64, "prompt_eval_rate": 0.0, "total_rate": 16.64, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000}
+{"timestamp": "2026-04-30T11:38:18.8758446Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.6, "prompt_eval_rate": 0.0, "total_rate": 15.6, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-04-30T11:20:06.7015234Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.19, "prompt_eval_rate": 0.0, "total_rate": 32.19, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000}
+{"timestamp": "2026-04-30T11:21:28.3593079Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.06, "prompt_eval_rate": 0.0, "total_rate": 14.06, "eval_count": 126, "prompt_eval_count": 0, "total_count": 126, "eval_duration": 8960000000, "prompt_eval_duration": 0, "total_duration": 8960000000}
+{"timestamp": "2026-04-30T11:23:19.3816934Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.75, "prompt_eval_rate": 0.0, "total_rate": 8.75, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5260000000, "prompt_eval_duration": 0, "total_duration": 5260000000}
+{"timestamp": "2026-04-30T11:23:45.3178566Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.93, "prompt_eval_rate": 0.0, "total_rate": 15.93, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-04-30T11:24:35.0610505Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.66, "prompt_eval_rate": 0.0, "total_rate": 12.66, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1180000000, "prompt_eval_duration": 0, "total_duration": 1180000000}
+{"timestamp": "2026-04-30T11:25:28.5461910Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.63, "prompt_eval_rate": 0.0, "total_rate": 11.63, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1720000000, "prompt_eval_duration": 0, "total_duration": 1720000000}
+{"timestamp": "2026-04-30T11:26:04.0544522Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.39, "prompt_eval_rate": 0.0, "total_rate": 17.39, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1150000000, "prompt_eval_duration": 0, "total_duration": 1150000000}
+{"timestamp": "2026-04-30T11:26:17.6947503Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.83, "prompt_eval_rate": 0.0, "total_rate": 29.83, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000}
+{"timestamp": "2026-04-30T11:26:32.5245738Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 27.65, "prompt_eval_rate": 0.0, "total_rate": 27.65, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 4450000000, "prompt_eval_duration": 0, "total_duration": 4450000000}
+{"timestamp": "2026-04-30T11:27:08.0695669Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.09, "prompt_eval_rate": 0.0, "total_rate": 15.09, "eval_count": 75, "prompt_eval_count": 0, "total_count": 75, "eval_duration": 4970000000, "prompt_eval_duration": 0, "total_duration": 4970000000}
+{"timestamp": "2026-04-30T11:27:26.7421162Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.59, "prompt_eval_rate": 0.0, "total_rate": 17.59, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-04-30T11:27:49.7773682Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.5, "prompt_eval_rate": 0.0, "total_rate": 8.5, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5640000000, "prompt_eval_duration": 0, "total_duration": 5640000000}
+{"timestamp": "2026-04-30T11:28:22.4684079Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 12.59, "prompt_eval_rate": 0.0, "total_rate": 12.59, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1590000000, "prompt_eval_duration": 0, "total_duration": 1590000000}
+{"timestamp": "2026-04-30T11:28:45.7117408Z", "commit": "214b58e0ed0c43eecffedf0702034df8adf6d1d8", "short_commit": "214b58e0", "branch": "main", "run_id": "", "run_number": "", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.78, "prompt_eval_rate": 0.0, "total_rate": 17.78, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000}
+{"timestamp": "2026-05-04T12:04:21.5679563Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 32.67, "prompt_eval_rate": 0.0, "total_rate": 32.67, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 670000000, "prompt_eval_duration": 0, "total_duration": 670000000}
+{"timestamp": "2026-05-04T12:04:27.7946305Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 36.05, "prompt_eval_rate": 0.0, "total_rate": 36.05, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000}
+{"timestamp": "2026-05-04T12:04:43.3776216Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 36.8, "prompt_eval_rate": 0.0, "total_rate": 36.8, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000}
+{"timestamp": "2026-05-04T12:05:12.4732202Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.21, "prompt_eval_rate": 0.0, "total_rate": 14.21, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 8100000000, "prompt_eval_duration": 0, "total_duration": 8100000000}
+{"timestamp": "2026-05-04T12:06:09.3772021Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.03, "prompt_eval_rate": 0.0, "total_rate": 9.03, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5310000000, "prompt_eval_duration": 0, "total_duration": 5310000000}
+{"timestamp": "2026-05-04T12:06:24.7665297Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 23.44, "prompt_eval_rate": 0.0, "total_rate": 23.44, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 810000000, "prompt_eval_duration": 0, "total_duration": 810000000}
+{"timestamp": "2026-05-04T12:07:04.5121651Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.78, "prompt_eval_rate": 0.0, "total_rate": 14.78, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 1560000000, "prompt_eval_duration": 0, "total_duration": 1560000000}
+{"timestamp": "2026-05-04T12:07:19.4971109Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.32, "prompt_eval_rate": 0.0, "total_rate": 17.32, "eval_count": 29, "prompt_eval_count": 0, "total_count": 29, "eval_duration": 1670000000, "prompt_eval_duration": 0, "total_duration": 1670000000}
+{"timestamp": "2026-05-04T12:07:41.6423432Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.83, "prompt_eval_rate": 0.0, "total_rate": 18.83, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1060000000, "prompt_eval_duration": 0, "total_duration": 1060000000}
+{"timestamp": "2026-05-04T12:07:55.0935791Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 27.04, "prompt_eval_rate": 0.0, "total_rate": 27.04, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 550000000, "prompt_eval_duration": 0, "total_duration": 550000000}
+{"timestamp": "2026-05-04T12:08:06.0831655Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 31.03, "prompt_eval_rate": 0.0, "total_rate": 31.03, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 3710000000, "prompt_eval_duration": 0, "total_duration": 3710000000}
+{"timestamp": "2026-05-04T12:08:23.4854098Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.56, "prompt_eval_rate": 0.0, "total_rate": 11.56, "eval_count": 13, "prompt_eval_count": 0, "total_count": 13, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000}
+{"timestamp": "2026-05-04T12:08:30.7136989Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 22.99, "prompt_eval_rate": 0.0, "total_rate": 22.99, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 830000000, "prompt_eval_duration": 0, "total_duration": 830000000}
+{"timestamp": "2026-05-04T12:08:56.3614351Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.84, "prompt_eval_rate": 0.0, "total_rate": 8.84, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5430000000, "prompt_eval_duration": 0, "total_duration": 5430000000}
+{"timestamp": "2026-05-04T12:09:04.6537885Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.51, "prompt_eval_rate": 0.0, "total_rate": 15.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000}
+{"timestamp": "2026-05-04T12:09:12.5752000Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.06, "prompt_eval_rate": 0.0, "total_rate": 18.06, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1110000000, "prompt_eval_duration": 0, "total_duration": 1110000000}
+{"timestamp": "2026-05-04T11:55:24.7608406Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.71, "prompt_eval_rate": 0.0, "total_rate": 30.71, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16670000000, "prompt_eval_duration": 0, "total_duration": 16670000000}
+{"timestamp": "2026-05-04T11:55:29.9460956Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 32.15, "prompt_eval_rate": 0.0, "total_rate": 32.15, "eval_count": 14, "prompt_eval_count": 0, "total_count": 14, "eval_duration": 440000000, "prompt_eval_duration": 0, "total_duration": 440000000}
+{"timestamp": "2026-05-04T11:55:40.0185725Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 32.56, "prompt_eval_rate": 0.0, "total_rate": 32.56, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000}
+{"timestamp": "2026-05-04T11:55:45.8422318Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 40.98, "prompt_eval_rate": 0.0, "total_rate": 40.98, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 760000000, "prompt_eval_duration": 0, "total_duration": 760000000}
+{"timestamp": "2026-05-04T11:55:52.8736744Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 33.48, "prompt_eval_rate": 0.0, "total_rate": 33.48, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 450000000, "prompt_eval_duration": 0, "total_duration": 450000000}
+{"timestamp": "2026-05-04T11:57:01.4292312Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.13, "prompt_eval_rate": 0.0, "total_rate": 7.13, "eval_count": 156, "prompt_eval_count": 0, "total_count": 156, "eval_duration": 21870000000, "prompt_eval_duration": 0, "total_duration": 21870000000}
+{"timestamp": "2026-05-04T11:57:55.1774975Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.02, "prompt_eval_rate": 0.0, "total_rate": 8.02, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5740000000, "prompt_eval_duration": 0, "total_duration": 5740000000}
+{"timestamp": "2026-05-04T11:58:07.3571474Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 17.81, "prompt_eval_rate": 0.0, "total_rate": 17.81, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000}
+{"timestamp": "2026-05-04T11:58:22.1862545Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 12.54, "prompt_eval_rate": 0.0, "total_rate": 12.54, "eval_count": 17, "prompt_eval_count": 0, "total_count": 17, "eval_duration": 1360000000, "prompt_eval_duration": 0, "total_duration": 1360000000}
+{"timestamp": "2026-05-04T11:59:02.9692119Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.0, "prompt_eval_rate": 0.0, "total_rate": 16.0, "eval_count": 34, "prompt_eval_count": 0, "total_count": 34, "eval_duration": 2130000000, "prompt_eval_duration": 0, "total_duration": 2130000000}
+{"timestamp": "2026-05-04T11:59:14.9062592Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.54, "prompt_eval_rate": 0.0, "total_rate": 15.54, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000}
+{"timestamp": "2026-05-04T11:59:23.8101354Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.97, "prompt_eval_rate": 0.0, "total_rate": 33.97, "eval_count": 31, "prompt_eval_count": 0, "total_count": 31, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000}
+{"timestamp": "2026-05-04T11:59:37.8817756Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.49, "prompt_eval_rate": 0.0, "total_rate": 15.49, "eval_count": 103, "prompt_eval_count": 0, "total_count": 103, "eval_duration": 6650000000, "prompt_eval_duration": 0, "total_duration": 6650000000}
+{"timestamp": "2026-05-04T11:59:59.8121966Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1060000000, "prompt_eval_duration": 0, "total_duration": 1060000000}
+{"timestamp": "2026-05-04T12:00:10.7970585Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 21.25, "prompt_eval_rate": 0.0, "total_rate": 21.25, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 890000000, "prompt_eval_duration": 0, "total_duration": 890000000}
+{"timestamp": "2026-05-04T12:00:40.4307689Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.68, "prompt_eval_rate": 0.0, "total_rate": 8.68, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5300000000, "prompt_eval_duration": 0, "total_duration": 5300000000}
+{"timestamp": "2026-05-04T12:00:57.1961185Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.69, "prompt_eval_rate": 0.0, "total_rate": 16.69, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000}
+{"timestamp": "2026-05-04T12:01:13.9655300Z", "commit": "26b61db45164b875250e5c17ab267f3d38ca579a", "short_commit": "26b61db4", "branch": "main", "run_id": "25315897044", "run_number": "308", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.85, "prompt_eval_rate": 0.0, "total_rate": 15.85, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000}
+{"timestamp": "2026-05-04T12:11:07.9765458Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 31.57, "prompt_eval_rate": 0.0, "total_rate": 31.57, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 480000000, "prompt_eval_duration": 0, "total_duration": 480000000}
+{"timestamp": "2026-05-04T12:11:13.4862167Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.12, "prompt_eval_rate": 0.0, "total_rate": 35.12, "eval_count": 21, "prompt_eval_count": 0, "total_count": 21, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000}
+{"timestamp": "2026-05-04T12:11:21.1983246Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 37.27, "prompt_eval_rate": 0.0, "total_rate": 37.27, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000}
+{"timestamp": "2026-05-04T12:12:20.1272735Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.12, "prompt_eval_rate": 0.0, "total_rate": 14.12, "eval_count": 137, "prompt_eval_count": 0, "total_count": 137, "eval_duration": 9700000000, "prompt_eval_duration": 0, "total_duration": 9700000000}
+{"timestamp": "2026-05-04T12:13:37.1853298Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 9.43, "prompt_eval_rate": 0.0, "total_rate": 9.43, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 4880000000, "prompt_eval_duration": 0, "total_duration": 4880000000}
+{"timestamp": "2026-05-04T12:14:00.8454739Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.18, "prompt_eval_rate": 0.0, "total_rate": 19.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000}
+{"timestamp": "2026-05-04T12:14:28.1360263Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 13.27, "prompt_eval_rate": 0.0, "total_rate": 13.27, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 1730000000, "prompt_eval_duration": 0, "total_duration": 1730000000}
+{"timestamp": "2026-05-04T12:14:52.6172460Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.84, "prompt_eval_rate": 0.0, "total_rate": 16.84, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1190000000, "prompt_eval_duration": 0, "total_duration": 1190000000}
+{"timestamp": "2026-05-04T12:15:24.1245352Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.23, "prompt_eval_rate": 0.0, "total_rate": 18.23, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1100000000, "prompt_eval_duration": 0, "total_duration": 1100000000}
+{"timestamp": "2026-05-04T12:15:36.7690104Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.88, "prompt_eval_rate": 0.0, "total_rate": 29.88, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 740000000, "prompt_eval_duration": 0, "total_duration": 740000000}
+{"timestamp": "2026-05-04T12:15:58.7383276Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.74, "prompt_eval_rate": 0.0, "total_rate": 29.74, "eval_count": 110, "prompt_eval_count": 0, "total_count": 110, "eval_duration": 3700000000, "prompt_eval_duration": 0, "total_duration": 3700000000}
+{"timestamp": "2026-05-04T12:16:51.3131788Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.5, "prompt_eval_rate": 0.0, "total_rate": 14.5, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 35310000000, "prompt_eval_duration": 0, "total_duration": 35310000000}
+{"timestamp": "2026-05-04T12:17:09.0073182Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.73, "prompt_eval_rate": 0.0, "total_rate": 17.73, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1070000000, "prompt_eval_duration": 0, "total_duration": 1070000000}
+{"timestamp": "2026-05-04T12:17:52.9209199Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.55, "prompt_eval_rate": 0.0, "total_rate": 8.55, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5610000000, "prompt_eval_duration": 0, "total_duration": 5610000000}
+{"timestamp": "2026-05-04T12:18:24.7812117Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.87, "prompt_eval_rate": 0.0, "total_rate": 15.87, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1260000000, "prompt_eval_duration": 0, "total_duration": 1260000000}
+{"timestamp": "2026-05-04T12:18:43.6490300Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 17.83, "prompt_eval_rate": 0.0, "total_rate": 17.83, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000}
+{"timestamp": "2026-05-04T12:20:16.6547715Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.77, "prompt_eval_rate": 0.0, "total_rate": 30.77, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 490000000, "prompt_eval_duration": 0, "total_duration": 490000000}
+{"timestamp": "2026-05-04T12:20:21.9810122Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 33.31, "prompt_eval_rate": 0.0, "total_rate": 33.31, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 600000000, "prompt_eval_duration": 0, "total_duration": 600000000}
+{"timestamp": "2026-05-04T12:20:29.3048624Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 30.49, "prompt_eval_rate": 0.0, "total_rate": 30.49, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 720000000, "prompt_eval_duration": 0, "total_duration": 720000000}
+{"timestamp": "2026-05-04T12:20:35.0154804Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 40.77, "prompt_eval_rate": 0.0, "total_rate": 40.77, "eval_count": 37, "prompt_eval_count": 0, "total_count": 37, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000}
+{"timestamp": "2026-05-04T12:20:42.2659296Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 35.66, "prompt_eval_rate": 0.0, "total_rate": 35.66, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 620000000, "prompt_eval_duration": 0, "total_duration": 620000000}
+{"timestamp": "2026-05-04T12:21:50.0194380Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.14, "prompt_eval_rate": 0.0, "total_rate": 7.14, "eval_count": 156, "prompt_eval_count": 0, "total_count": 156, "eval_duration": 21860000000, "prompt_eval_duration": 0, "total_duration": 21860000000}
+{"timestamp": "2026-05-04T12:22:50.8726674Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.01, "prompt_eval_rate": 0.0, "total_rate": 8.01, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 6000000000, "prompt_eval_duration": 0, "total_duration": 6000000000}
+{"timestamp": "2026-05-04T12:23:04.2490589Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.92, "prompt_eval_rate": 0.0, "total_rate": 16.92, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1120000000, "prompt_eval_duration": 0, "total_duration": 1120000000}
+{"timestamp": "2026-05-04T12:25:09.6454017Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 4.65, "prompt_eval_rate": 0.0, "total_rate": 4.65, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 110030000000, "prompt_eval_duration": 0, "total_duration": 110030000000}
+{"timestamp": "2026-05-04T12:25:48.0000402Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.68, "prompt_eval_rate": 0.0, "total_rate": 15.68, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-05-04T12:25:56.3108580Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.67, "prompt_eval_rate": 0.0, "total_rate": 15.67, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1280000000, "prompt_eval_duration": 0, "total_duration": 1280000000}
+{"timestamp": "2026-05-04T12:26:04.7426553Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 32.16, "prompt_eval_rate": 0.0, "total_rate": 32.16, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 620000000, "prompt_eval_duration": 0, "total_duration": 620000000}
+{"timestamp": "2026-05-04T12:26:21.6057067Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.48, "prompt_eval_rate": 0.0, "total_rate": 14.48, "eval_count": 115, "prompt_eval_count": 0, "total_count": 115, "eval_duration": 7940000000, "prompt_eval_duration": 0, "total_duration": 7940000000}
+{"timestamp": "2026-05-04T12:26:45.4045892Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 14.19, "prompt_eval_rate": 0.0, "total_rate": 14.19, "eval_count": 13, "prompt_eval_count": 0, "total_count": 13, "eval_duration": 920000000, "prompt_eval_duration": 0, "total_duration": 920000000}
+{"timestamp": "2026-05-04T12:26:58.6663276Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 20.96, "prompt_eval_rate": 0.0, "total_rate": 20.96, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 910000000, "prompt_eval_duration": 0, "total_duration": 910000000}
+{"timestamp": "2026-05-04T12:27:33.1874529Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.62, "prompt_eval_rate": 0.0, "total_rate": 8.62, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5570000000, "prompt_eval_duration": 0, "total_duration": 5570000000}
+{"timestamp": "2026-05-04T12:27:56.7492385Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.51, "prompt_eval_rate": 0.0, "total_rate": 16.51, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1210000000, "prompt_eval_duration": 0, "total_duration": 1210000000}
+{"timestamp": "2026-05-04T12:28:13.8129306Z", "commit": "8ce9f8234fb9433585b042830b5d000e2e0baf76", "short_commit": "8ce9f823", "branch": "main", "run_id": "25316691311", "run_number": "310", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.94, "prompt_eval_rate": 0.0, "total_rate": 15.94, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1250000000, "prompt_eval_duration": 0, "total_duration": 1250000000}
+{"timestamp": "2026-05-06T11:18:20.8850298Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 33.74, "prompt_eval_rate": 0.0, "total_rate": 33.74, "eval_count": 23, "prompt_eval_count": 0, "total_count": 23, "eval_duration": 680000000, "prompt_eval_duration": 0, "total_duration": 680000000}
+{"timestamp": "2026-05-06T11:18:30.4028640Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.3, "prompt_eval_rate": 39.16, "total_rate": 36.4, "eval_count": 27, "prompt_eval_count": 12, "total_count": 39, "eval_duration": 760000000, "prompt_eval_duration": 310000000, "total_duration": 1070000000}
+{"timestamp": "2026-05-06T11:18:38.1428213Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 17.83, "prompt_eval_rate": 44.43, "total_rate": 34.22, "eval_count": 3, "prompt_eval_count": 12, "total_count": 15, "eval_duration": 170000000, "prompt_eval_duration": 270000000, "total_duration": 440000000}
+{"timestamp": "2026-05-06T11:19:54.8336127Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 11.82, "prompt_eval_rate": 0.0, "total_rate": 11.82, "eval_count": 124, "prompt_eval_count": 0, "total_count": 124, "eval_duration": 10490000000, "prompt_eval_duration": 0, "total_duration": 10490000000}
+{"timestamp": "2026-05-06T11:21:14.2633753Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 8.85, "prompt_eval_rate": 0.0, "total_rate": 8.85, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 5200000000, "prompt_eval_duration": 0, "total_duration": 5200000000}
+{"timestamp": "2026-05-06T11:21:37.7443900Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 19.15, "prompt_eval_rate": 0.0, "total_rate": 19.15, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 990000000, "prompt_eval_duration": 0, "total_duration": 990000000}
+{"timestamp": "2026-05-06T11:22:19.7584207Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.09, "prompt_eval_rate": 0.0, "total_rate": 14.09, "eval_count": 25, "prompt_eval_count": 0, "total_count": 25, "eval_duration": 1770000000, "prompt_eval_duration": 0, "total_duration": 1770000000}
+{"timestamp": "2026-05-06T11:23:09.5080876Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 16.67, "prompt_eval_rate": 0.0, "total_rate": 16.67, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1200000000, "prompt_eval_duration": 0, "total_duration": 1200000000}
+{"timestamp": "2026-05-06T11:23:57.1947828Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 18.52, "prompt_eval_rate": 0.0, "total_rate": 18.52, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1080000000, "prompt_eval_duration": 0, "total_duration": 1080000000}
+{"timestamp": "2026-05-06T11:24:10.2080614Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.78, "prompt_eval_rate": 0.0, "total_rate": 29.78, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 500000000, "prompt_eval_duration": 0, "total_duration": 500000000}
+{"timestamp": "2026-05-06T11:24:24.2488086Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 29.28, "prompt_eval_rate": 0.0, "total_rate": 29.28, "eval_count": 112, "prompt_eval_count": 0, "total_count": 112, "eval_duration": 3830000000, "prompt_eval_duration": 0, "total_duration": 3830000000}
+{"timestamp": "2026-05-06T11:24:53.6566294Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.47, "prompt_eval_rate": 0.0, "total_rate": 11.47, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1310000000, "prompt_eval_duration": 0, "total_duration": 1310000000}
+{"timestamp": "2026-05-06T11:25:11.8539184Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.33, "prompt_eval_rate": 0.0, "total_rate": 18.33, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1040000000, "prompt_eval_duration": 0, "total_duration": 1040000000}
+{"timestamp": "2026-05-06T11:25:54.3531897Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.39, "prompt_eval_rate": 0.0, "total_rate": 8.39, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5720000000, "prompt_eval_duration": 0, "total_duration": 5720000000}
+{"timestamp": "2026-05-06T11:26:33.5663299Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.33, "prompt_eval_rate": 0.0, "total_rate": 15.33, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1300000000, "prompt_eval_duration": 0, "total_duration": 1300000000}
+{"timestamp": "2026-05-06T11:26:53.0868149Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "opencl", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.41, "prompt_eval_rate": 0.0, "total_rate": 16.41, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1220000000, "prompt_eval_duration": 0, "total_duration": 1220000000}
+{"timestamp": "2026-05-06T11:09:15.4770696Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 30.75, "prompt_eval_rate": 0.0, "total_rate": 30.75, "eval_count": 512, "prompt_eval_count": 0, "total_count": 512, "eval_duration": 16649999999, "prompt_eval_duration": 0, "total_duration": 16649999999}
+{"timestamp": "2026-05-06T11:09:37.0148114Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 30.6, "prompt_eval_rate": 37.08, "total_rate": 30.73, "eval_count": 500, "prompt_eval_count": 12, "total_count": 512, "eval_duration": 16340000000, "prompt_eval_duration": 320000000, "total_duration": 16660000000}
+{"timestamp": "2026-05-06T11:09:44.7350508Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 32.1, "prompt_eval_rate": 36.14, "total_rate": 33.31, "eval_count": 25, "prompt_eval_count": 12, "total_count": 37, "eval_duration": 780000000, "prompt_eval_duration": 330000000, "total_duration": 1110000000}
+{"timestamp": "2026-05-06T11:09:50.2611626Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "prefill-decode", "load_duration": 0, "eval_rate": 35.93, "prompt_eval_rate": 48.35, "total_rate": 39.9, "eval_count": 19, "prompt_eval_count": 12, "total_count": 31, "eval_duration": 530000000, "prompt_eval_duration": 250000000, "total_duration": 780000000}
+{"timestamp": "2026-05-06T11:10:00.3706462Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "F16", "configuration": "batch-prefill-decode", "load_duration": 0, "eval_rate": 34.4, "prompt_eval_rate": 36.63, "total_rate": 35.58, "eval_count": 10, "prompt_eval_count": 12, "total_count": 22, "eval_duration": 290000000, "prompt_eval_duration": 330000000, "total_duration": 620000000}
+{"timestamp": "2026-05-06T11:10:40.6784157Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-4B", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.91, "prompt_eval_rate": 0.0, "total_rate": 7.91, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 15550000000, "prompt_eval_duration": 0, "total_duration": 15550000000}
+{"timestamp": "2026-05-06T11:11:12.6627139Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 7.59, "prompt_eval_rate": 0.0, "total_rate": 7.59, "eval_count": 46, "prompt_eval_count": 0, "total_count": 46, "eval_duration": 6060000000, "prompt_eval_duration": 0, "total_duration": 6060000000}
+{"timestamp": "2026-05-06T11:11:34.7510955Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.35, "prompt_eval_rate": 0.0, "total_rate": 15.35, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1240000000, "prompt_eval_duration": 0, "total_duration": 1240000000}
+{"timestamp": "2026-05-06T11:12:16.1973421Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 10.38, "prompt_eval_rate": 0.0, "total_rate": 10.38, "eval_count": 15, "prompt_eval_count": 0, "total_count": 15, "eval_duration": 1440000000, "prompt_eval_duration": 0, "total_duration": 1440000000}
+{"timestamp": "2026-05-06T11:13:03.3584541Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 15.03, "prompt_eval_rate": 0.0, "total_rate": 15.03, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1330000000, "prompt_eval_duration": 0, "total_duration": 1330000000}
+{"timestamp": "2026-05-06T11:13:36.3050454Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "F16", "configuration": "standard", "load_duration": 0, "eval_rate": 14.7, "prompt_eval_rate": 0.0, "total_rate": 14.7, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1360000000, "prompt_eval_duration": 0, "total_duration": 1360000000}
+{"timestamp": "2026-05-06T11:13:44.4973845Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Llama-3.2-1B-Instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 33.06, "prompt_eval_rate": 0.0, "total_rate": 33.06, "eval_count": 22, "prompt_eval_count": 0, "total_count": 22, "eval_duration": 670000000, "prompt_eval_duration": 0, "total_duration": 670000000}
+{"timestamp": "2026-05-06T11:14:05.6560088Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Qwen3-0.6B", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 13.9, "prompt_eval_rate": 0.0, "total_rate": 13.9, "eval_count": 123, "prompt_eval_count": 0, "total_count": 123, "eval_duration": 8850000000, "prompt_eval_duration": 0, "total_duration": 8850000000}
+{"timestamp": "2026-05-06T11:14:41.9066315Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Phi-3-mini-4k-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 11.11, "prompt_eval_rate": 0.0, "total_rate": 11.11, "eval_count": 107, "prompt_eval_count": 0, "total_count": 107, "eval_duration": 9630000000, "prompt_eval_duration": 0, "total_duration": 9630000000}
+{"timestamp": "2026-05-06T11:14:59.2856984Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "qwen2.5-1.5b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 18.18, "prompt_eval_rate": 0.0, "total_rate": 18.18, "eval_count": 19, "prompt_eval_count": 0, "total_count": 19, "eval_duration": 1050000000, "prompt_eval_duration": 0, "total_duration": 1050000000}
+{"timestamp": "2026-05-06T11:15:40.1511897Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "Mistral-7B-Instruct-v0.3", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 8.16, "prompt_eval_rate": 0.0, "total_rate": 8.16, "eval_count": 48, "prompt_eval_count": 0, "total_count": 48, "eval_duration": 5880000000, "prompt_eval_duration": 0, "total_duration": 5880000000}
+{"timestamp": "2026-05-06T11:16:11.8078059Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-3.2-2b-instruct", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 16.89, "prompt_eval_rate": 0.0, "total_rate": 16.89, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1180000000, "prompt_eval_duration": 0, "total_duration": 1180000000}
+{"timestamp": "2026-05-06T11:16:30.5879933Z", "commit": "0235dc7e26d763ed0786825f9272d289952deede", "short_commit": "0235dc7e", "branch": "main", "run_id": "25431221446", "run_number": "316", "run_attempt": "1", "workflow": "GPULlama3 Build & Run", "backend": "ptx", "model": "granite-4.0-1b", "quantization": "Q8_0", "configuration": "standard", "load_duration": 0, "eval_rate": 15.56, "prompt_eval_rate": 0.0, "total_rate": 15.56, "eval_count": 20, "prompt_eval_count": 0, "total_count": 20, "eval_duration": 1290000000, "prompt_eval_duration": 0, "total_duration": 1290000000}
diff --git a/scripts/process_metrics.py b/scripts/process_metrics.py
new file mode 100644
index 00000000..98242668
--- /dev/null
+++ b/scripts/process_metrics.py
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+"""
+Append benchmark JSON metrics to the performance history JSONL file.
+
+Scans --metrics-dir recursively for *.json files. Each metrics file must have a
+companion *.meta.json sidecar (same stem) written by the CI step that produced it.
+The sidecar schema is open-ended — this script does not assume any fixed fields.
+
+Usage:
+ python3 scripts/process_metrics.py \\
+ --metrics-dir /path/to/artifacts \\
+ --commit $GITHUB_SHA --branch main \\
+ --run-id $GITHUB_RUN_ID --run-number $GITHUB_RUN_NUMBER \\
+ --run-attempt 1 --workflow "GPULlama3 Build & Run" \\
+ --history docs/perf-history.jsonl
+"""
+
+import argparse
+import json
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+def parse_args():
+    """Parse the command line of the metrics-to-history appender.
+
+    Returns:
+        argparse.Namespace with ``metrics_dir``, ``history``, ``commit``,
+        ``branch``, ``run_id``, ``run_number``, ``run_attempt`` and
+        ``workflow``.  Every option is required except ``--run-number``,
+        which defaults to the empty string.
+    """
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    # (flag, add_argument keyword arguments), in the order shown by --help.
+    option_specs = (
+        ("--metrics-dir", {
+            "required": True,
+            "dest": "metrics_dir",
+            "help": "Directory to search recursively for *.json + *.meta.json pairs",
+        }),
+        ("--history", {
+            "required": True,
+            "help": "JSONL history file to append rows to",
+        }),
+        ("--commit", {"required": True}),
+        ("--branch", {"required": True}),
+        ("--run-id", {"required": True, "dest": "run_id"}),
+        ("--run-number", {"default": "", "dest": "run_number"}),
+        ("--run-attempt", {"required": True, "dest": "run_attempt"}),
+        ("--workflow", {"required": True}),
+    )
+    for flag, options in option_specs:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+
+
+def load_json(path):
+    """Read and parse a JSON file, reporting failures instead of raising.
+
+    Args:
+        path: File to read; it is decoded as UTF-8 regardless of the locale.
+
+    Returns:
+        The parsed JSON value, or None if the file cannot be opened, is not
+        valid UTF-8, or is not valid JSON.  An ERROR line naming the file is
+        printed to stderr in the failure case.
+    """
+    try:
+        with open(path, encoding="utf-8") as f:
+            return json.load(f)
+    # ValueError covers json.JSONDecodeError and UnicodeDecodeError (both are
+    # subclasses), so a badly encoded file is skipped rather than crashing.
+    except (OSError, ValueError) as e:
+        print(f"ERROR: {path}: {e}", file=sys.stderr)
+        return None
+
+
+def discover_pairs(metrics_dir):
+    """Generate (metrics_path, meta_path) tuples for the JSON files under a directory.
+
+    Every ``*.json`` path found recursively is visited in sorted order; paths
+    whose name ends in ``.meta.json`` are sidecars and are not yielded as
+    metrics.  The sidecar path is derived by swapping the final ``.json``
+    suffix for ``.meta.json``; its existence is not checked here.
+    """
+    root = Path(metrics_dir)
+    candidates = sorted(root.rglob("*.json"))
+    for metrics_path in candidates:
+        if not metrics_path.name.endswith(".meta.json"):
+            yield metrics_path, metrics_path.with_suffix(".meta.json")
+
+
+def build_row(m, meta, args):
+    """Assemble one history row from a metrics dict, its sidecar dict and the CLI args.
+
+    Args:
+        m: Parsed metrics JSON object.
+        meta: Parsed sidecar JSON object.
+        args: Namespace providing ``commit``, ``branch``, ``run_id``,
+            ``run_number``, ``run_attempt`` and ``workflow``.
+
+    Returns:
+        A dict whose key order is: run identification, the four flat sidecar
+        fields, the promoted metric fields, then the complete ``meta`` and
+        ``m`` objects under "benchmark" and "metrics".  Promoted fields that
+        are absent from their source are None.  "timestamp" is the UTC time
+        at which the row is built.
+    """
+    # Flat compat fields copied from the sidecar.
+    sidecar_fields = ("backend", "model", "quantization", "configuration")
+    # Key metrics promoted to the top level of the row.
+    metric_fields = (
+        "eval_rate",
+        "prompt_eval_rate",
+        "total_rate",
+        "eval_count",
+        "prompt_eval_count",
+        "total_count",
+        "total_duration",
+        "load_duration",
+        "prompt_eval_duration",
+        "eval_duration",
+        "has_prefill_phase",
+        "tornadovm",
+    )
+
+    commit = args.commit
+    row = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "commit": commit,
+        "short_commit": commit[:8],
+        "branch": args.branch,
+        "run_id": args.run_id,
+        "run_number": args.run_number or "",
+        "run_attempt": args.run_attempt,
+        "workflow": args.workflow,
+    }
+    for name in sidecar_fields:
+        row[name] = meta.get(name)
+    for name in metric_fields:
+        row[name] = m.get(name)
+    # Nested full objects; their schema is whatever the benchmark step wrote.
+    row["benchmark"] = meta
+    row["metrics"] = m
+    return row
+
+
+def main():
+    """Collect metrics/sidecar pairs and append one JSONL row per pair to the history file.
+
+    A pair is skipped, with a WARNING on stderr, when the metrics file or its
+    sidecar is missing, unreadable, or not a JSON object.  If no pair is
+    usable, nothing is written and the history file is left untouched.
+    All progress output goes to stderr.
+    """
+    args = parse_args()
+    rows = []
+
+    for metrics_path, meta_path in discover_pairs(args.metrics_dir):
+        m = load_json(metrics_path)
+        if not isinstance(m, dict):
+            print(f"WARNING: {metrics_path.name}: not a JSON object, skipping", file=sys.stderr)
+            continue
+
+        if not meta_path.exists():
+            print(f"WARNING: no sidecar for {metrics_path.name}, skipping", file=sys.stderr)
+            continue
+        meta = load_json(meta_path)
+        if not isinstance(meta, dict):
+            print(f"WARNING: {meta_path.name}: not a JSON object, skipping", file=sys.stderr)
+            continue
+
+        rows.append(build_row(m, meta, args))
+
+        # Sidecar values are arbitrary JSON (numbers and booleans included).
+        # str.join() raises TypeError on non-string items, which would abort
+        # the run before any row is written, so stringify each part first.
+        label_parts = (
+            meta.get("backend"),
+            meta.get("model"),
+            meta.get("quantization"),
+            meta.get("configuration") or meta.get("task"),
+        )
+        label = " / ".join(str(part) for part in label_parts if part)
+        print(f" {label or metrics_path.name}", file=sys.stderr)
+
+    if not rows:
+        print("WARNING: no metrics loaded, nothing written", file=sys.stderr)
+        return
+
+    history = Path(args.history)
+    history.parent.mkdir(parents=True, exist_ok=True)
+    # Append mode: existing history rows are preserved.
+    with open(history, "a", encoding="utf-8") as f:
+        for row in rows:
+            f.write(json.dumps(row) + "\n")
+    print(f"Appended {len(rows)} row(s) to {history}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/write_metrics_sidecar.py b/scripts/write_metrics_sidecar.py
new file mode 100644
index 00000000..55e92920
--- /dev/null
+++ b/scripts/write_metrics_sidecar.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+Write a benchmark metadata sidecar JSON file from KEY=VALUE arguments.
+
+Values are stored as strings unless they parse as a valid JSON literal
+(true, false, null, or a number), making it easy to pass typed metadata
+from shell without quoting gymnastics.
+
+Usage:
+    python3 scripts/write_metrics_sidecar.py --out /path/to/file.meta.json \\
+        backend=opencl \\
+        task=llama-inference \\
+        model_file=Llama-3.2-1B-Instruct-F16.gguf \\
+        configuration=standard \\
+        flags="" \\
+        prompt="Say hello"
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+
+def coerce(value):
+    """Convert a command-line string to a JSON scalar when it is one, else keep it.
+
+    Only the literals ``true``, ``false``, ``null`` and finite numbers are
+    converted.  Everything else is returned unchanged as a string, including
+    text that happens to be a JSON array, object or quoted string, and the
+    non-standard ``NaN`` / ``Infinity`` tokens.
+
+    Args:
+        value: Raw text taken from the right-hand side of a KEY=VALUE argument.
+
+    Returns:
+        None, a bool, an int, a finite float, or the original string.
+    """
+    try:
+        parsed = json.loads(value)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return value
+
+    # bool is a subclass of int, so this also covers true/false.
+    if parsed is None or isinstance(parsed, int):
+        return parsed
+    # NaN and +/-Infinity (including overflowing literals such as 1e999) fail
+    # this range test; json.dump would serialise them as non-standard tokens.
+    if isinstance(parsed, float) and float("-inf") < parsed < float("inf"):
+        return parsed
+    return value
+
+
+def parse_args():
+    """Parse the sidecar writer's command line.
+
+    Returns:
+        argparse.Namespace with ``out`` (required output path) and ``fields``
+        (a possibly empty list of raw KEY=VALUE strings).
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__,
+    )
+    parser.add_argument("--out", help="Output .meta.json path", required=True)
+    parser.add_argument("fields", metavar="KEY=VALUE", nargs="*")
+    return parser.parse_args()
+
+
+def main():
+    """Build the sidecar mapping from the KEY=VALUE arguments and write it as JSON.
+
+    Only the first "=" separates key from value, so values may contain "=".
+    A repeated key keeps its last value.  Parent directories of the output
+    path are created as needed.
+
+    Exits with status 1, after an ERROR line on stderr, when an argument has
+    no "=" or an empty key.
+    """
+    args = parse_args()
+    meta = {}
+    for field in args.fields:
+        key, sep, value = field.partition("=")
+        # "=value" contains the separator but has no key; accepting it would
+        # write an entry named "" into the sidecar.
+        if not sep or not key:
+            print(f"ERROR: expected KEY=VALUE, got: {field!r}", file=sys.stderr)
+            sys.exit(1)
+        meta[key] = coerce(value)
+
+    out = Path(args.out)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    with open(out, "w", encoding="utf-8") as f:
+        json.dump(meta, f)
+    print(f"Wrote sidecar: {out}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+ main()