Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 41 additions & 40 deletions .github/workflows/tilegym-ci.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: MIT
# SPDX-License-Identifier: MIT

name: tilegym-ci

Expand Down Expand Up @@ -59,16 +59,16 @@ jobs:
script: |
let prBody = '';
let prNumber = '';

const branchName = context.ref.replace('refs/heads/', '');
core.info(`Looking for PR for branch: ${branchName}`);

// Try method 1: Extract PR number from branch name
const branchMatch = branchName.match(/^pull-request\/(\d+)/);
if (branchMatch) {
prNumber = branchMatch[1];
core.info(`Extracted PR #${prNumber} from branch name`);

// Fetch PR body by number
try {
const { data: pr } = await github.rest.pulls.get({
Expand All @@ -90,7 +90,7 @@ jobs:
state: 'open',
head: `${context.repo.owner}:${branchName}`,
});

if (prs.length > 0) {
prBody = prs[0].body || '';
prNumber = prs[0].number.toString();
Expand All @@ -103,7 +103,7 @@ jobs:
core.warning(`Error searching for PR: ${error.message}`);
}
}

return { prBody, prNumber };

- name: Parse config and set image tag
Expand All @@ -118,7 +118,7 @@ jobs:
if [[ "$IS_PR" == "true" ]]; then
pip install pyyaml --quiet
python3 .github/scripts/parse_pr_config.py

# Set PR-specific image tag
if [ -n "$PR_NUMBER" ]; then
echo "image_tag=pr-${PR_NUMBER}" >> $GITHUB_OUTPUT
Expand All @@ -130,15 +130,15 @@ jobs:
else
# Main/nightly: check if image already exists before building
echo "image_tag=${{ github.sha }}" >> $GITHUB_OUTPUT

# Check if 'latest' already points to current SHA (tests passed previously)
OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
export REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ steps.context.outputs.image_name }}"
export IMAGE_TAG="${{ github.sha }}"
export IS_PR="false"

python3 .github/scripts/check_image_exists.py

# Read the skipped output from check_image_exists.py
if [ -f "$GITHUB_OUTPUT" ] && grep -q "skipped=true" "$GITHUB_OUTPUT"; then
echo "✅ Image already exists and tests passed, skipping build"
Expand All @@ -154,7 +154,7 @@ jobs:
echo "run_sanity=true" >> $GITHUB_OUTPUT
fi
fi

# Pass through image name from context
echo "image_name=${{ steps.context.outputs.image_name }}" >> $GITHUB_OUTPUT

Expand Down Expand Up @@ -187,7 +187,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Download TileGym wheel (Python 3.10, x86_64)
uses: actions/download-artifact@v4
with:
Expand All @@ -199,7 +199,7 @@ jobs:
run: |
OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}"

echo "owner_lower=${OWNER_LOWER}" >> $GITHUB_OUTPUT
echo "registry_image=${REGISTRY_IMAGE}" >> $GITHUB_OUTPUT

Expand Down Expand Up @@ -227,13 +227,13 @@ jobs:
run: |
TAGS="${{ steps.vars.outputs.registry_image }}:${{ needs.config.outputs.image_tag }}"
TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:${{ github.sha }}"

# Add a "nightly-<UTC datetime>" tag for non-PR (main/nightly) builds
if [[ "${{ needs.config.outputs.is_pr }}" == "false" ]]; then
DATETIME=$(date -u +%Y%m%d-%H%M%S)
TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:nightly-${DATETIME}"
fi

echo "tags=${TAGS}" >> $GITHUB_OUTPUT

- name: Build and push Docker image to GHCR
Expand All @@ -242,7 +242,7 @@ jobs:
with:
context: .
file: ./modeling/transformers/Dockerfile
target: wheel # Use wheel target for CI builds
target: source
tags: ${{ steps.tags.outputs.tags }}
push: true
provenance: false
Expand Down Expand Up @@ -312,21 +312,22 @@ jobs:
run: |
OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}"

docker pull ${IMAGE}
docker run --rm \
--gpus all \
-e DISABLE_AUTOTUNE=1 \
-v ${{ github.workspace }}/tests:/workspace/tilegym/tests \
-v ${{ github.workspace }}/test-results:/test-results \
-w /workspace/tilegym \
-w /workspace/tilegym/modeling/transformers \
${IMAGE} \
bash -c "pip install --no-cache-dir pytest-xdist pytest-html && \
pytest -s tests/ops tests/suites -v -k test_op \
uv run --locked --no-sync python -m pytest \
/workspace/tilegym/tests/ops /workspace/tilegym/tests/suites \
-v -k test_op \
-n 12 \
--junitxml=/test-results/ops-results.xml \
--html=/test-results/ops-report.html \
--self-contained-html"
--self-contained-html

- name: Upload test results
if: always()
Expand All @@ -349,7 +350,7 @@ jobs:
needs: [config, build]
timeout-minutes: 70
if: |
always() &&
always() &&
needs.config.outputs.run_benchmark == 'true' &&
(needs.build.result == 'success' || needs.build.result == 'skipped')
runs-on: linux-amd64-gpu-rtxpro6000-latest-1
Expand All @@ -375,21 +376,21 @@ jobs:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "Attempting to download baseline benchmark results..."

# Find the most recent successful workflow run on main; the query does not check for a baseline artifact, so the download below may still find none
RUN_ID=$(gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"/repos/${{ github.repository }}/actions/workflows/tilegym-ci.yml/runs?branch=main&status=success&per_page=10" \
--jq '.workflow_runs[].id' | head -1)

if [ -z "$RUN_ID" ]; then
echo "⚠️ No successful workflow runs found on main branch"
exit 0
fi

echo "Found workflow run: $RUN_ID"

# Download baseline artifact from that run
mkdir -p ${{ github.workspace }}/baseline-results
if gh run download "$RUN_ID" \
Expand All @@ -413,15 +414,15 @@ jobs:
run: |
OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}"

docker pull ${IMAGE}
docker run --rm \
--gpus all \
-v ${{ github.workspace }}/tests/benchmark:/workspace/tilegym/tests/benchmark \
-v ${{ github.workspace }}/test-results:/test-results \
-w /workspace/tilegym/tests/benchmark \
-w /workspace/tilegym/modeling/transformers \
${IMAGE} \
./run_all.sh /test-results --json
uv run --locked --no-sync bash /workspace/tilegym/tests/benchmark/run_all.sh /test-results --json

# Compare current results against baseline with three zones:
# - Regression zone (< -5%): Build fails
Expand All @@ -437,7 +438,7 @@ jobs:
if [ -d "${{ github.workspace }}/baseline-results" ] && [ "$(ls -A ${{ github.workspace }}/baseline-results/*.json 2>/dev/null)" ]; then
echo "Baseline results found, checking for regressions..."
echo "has_baseline=true" >> $GITHUB_OUTPUT

if python3 .github/scripts/check_benchmark_regression.py \
--current test-results \
--baseline baseline-results \
Expand All @@ -447,11 +448,11 @@ jobs:
--fail-on-regression; then
echo "✅ No regressions detected"
echo "passed=true" >> $GITHUB_OUTPUT

# Check if we should update baseline (only if significant improvements)
SHOULD_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['should_update_baseline'])" 2>/dev/null || echo "false")
echo "should_update_baseline=${SHOULD_UPDATE}" >> $GITHUB_OUTPUT

if [ "$SHOULD_UPDATE" == "True" ]; then
echo "🎉 Significant improvements detected - will update baseline (nightly only)"
else
Expand All @@ -470,7 +471,7 @@ jobs:
echo "passed=true" >> $GITHUB_OUTPUT
echo "should_update_baseline=true" >> $GITHUB_OUTPUT
fi

# Note: PR builds check for regressions but cannot update the baseline
if [ "${{ needs.config.outputs.is_pr }}" == "true" ]; then
echo ""
Expand Down Expand Up @@ -511,13 +512,13 @@ jobs:
steps.regression_check.outputs.has_baseline == 'true'
run: |
mkdir -p ${{ github.workspace }}/merged-baseline

python3 .github/scripts/merge_baseline_selective.py \
--old-baseline baseline-results \
--new-results test-results \
--regression-report test-results/regression_report.json \
--output merged-baseline

- name: Update baseline (nightly only - selective or full)
if: needs.config.outputs.is_pr == 'false'
uses: actions/upload-artifact@v4
Expand All @@ -526,7 +527,7 @@ jobs:
# Use merged baseline if it exists (partial update), otherwise use all new results (first run or all improved)
path: ${{ steps.regression_check.outputs.has_baseline == 'true' && 'merged-baseline/*.json' || 'test-results/*.json' }}
retention-days: 90

- name: Log baseline decision
if: always()
run: |
Expand All @@ -544,13 +545,13 @@ jobs:
TOTAL_FILES=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['total_benchmark_files'])" 2>/dev/null || echo "0")
FILES_WITH_REGRESSIONS=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_with_regressions'])" 2>/dev/null || echo "0")
FILES_SAFE_TO_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_safe_to_update'])" 2>/dev/null || echo "0")

echo "📊 Baseline Update Summary:"
echo " Total benchmark files: $TOTAL_FILES"
echo " Files with regressions: $FILES_WITH_REGRESSIONS"
echo " Files updated: $FILES_SAFE_TO_UPDATE"
echo ""

if [ "$FILES_WITH_REGRESSIONS" -gt 0 ]; then
echo "⚠️ SELECTIVE UPDATE: Some benchmarks regressed, keeping old baseline for those"
echo " ✅ Updated baseline for $FILES_SAFE_TO_UPDATE non-regressing benchmarks"
Expand Down Expand Up @@ -612,7 +613,7 @@ jobs:
OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}"
SHA="${{ github.sha }}"

echo "Promoting ${IMAGE}:${SHA} to latest and adding verified tags (tests passed)"
docker buildx imagetools create \
-t ${IMAGE}:latest \
Expand Down
Loading
Loading