From 51b0c8f5e115ce6b79cd32512e3c808760e78c40 Mon Sep 17 00:00:00 2001 From: "Rundong (David) Li" Date: Mon, 15 Jun 2026 00:55:51 -0700 Subject: [PATCH 1/2] fix: run public CI through hf-bench uv env --- .github/workflows/tilegym-ci.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tilegym-ci.yml b/.github/workflows/tilegym-ci.yml index 3c31e4cc..2414dbd9 100644 --- a/.github/workflows/tilegym-ci.yml +++ b/.github/workflows/tilegym-ci.yml @@ -242,7 +242,7 @@ jobs: with: context: . file: ./modeling/transformers/Dockerfile - target: wheel # Use wheel target for CI builds + target: source tags: ${{ steps.tags.outputs.tags }} push: true provenance: false @@ -319,14 +319,15 @@ jobs: -e DISABLE_AUTOTUNE=1 \ -v ${{ github.workspace }}/tests:/workspace/tilegym/tests \ -v ${{ github.workspace }}/test-results:/test-results \ - -w /workspace/tilegym \ + -w /workspace/tilegym/modeling/transformers \ ${IMAGE} \ - bash -c "pip install --no-cache-dir pytest-xdist pytest-html && \ - pytest -s tests/ops tests/suites -v -k test_op \ + uv run --locked --no-sync python -m pytest \ + /workspace/tilegym/tests/ops /workspace/tilegym/tests/suites \ + -v -k test_op \ -n 12 \ --junitxml=/test-results/ops-results.xml \ --html=/test-results/ops-report.html \ - --self-contained-html" + --self-contained-html - name: Upload test results if: always() @@ -419,9 +420,9 @@ jobs: --gpus all \ -v ${{ github.workspace }}/tests/benchmark:/workspace/tilegym/tests/benchmark \ -v ${{ github.workspace }}/test-results:/test-results \ - -w /workspace/tilegym/tests/benchmark \ + -w /workspace/tilegym/modeling/transformers \ ${IMAGE} \ - ./run_all.sh /test-results --json + uv run --locked --no-sync bash /workspace/tilegym/tests/benchmark/run_all.sh /test-results --json # Compare current results against baseline with three zones: # - Regression zone (< -5%): Build fails From f529b594d1db4f1e5cf43b1bc34ba92bde7d1a73 Mon Sep 17 00:00:00 2001 From: "Rundong (David) Li" Date: Mon, 15 Jun 2026 02:58:21 -0700 Subject: [PATCH 2/2] style: format tilegym ci workflow --- .github/workflows/tilegym-ci.yml | 66 ++++++++++++++++---------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/.github/workflows/tilegym-ci.yml b/.github/workflows/tilegym-ci.yml index 2414dbd9..f5cdf1a3 100644 --- a/.github/workflows/tilegym-ci.yml +++ b/.github/workflows/tilegym-ci.yml @@ -1,6 +1,6 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # -# SPDX-License-Identifier: MIT +# SPDX-License-Identifier: MIT name: tilegym-ci @@ -59,16 +59,16 @@ jobs: script: | let prBody = ''; let prNumber = ''; - + const branchName = context.ref.replace('refs/heads/', ''); core.info(`Looking for PR for branch: ${branchName}`); - + // Try method 1: Extract PR number from branch name const branchMatch = branchName.match(/^pull-request\/(\d+)/); if (branchMatch) { prNumber = branchMatch[1]; core.info(`Extracted PR #${prNumber} from branch name`); - + // Fetch PR body by number try { const { data: pr } = await github.rest.pulls.get({ @@ -90,7 +90,7 @@ jobs: state: 'open', head: `${context.repo.owner}:${branchName}`, }); - + if (prs.length > 0) { prBody = prs[0].body || ''; prNumber = prs[0].number.toString(); @@ -103,7 +103,7 @@ jobs: core.warning(`Error searching for PR: ${error.message}`); } } - + return { prBody, prNumber }; - name: Parse config and set image tag @@ -118,7 +118,7 @@ jobs: if [[ "$IS_PR" == "true" ]]; then pip install pyyaml --quiet python3 .github/scripts/parse_pr_config.py - + # Set PR-specific image tag if [ -n "$PR_NUMBER" ]; then echo "image_tag=pr-${PR_NUMBER}" >> $GITHUB_OUTPUT @@ -130,15 +130,15 @@ jobs: else # Main/nightly: check if image already exists before building echo "image_tag=${{ github.sha }}" >> $GITHUB_OUTPUT - + # Check if 'latest' already points to current SHA (tests passed previously) OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') export REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ steps.context.outputs.image_name }}" export IMAGE_TAG="${{ github.sha }}" export IS_PR="false" - + python3 .github/scripts/check_image_exists.py - + # Read the skipped output from check_image_exists.py if [ -f "$GITHUB_OUTPUT" ] && grep -q "skipped=true" "$GITHUB_OUTPUT"; then echo "✅ Image already exists and tests passed, skipping build" @@ -154,7 +154,7 @@ jobs: echo "run_sanity=true" >> $GITHUB_OUTPUT fi fi - + # Pass through image name from context echo "image_name=${{ steps.context.outputs.image_name }}" >> $GITHUB_OUTPUT @@ -187,7 +187,7 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - + - name: Download TileGym wheel (Python 3.10, x86_64) uses: actions/download-artifact@v4 with: @@ -199,7 +199,7 @@ jobs: run: | OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}" - + echo "owner_lower=${OWNER_LOWER}" >> $GITHUB_OUTPUT echo "registry_image=${REGISTRY_IMAGE}" >> $GITHUB_OUTPUT @@ -227,13 +227,13 @@ jobs: run: | TAGS="${{ steps.vars.outputs.registry_image }}:${{ needs.config.outputs.image_tag }}" TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:${{ github.sha }}" - + # Add datetime tag for nightly builds if [[ "${{ needs.config.outputs.is_pr }}" == "false" ]]; then DATETIME=$(date -u +%Y%m%d-%H%M%S) TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:nightly-${DATETIME}" fi - + echo "tags=${TAGS}" >> $GITHUB_OUTPUT - name: Build and push Docker image to GHCR @@ -312,7 +312,7 @@ jobs: run: | OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}" - + docker pull ${IMAGE} docker run --rm \ --gpus all \ @@ -350,7 +350,7 @@ jobs: needs: [config, build] timeout-minutes: 70 if: | - always() && + always() && needs.config.outputs.run_benchmark == 'true' && (needs.build.result == 'success' || needs.build.result == 'skipped') runs-on: linux-amd64-gpu-rtxpro6000-latest-1 @@ -376,21 +376,21 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | echo "Attempting to download baseline benchmark results..." - + # Find the most recent successful workflow run on main with baseline artifact RUN_ID=$(gh api \ -H "Accept: application/vnd.github+json" \ -H "X-GitHub-Api-Version: 2022-11-28" \ "/repos/${{ github.repository }}/actions/workflows/tilegym-ci.yml/runs?branch=main&status=success&per_page=10" \ --jq '.workflow_runs[].id' | head -1) - + if [ -z "$RUN_ID" ]; then echo "⚠️ No successful workflow runs found on main branch" exit 0 fi - + echo "Found workflow run: $RUN_ID" - + # Download baseline artifact from that run mkdir -p ${{ github.workspace }}/baseline-results if gh run download "$RUN_ID" \ @@ -414,7 +414,7 @@ jobs: run: | OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}" - + docker pull ${IMAGE} docker run --rm \ --gpus all \ @@ -438,7 +438,7 @@ jobs: if [ -d "${{ github.workspace }}/baseline-results" ] && [ "$(ls -A ${{ github.workspace }}/baseline-results/*.json 2>/dev/null)" ]; then echo "Baseline results found, checking for regressions..." echo "has_baseline=true" >> $GITHUB_OUTPUT - + if python3 .github/scripts/check_benchmark_regression.py \ --current test-results \ --baseline baseline-results \ @@ -448,11 +448,11 @@ jobs: --fail-on-regression; then echo "✅ No regressions detected" echo "passed=true" >> $GITHUB_OUTPUT - + # Check if we should update baseline (only if significant improvements) SHOULD_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['should_update_baseline'])" 2>/dev/null || echo "false") echo "should_update_baseline=${SHOULD_UPDATE}" >> $GITHUB_OUTPUT - + if [ "$SHOULD_UPDATE" == "True" ]; then echo "🎉 Significant improvements detected - will update baseline (nightly only)" else @@ -471,7 +471,7 @@ jobs: echo "passed=true" >> $GITHUB_OUTPUT echo "should_update_baseline=true" >> $GITHUB_OUTPUT fi - + # Note: PR builds check for regressions but cannot update the baseline if [ "${{ needs.config.outputs.is_pr }}" == "true" ]; then echo "" @@ -512,13 +512,13 @@ jobs: steps.regression_check.outputs.has_baseline == 'true' run: | mkdir -p ${{ github.workspace }}/merged-baseline - + python3 .github/scripts/merge_baseline_selective.py \ --old-baseline baseline-results \ --new-results test-results \ --regression-report test-results/regression_report.json \ --output merged-baseline - + - name: Update baseline (nightly only - selective or full) if: needs.config.outputs.is_pr == 'false' uses: actions/upload-artifact@v4 @@ -527,7 +527,7 @@ jobs: # Use merged baseline if it exists (partial update), otherwise use all new results (first run or all improved) path: ${{ steps.regression_check.outputs.has_baseline == 'true' && 'merged-baseline/*.json' || 'test-results/*.json' }} retention-days: 90 - + - name: Log baseline decision if: always() run: | @@ -545,13 +545,13 @@ jobs: TOTAL_FILES=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['total_benchmark_files'])" 2>/dev/null || echo "0") FILES_WITH_REGRESSIONS=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_with_regressions'])" 2>/dev/null || echo "0") FILES_SAFE_TO_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_safe_to_update'])" 2>/dev/null || echo "0") - + echo "📊 Baseline Update Summary:" echo " Total benchmark files: $TOTAL_FILES" echo " Files with regressions: $FILES_WITH_REGRESSIONS" echo " Files updated: $FILES_SAFE_TO_UPDATE" echo "" - + if [ "$FILES_WITH_REGRESSIONS" -gt 0 ]; then echo "⚠️ SELECTIVE UPDATE: Some benchmarks regressed, keeping old baseline for those" echo " ✅ Updated baseline for $FILES_SAFE_TO_UPDATE non-regressing benchmarks" @@ -613,7 +613,7 @@ jobs: OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}" SHA="${{ github.sha }}" - + echo "Promoting ${IMAGE}:${SHA} to latest and adding verified tags (tests passed)" docker buildx imagetools create \ -t ${IMAGE}:latest \