From 5da8a791ed1ec5b4ad64b5b3abaf09fa3db8f88d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Fri, 17 Oct 2025 09:30:08 +0000 Subject: [PATCH 1/2] Introduce pure builds --- .github/workflows/build_kernel.yaml | 40 +++++++++++++++++++--- .github/workflows/build_kernel_rocm.yaml | 15 +++++--- .github/workflows/build_kernel_xpu.yaml | 15 +++++--- .github/workflows/test_extra_commands.yaml | 11 +++--- 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build_kernel.yaml b/.github/workflows/build_kernel.yaml index e4b859c1..e2661c78 100644 --- a/.github/workflows/build_kernel.yaml +++ b/.github/workflows/build_kernel.yaml @@ -9,20 +9,25 @@ on: jobs: build: - name: Build kernel + name: Build kernels runs-on: - group: aws-g6-12xlarge-plus + group: aws-highmemory-32-plus-nix steps: - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 + - uses: DeterminateSystems/nix-installer-action@main with: - nix_path: nixpkgs=channel:nixos-unstable + extra-conf: | + max-jobs = 4 + cores = 12 + sandbox-fallback = false - uses: cachix/cachix-action@v14 with: name: huggingface #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: - USER: github_runner + USER: runner + - name: Nix info + run: nix-shell -p nix-info --run "nix-info -m" - name: Build activation kernel run: ( cd examples/activation && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy activation kernel @@ -58,6 +63,31 @@ jobs: - name: Copy silu-and-mul-universal kernel run: cp -rL examples/silu-and-mul-universal/result silu-and-mul-universal-kernel + - name: Upload kernel artifacts + uses: actions/upload-artifact@v4 + with: + name: built-kernels + path: | + activation-kernel + cutlass-gemm-kernel + relu-kernel + relu-backprop-compile-kernel + silu-and-mul-universal-kernel + + test: + name: Test kernels + needs: build + runs-on: + group: aws-g6-12xlarge-plus + steps: + - uses: actions/checkout@v4 + + - name: Download kernel artifacts + 
uses: actions/download-artifact@v4 + with: + name: built-kernels + path: . + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build Docker image diff --git a/.github/workflows/build_kernel_rocm.yaml b/.github/workflows/build_kernel_rocm.yaml index 9e46b7ec..a6acbf04 100644 --- a/.github/workflows/build_kernel_rocm.yaml +++ b/.github/workflows/build_kernel_rocm.yaml @@ -11,18 +11,23 @@ jobs: build: name: Build kernel runs-on: - group: aws-g6-12xlarge-plus + group: aws-highmemory-32-plus-nix steps: - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 + - uses: DeterminateSystems/nix-installer-action@main with: - nix_path: nixpkgs=channel:nixos-unstable + extra-conf: | + max-jobs = 4 + cores = 12 + sandbox-fallback = false - uses: cachix/cachix-action@v14 with: name: huggingface - #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: - USER: github_runner + USER: runner + #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" + - name: Nix info + run: nix-shell -p nix-info --run "nix-info -m" # For now we only test that there are no regressions in building ROCm # kernels. Also run tests once we have a ROCm runner. 
- name: Build relu kernel diff --git a/.github/workflows/build_kernel_xpu.yaml b/.github/workflows/build_kernel_xpu.yaml index ebf40ecf..fb13b45d 100644 --- a/.github/workflows/build_kernel_xpu.yaml +++ b/.github/workflows/build_kernel_xpu.yaml @@ -11,18 +11,23 @@ jobs: build: name: Build kernel runs-on: - group: aws-g6-12xlarge-plus + group: aws-highmemory-32-plus-nix steps: - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 + - uses: DeterminateSystems/nix-installer-action@main with: - nix_path: nixpkgs=channel:nixos-unstable + extra-conf: | + max-jobs = 4 + cores = 12 + sandbox-fallback = false - uses: cachix/cachix-action@v14 with: name: huggingface - #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: - USER: github_runner + USER: runner + #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" + - name: Nix info + run: nix-shell -p nix-info --run "nix-info -m" # For now we only test that there are no regressions in building XPU # kernels. Also run tests once we have a XPU runner. 
- name: Build relu kernel diff --git a/.github/workflows/test_extra_commands.yaml b/.github/workflows/test_extra_commands.yaml index 7f26ccc4..ef09c6e7 100644 --- a/.github/workflows/test_extra_commands.yaml +++ b/.github/workflows/test_extra_commands.yaml @@ -10,18 +10,21 @@ on: jobs: build: name: Build kernel runs-on: - group: aws-g6-12xlarge-plus + group: aws-highmemory-32-plus-nix steps: - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v27 + - uses: DeterminateSystems/nix-installer-action@main with: - nix_path: nixpkgs=channel:nixos-unstable + extra-conf: | + max-jobs = 4 + cores = 12 + sandbox-fallback = false - uses: cachix/cachix-action@v14 with: name: huggingface #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: - USER: github_runner + USER: runner - name: Test nix run .#kernels run: ( cd examples/relu ; nix run .#kernels -- lock ../../tests/run-kernels ) From 3e8075f4bdca61c34763a8193e45e45d14d23c61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Fri, 17 Oct 2025 09:59:28 +0000 Subject: [PATCH 2/2] Re-enable Cachix pushes We can do this now because builds are properly sandboxed. 
--- .github/workflows/build_kernel.yaml | 4 ++-- .github/workflows/build_kernel_rocm.yaml | 2 +- .github/workflows/build_kernel_xpu.yaml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_kernel.yaml b/.github/workflows/build_kernel.yaml index e2661c78..ad3327db 100644 --- a/.github/workflows/build_kernel.yaml +++ b/.github/workflows/build_kernel.yaml @@ -20,10 +20,10 @@ jobs: max-jobs = 4 cores = 12 sandbox-fallback = false - - uses: cachix/cachix-action@v14 + - uses: cachix/cachix-action@v16 with: name: huggingface - #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" + authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: USER: runner - name: Nix info diff --git a/.github/workflows/build_kernel_rocm.yaml b/.github/workflows/build_kernel_rocm.yaml index a6acbf04..d8636444 100644 --- a/.github/workflows/build_kernel_rocm.yaml +++ b/.github/workflows/build_kernel_rocm.yaml @@ -23,9 +23,9 @@ jobs: - uses: cachix/cachix-action@v14 with: name: huggingface + authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: USER: runner - #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" - name: Nix info run: nix-shell -p nix-info --run "nix-info -m" # For now we only test that there are no regressions in building ROCm diff --git a/.github/workflows/build_kernel_xpu.yaml b/.github/workflows/build_kernel_xpu.yaml index fb13b45d..e797cdd9 100644 --- a/.github/workflows/build_kernel_xpu.yaml +++ b/.github/workflows/build_kernel_xpu.yaml @@ -20,12 +20,12 @@ jobs: max-jobs = 4 cores = 12 sandbox-fallback = false - - uses: cachix/cachix-action@v14 + - uses: cachix/cachix-action@v16 with: name: huggingface + authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" env: USER: runner - #authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}" - name: Nix info run: nix-shell -p nix-info --run "nix-info -m" # For now we only test that there are no regressions in building XPU