From df1d2244a0b3b82d73d2df4ef42f6569c7cea5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 9 Oct 2025 11:00:42 +0000 Subject: [PATCH 01/12] Use binary wheels for Torch --- lib/build-sets.nix | 13 ++++++++++--- lib/deps.nix | 2 +- lib/torch-extension/arch.nix | 28 +++++++++++++++++++--------- versions.nix | 15 ++++++++------- 4 files changed, 38 insertions(+), 20 deletions(-) diff --git a/lib/build-sets.nix b/lib/build-sets.nix index 88c46041..8d407471 100644 --- a/lib/build-sets.nix +++ b/lib/build-sets.nix @@ -71,6 +71,7 @@ let cxx11Abi, system, bundleBuild ? false, + sourceBuild ? false, }: let pkgs = @@ -84,9 +85,15 @@ let pkgsByXpuVer.${xpuVersion} else throw "No compute framework set in Torch version"; - torch = pkgs.python3.pkgs."torch_${flattenVersion torchVersion}".override { - inherit cxx11Abi; - }; + torch = + if sourceBuild then + pkgs.python3.pkgs."torch_${flattenVersion torchVersion}".override { + inherit cxx11Abi; + } + else + pkgs.python3.pkgs."torch-bin_${flattenVersion torchVersion}".override { + inherit cxx11Abi; + }; extension = pkgs.callPackage ./torch-extension { inherit torch; }; in { diff --git a/lib/deps.nix b/lib/deps.nix index bffecf01..9e8c6c81 100644 --- a/lib/deps.nix +++ b/lib/deps.nix @@ -30,7 +30,7 @@ let ]; "torch" = [ torch - torch.cxxdev + #torch.cxxdev ]; "cutlass_sycl" = [ torch.xpuPackages.cutlass-sycl ]; }; diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix index 0bee5663..3ce05e7b 100644 --- a/lib/torch-extension/arch.nix +++ b/lib/torch-extension/arch.nix @@ -111,7 +111,7 @@ stdenv.mkDerivation (prevAttrs: { buildInputs = [ torch - torch.cxxdev + #torch.cxxdev ] ++ lib.optionals cudaSupport ( with cudaPackages; @@ -129,7 +129,20 @@ stdenv.mkDerivation (prevAttrs: { ++ lib.optionals rocmSupport ( with rocmPackages; [ + hipblas + hipblaslt + hipcub-devel + hipfft + hiprand + hipsolver + hipsparse hipsparselt + miopen-hip + rccl + rocblas + rocprim-devel + rocrand + 
rocthrust-devel rocwmma-devel ] ) @@ -145,17 +158,11 @@ stdenv.mkDerivation (prevAttrs: { env = lib.optionalAttrs cudaSupport { CUDAToolkit_ROOT = "${lib.getDev cudaPackages.cuda_nvcc}"; - TORCH_CUDA_ARCH_LIST = - if cudaPackages.cudaOlder "12.8" then - "7.0;7.5;8.0;8.6;8.9;9.0" - else if cudaPackages.cudaOlder "13.0" then - "7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0" - else - # sm_101 has been renamed to sm_110 in CUDA 13. - "7.5;8.0;8.6;8.9;9.0;10.0;11.0;12.0"; + TORCH_CUDA_ARCH_LIST = lib.concatStringsSep ";" torch.cudaCapabilities; } // lib.optionalAttrs rocmSupport { PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs; + ROCM_PATH = "${clr}"; } // lib.optionalAttrs xpuSupport { MKLROOT = oneapi-torch-dev; @@ -167,6 +174,9 @@ stdenv.mkDerivation (prevAttrs: { cmakeFlags = [ (lib.cmakeFeature "Python_EXECUTABLE" "${python3.withPackages (ps: [ torch ])}/bin/python") + # Fix: file RPATH_CHANGE could not write new RPATH, we are rewriting + # rpaths anyway. + (lib.cmakeBool "CMAKE_SKIP_RPATH" true) ] ++ lib.optionals cudaSupport [ (lib.cmakeFeature "CMAKE_CUDA_HOST_COMPILER" "${stdenv.cc}/bin/g++") diff --git a/versions.nix b/versions.nix index 5b43fea1..6ea12bcd 100644 --- a/versions.nix +++ b/versions.nix @@ -32,13 +32,6 @@ systems = [ "x86_64-linux" ]; bundleBuild = true; } - { - torchVersion = "2.7"; - rocmVersion = "6.4.2"; - cxx11Abi = true; - systems = [ "x86_64-linux" ]; - bundleBuild = false; - } { torchVersion = "2.7"; xpuVersion = "2025.0.2"; @@ -116,6 +109,7 @@ cxx11Abi = true; systems = [ "x86_64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -126,6 +120,7 @@ "aarch64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -136,6 +131,7 @@ "aarch64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -146,6 +142,7 @@ "aarch64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -153,6 +150,7 @@ cxx11Abi = true; systems = [ 
"x86_64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -160,6 +158,7 @@ cxx11Abi = true; systems = [ "x86_64-linux" ]; bundleBuild = true; + sourceBuild = true; } { torchVersion = "2.9"; @@ -167,6 +166,7 @@ metal = true; systems = [ "aarch64-darwin" ]; bundleBuild = true; + sourceBuild = true; } # Non-standard versions; not included in bundle builds. @@ -178,5 +178,6 @@ "x86_64-linux" "aarch64-linux" ]; + sourceBuild = true; } ] From b8314d304faf7d9c2f56c9f333bc12f23f0944d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Wed, 15 Oct 2025 14:11:48 +0000 Subject: [PATCH 02/12] Use the cxxdev output for torch-bin as well --- flake.lock | 6 +++--- lib/torch-extension/arch.nix | 13 +------------ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/flake.lock b/flake.lock index a42e6625..3c5c54b0 100644 --- a/flake.lock +++ b/flake.lock @@ -73,11 +73,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1760519460, - "narHash": "sha256-AL6vLcmL4nShgJ/Rqr7Rml1QMs/MuyCdfEqoGNHN8Jk=", + "lastModified": 1760537150, + "narHash": "sha256-BBCeZqkc3p3aOe91GMRBI63gGJ/UrtpkJEjylBiBol8=", "owner": "huggingface", "repo": "hf-nix", - "rev": "6ca864b261ec3e9228d91f794ca0f10cd1766e9f", + "rev": "65870ec5ba8dc58358c02780fafc699ef4e832c3", "type": "github" }, "original": { diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix index 3ce05e7b..f5baaccf 100644 --- a/lib/torch-extension/arch.nix +++ b/lib/torch-extension/arch.nix @@ -111,7 +111,7 @@ stdenv.mkDerivation (prevAttrs: { buildInputs = [ torch - #torch.cxxdev + torch.cxxdev ] ++ lib.optionals cudaSupport ( with cudaPackages; @@ -129,19 +129,9 @@ stdenv.mkDerivation (prevAttrs: { ++ lib.optionals rocmSupport ( with rocmPackages; [ - hipblas - hipblaslt hipcub-devel - hipfft - hiprand - hipsolver - hipsparse hipsparselt - miopen-hip - rccl - rocblas rocprim-devel - rocrand rocthrust-devel rocwmma-devel ] @@ -162,7 +152,6 @@ 
stdenv.mkDerivation (prevAttrs: { } // lib.optionalAttrs rocmSupport { PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs; - ROCM_PATH = "${clr}"; } // lib.optionalAttrs xpuSupport { MKLROOT = oneapi-torch-dev; From 25afb1e329b2e710c24c00a6c9a3991499fc30aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 10:29:53 +0000 Subject: [PATCH 03/12] Use Torch 2.9 from binary wheels, remove Torch 2.7 --- flake.lock | 6 +++--- versions.nix | 52 ---------------------------------------------------- 2 files changed, 3 insertions(+), 55 deletions(-) diff --git a/flake.lock b/flake.lock index 3c5c54b0..2d97a181 100644 --- a/flake.lock +++ b/flake.lock @@ -73,11 +73,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1760537150, - "narHash": "sha256-BBCeZqkc3p3aOe91GMRBI63gGJ/UrtpkJEjylBiBol8=", + "lastModified": 1760610538, + "narHash": "sha256-irTnU9jJzMYLX1Nw4K9qIdQKQG/7Hj/iTy5YbJ8gz/Q=", "owner": "huggingface", "repo": "hf-nix", - "rev": "65870ec5ba8dc58358c02780fafc699ef4e832c3", + "rev": "eaf2c438cc369784752e5526a64c1b49c62f4fb2", "type": "github" }, "original": { diff --git a/versions.nix b/versions.nix index 6ea12bcd..c16cf4a9 100644 --- a/versions.nix +++ b/versions.nix @@ -1,52 +1,4 @@ [ - { - torchVersion = "2.7"; - cudaVersion = "11.8"; - cxx11Abi = true; - systems = [ "x86_64-linux" ]; - bundleBuild = true; - } - { - torchVersion = "2.7"; - cudaVersion = "12.6"; - cxx11Abi = true; - systems = [ - "x86_64-linux" - ]; - bundleBuild = true; - } - { - torchVersion = "2.7"; - cudaVersion = "12.8"; - cxx11Abi = true; - systems = [ - "x86_64-linux" - "aarch64-linux" - ]; - bundleBuild = true; - } - { - torchVersion = "2.7"; - rocmVersion = "6.3.4"; - cxx11Abi = true; - systems = [ "x86_64-linux" ]; - bundleBuild = true; - } - { - torchVersion = "2.7"; - xpuVersion = "2025.0.2"; - cxx11Abi = true; - systems = [ "x86_64-linux" ]; - bundleBuild = true; - } - { - torchVersion = "2.7"; - cxx11Abi = true; - metal = true; - 
systems = [ "aarch64-darwin" ]; - bundleBuild = true; - } - { torchVersion = "2.8"; xpuVersion = "2025.1.3"; @@ -109,7 +61,6 @@ cxx11Abi = true; systems = [ "x86_64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9"; @@ -120,7 +71,6 @@ "aarch64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9"; @@ -131,7 +81,6 @@ "aarch64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9"; @@ -142,7 +91,6 @@ "aarch64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9"; From b45e5963c2d459a268948874816714ad6a910182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 13:22:59 +0000 Subject: [PATCH 04/12] Update hf-nix for AArch64 support --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 2d97a181..5e6b0af8 100644 --- a/flake.lock +++ b/flake.lock @@ -73,11 +73,11 @@ "nixpkgs": "nixpkgs" }, "locked": { - "lastModified": 1760610538, - "narHash": "sha256-irTnU9jJzMYLX1Nw4K9qIdQKQG/7Hj/iTy5YbJ8gz/Q=", + "lastModified": 1760620968, + "narHash": "sha256-8xJEJxO2MKwlDFVueQv6dM/iAwPVCKrYskie+j2vR60=", "owner": "huggingface", "repo": "hf-nix", - "rev": "eaf2c438cc369784752e5526a64c1b49c62f4fb2", + "rev": "c7a79829e226b2275a404ad6b86915fb35f036e1", "type": "github" }, "original": { From c60f7fd2c910fc54496b89c7dab7cace79a2d5e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 15:35:46 +0200 Subject: [PATCH 05/12] Use binary Torch 2.9 wheel on macOS --- versions.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/versions.nix b/versions.nix index c16cf4a9..aac8aa8b 100644 --- a/versions.nix +++ b/versions.nix @@ -114,7 +114,6 @@ metal = true; systems = [ "aarch64-darwin" ]; bundleBuild = true; - sourceBuild = true; } # Non-standard versions; not included in bundle builds. 
From d0470492dca59441b0a98e62da37d6ed25dbc92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 13:38:29 +0000 Subject: [PATCH 06/12] Update build variants --- build-variants.json | 7 ------- docs/build-variants.md | 7 ------- 2 files changed, 14 deletions(-) diff --git a/build-variants.json b/build-variants.json index eb4e85d6..86e49cdc 100644 --- a/build-variants.json +++ b/build-variants.json @@ -1,14 +1,12 @@ { "aarch64-darwin": { "metal": [ - "torch27-metal-aarch64-darwin", "torch28-metal-aarch64-darwin", "torch29-metal-aarch64-darwin" ] }, "aarch64-linux": { "cuda": [ - "torch27-cxx11-cu128-aarch64-linux", "torch28-cxx11-cu129-aarch64-linux", "torch29-cxx11-cu126-aarch64-linux", "torch29-cxx11-cu128-aarch64-linux", @@ -17,9 +15,6 @@ }, "x86_64-linux": { "cuda": [ - "torch27-cxx11-cu118-x86_64-linux", - "torch27-cxx11-cu126-x86_64-linux", - "torch27-cxx11-cu128-x86_64-linux", "torch28-cxx11-cu126-x86_64-linux", "torch28-cxx11-cu128-x86_64-linux", "torch28-cxx11-cu129-x86_64-linux", @@ -28,14 +23,12 @@ "torch29-cxx11-cu130-x86_64-linux" ], "rocm": [ - "torch27-cxx11-rocm63-x86_64-linux", "torch28-cxx11-rocm63-x86_64-linux", "torch28-cxx11-rocm64-x86_64-linux", "torch29-cxx11-rocm63-x86_64-linux", "torch29-cxx11-rocm64-x86_64-linux" ], "xpu": [ - "torch27-cxx11-xpu20250-x86_64-linux", "torch28-cxx11-xpu20251-x86_64-linux", "torch29-cxx11-xpu20252-x86_64-linux" ] diff --git a/docs/build-variants.md b/docs/build-variants.md index ed7e3f2b..39b5c975 100644 --- a/docs/build-variants.md +++ b/docs/build-variants.md @@ -7,13 +7,11 @@ available. This list will be updated as new PyTorch versions are released. ## Metal aarch64-darwin -- `torch27-metal-aarch64-darwin` - `torch28-metal-aarch64-darwin` - `torch29-metal-aarch64-darwin` ## CUDA aarch64-linux -- `torch27-cxx11-cu128-aarch64-linux` - `torch28-cxx11-cu129-aarch64-linux` - `torch29-cxx11-cu126-aarch64-linux` - `torch29-cxx11-cu128-aarch64-linux` @@ -21,9 +19,6 @@ available. 
This list will be updated as new PyTorch versions are released. ## CUDA x86_64-linux -- `torch27-cxx11-cu118-x86_64-linux` -- `torch27-cxx11-cu126-x86_64-linux` -- `torch27-cxx11-cu128-x86_64-linux` - `torch28-cxx11-cu126-x86_64-linux` - `torch28-cxx11-cu128-x86_64-linux` - `torch28-cxx11-cu129-x86_64-linux` @@ -33,7 +28,6 @@ available. This list will be updated as new PyTorch versions are released. ## ROCm x86_64-linux -- `torch27-cxx11-rocm63-x86_64-linux` - `torch28-cxx11-rocm63-x86_64-linux` - `torch28-cxx11-rocm64-x86_64-linux` - `torch29-cxx11-rocm63-x86_64-linux` @@ -41,7 +35,6 @@ available. This list will be updated as new PyTorch versions are released. ## XPU x86_64-linux -- `torch27-cxx11-xpu20250-x86_64-linux` - `torch28-cxx11-xpu20251-x86_64-linux` - `torch29-cxx11-xpu20252-x86_64-linux` From 0a468877c62eed6c2ce81c676c4b14c7d24097a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 13:41:42 +0000 Subject: [PATCH 07/12] Update tests/docs Torch versions --- .github/workflows/build_kernel.yaml | 10 +++++----- .github/workflows/build_kernel_macos.yaml | 2 +- .github/workflows/build_kernel_rocm.yaml | 2 +- .github/workflows/build_kernel_xpu.yaml | 2 +- docs/docker.md | 2 +- docs/nix.md | 2 +- tests/Dockerfile.test-kernel | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build_kernel.yaml b/.github/workflows/build_kernel.yaml index fe76e261..f2e92d81 100644 --- a/.github/workflows/build_kernel.yaml +++ b/.github/workflows/build_kernel.yaml @@ -24,22 +24,22 @@ jobs: env: USER: github_runner - name: Build activation kernel - run: ( cd examples/activation && nix build .\#redistributable.torch27-cxx11-cu126-x86_64-linux ) + run: ( cd examples/activation && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy activation kernel run: cp -rL examples/activation/result activation-kernel - name: Build cutlass GEMM kernel - run: ( cd examples/cutlass-gemm && nix build 
.\#redistributable.torch27-cxx11-cu126-x86_64-linux ) + run: ( cd examples/cutlass-gemm && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy cutlass GEMM kernel run: cp -rL examples/cutlass-gemm/result cutlass-gemm-kernel - name: Build relu kernel - run: ( cd examples/relu && nix build .\#redistributable.torch27-cxx11-cu126-x86_64-linux ) + run: ( cd examples/relu && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy relu kernel run: cp -rL examples/relu/result relu-kernel - name: Build relu-backprop-compile kernel - run: ( cd examples/relu-backprop-compile && nix build .\#redistributable.torch27-cxx11-cu126-x86_64-linux ) + run: ( cd examples/relu-backprop-compile && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy relu-backprop-compile kernel run: cp -rL examples/relu-backprop-compile/result relu-backprop-compile-kernel @@ -51,7 +51,7 @@ jobs: run: ( cd examples/relu && nix build .#devShells.x86_64-linux.test ) - name: Build silu-and-mul-universal kernel - run: ( cd examples/silu-and-mul-universal && nix build .\#redistributable.torch27-cxx11-cu126-x86_64-linux ) + run: ( cd examples/silu-and-mul-universal && nix build .\#redistributable.torch29-cxx11-cu126-x86_64-linux ) - name: Copy silu-and-mul-universal kernel run: cp -rL examples/silu-and-mul-universal/result silu-and-mul-universal-kernel diff --git a/.github/workflows/build_kernel_macos.yaml b/.github/workflows/build_kernel_macos.yaml index 7b6d9f79..9a8a995c 100644 --- a/.github/workflows/build_kernel_macos.yaml +++ b/.github/workflows/build_kernel_macos.yaml @@ -21,4 +21,4 @@ jobs: # For now we only test that there are no regressions in building macOS # kernels. Also run tests once we have a macOS runner. 
- name: Build relu kernel - run: ( cd examples/relu && nix build .\#redistributable.torch27-metal-aarch64-darwin -L ) + run: ( cd examples/relu && nix build .\#redistributable.torch29-metal-aarch64-darwin -L ) diff --git a/.github/workflows/build_kernel_rocm.yaml b/.github/workflows/build_kernel_rocm.yaml index e1733706..9e46b7ec 100644 --- a/.github/workflows/build_kernel_rocm.yaml +++ b/.github/workflows/build_kernel_rocm.yaml @@ -26,4 +26,4 @@ jobs: # For now we only test that there are no regressions in building ROCm # kernels. Also run tests once we have a ROCm runner. - name: Build relu kernel - run: ( cd examples/relu && nix build .\#redistributable.torch27-cxx11-rocm63-x86_64-linux -L ) + run: ( cd examples/relu && nix build .\#redistributable.torch29-cxx11-rocm63-x86_64-linux -L ) diff --git a/.github/workflows/build_kernel_xpu.yaml b/.github/workflows/build_kernel_xpu.yaml index e1ebc40e..69bc1439 100644 --- a/.github/workflows/build_kernel_xpu.yaml +++ b/.github/workflows/build_kernel_xpu.yaml @@ -26,4 +26,4 @@ jobs: # For now we only test that there are no regressions in building XPU # kernels. Also run tests once we have a XPU runner. 
- name: Build relu kernel - run: ( cd examples/relu && nix build .\#redistributable.torch28-cxx11-xpu20251-x86_64-linux -L ) + run: ( cd examples/relu && nix build .\#redistributable.torch29-cxx11-xpu20251-x86_64-linux -L ) diff --git a/docs/docker.md b/docs/docker.md index 74bfe568..20148954 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -186,7 +186,7 @@ To load a kernel locally, you should add the kernel build that is compatible wit ```bash # PyTorch 2.6 and CUDA 12.6 -export PYTHONPATH="result/torch26-cxx11-cu126-x86_64-linux" +export PYTHONPATH="result/torch29-cxx11-cu126-x86_64-linux" ``` The kernel can then be imported as a Python module: diff --git a/docs/nix.md b/docs/nix.md index 2e2bc47b..b5fa1c93 100644 --- a/docs/nix.md +++ b/docs/nix.md @@ -84,7 +84,7 @@ using: ```bash $ rm -rf .venv # Remove existing venv if any. -$ nix develop .#devShells.torch27-cxx11-rocm63-x86_64-linux +$ nix develop .#devShells.torch29-cxx11-rocm64-x86_64-linux ``` ## Shell for testing a kernel diff --git a/tests/Dockerfile.test-kernel b/tests/Dockerfile.test-kernel index c6270c5f..fc089524 100644 --- a/tests/Dockerfile.test-kernel +++ b/tests/Dockerfile.test-kernel @@ -1,9 +1,9 @@ # syntax=docker/dockerfile:1.4 ARG PYTHON_VERSION=3.10 # Ideally we'd test with 11.8, but the GELU kernel is subtly off. 
-ARG CUDA_VERSION=12.1.0 +ARG CUDA_VERSION=12.6.0 ARG UBUNTU_VERSION=18.04 -ARG TORCH_VERSION=2.5.0 +ARG TORCH_VERSION=2.8.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base From 6c9d3eb38aeff51e7467dd08cc72b00ba9b14511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 13:45:05 +0000 Subject: [PATCH 08/12] CI fixes --- .github/workflows/build_kernel_xpu.yaml | 2 +- tests/Dockerfile.test-kernel | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_kernel_xpu.yaml b/.github/workflows/build_kernel_xpu.yaml index 69bc1439..49f7d773 100644 --- a/.github/workflows/build_kernel_xpu.yaml +++ b/.github/workflows/build_kernel_xpu.yaml @@ -26,4 +26,4 @@ jobs: # For now we only test that there are no regressions in building XPU # kernels. Also run tests once we have a XPU runner. - name: Build relu kernel - run: ( cd examples/relu && nix build .\#redistributable.torch29-cxx11-xpu20251-x86_64-linux -L ) + run: ( cd examples/relu && nix build .\#redistributable.torch29-cxx11-xpu20252-x86_64-linux -L ) diff --git a/tests/Dockerfile.test-kernel b/tests/Dockerfile.test-kernel index fc089524..60df917e 100644 --- a/tests/Dockerfile.test-kernel +++ b/tests/Dockerfile.test-kernel @@ -3,7 +3,7 @@ ARG PYTHON_VERSION=3.10 # Ideally we'd test with 11.8, but the GELU kernel is subtly off. ARG CUDA_VERSION=12.6.0 ARG UBUNTU_VERSION=18.04 -ARG TORCH_VERSION=2.8.0 +ARG TORCH_VERSION=2.9.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base @@ -47,10 +47,10 @@ WORKDIR /app/kernel-test RUN CUDA_MAJOR_MINOR=$(echo ${CUDA_VERSION} | cut -d'.' 
-f1,2) && \ case ${CUDA_MAJOR_MINOR} in \ - "11.8") CUDA_TAG="cu118" ;; \ - "12.1") CUDA_TAG="cu121" ;; \ - "12.2") CUDA_TAG="cu122" ;; \ "12.4") CUDA_TAG="cu124" ;; \ + "12.6") CUDA_TAG="cu126" ;; \ + "12.8") CUDA_TAG="cu128" ;; \ + "13.0") CUDA_TAG="cu130" ;; \ *) CUDA_TAG="" ;; \ esac && \ if [ -n "${CUDA_TAG}" ]; then \ From b265b0d596c4fb2df668b13c9a275a8481d6a118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 14:12:32 +0000 Subject: [PATCH 09/12] Fix another test that used Torch 2.7 --- examples/relu-specific-torch/flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/relu-specific-torch/flake.nix b/examples/relu-specific-torch/flake.nix index 0add5d7c..5b01e602 100644 --- a/examples/relu-specific-torch/flake.nix +++ b/examples/relu-specific-torch/flake.nix @@ -15,7 +15,7 @@ path = ./.; torchVersions = defaultVersions: [ { - torchVersion = "2.7"; + torchVersion = "2.9"; cudaVersion = "12.8"; cxx11Abi = true; systems = [ From 6f9e39c132e1440b57a438a1c23c4fb3ca0b9451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 14:37:14 +0000 Subject: [PATCH 10/12] Try to use ubi8 image for tests --- tests/Dockerfile.test-kernel | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/Dockerfile.test-kernel b/tests/Dockerfile.test-kernel index 60df917e..3ec8c7ad 100644 --- a/tests/Dockerfile.test-kernel +++ b/tests/Dockerfile.test-kernel @@ -2,10 +2,10 @@ ARG PYTHON_VERSION=3.10 # Ideally we'd test with 11.8, but the GELU kernel is subtly off. 
ARG CUDA_VERSION=12.6.0 -ARG UBUNTU_VERSION=18.04 +ARG UBI_VERSION=8 ARG TORCH_VERSION=2.9.0 -FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} as base +FROM nvidia/cuda:${CUDA_VERSION}-devel-ubi${UBI_VERSION} as base # Set environment variables ENV DEBIAN_FRONTEND=noninteractive \ @@ -15,11 +15,8 @@ ENV DEBIAN_FRONTEND=noninteractive \ NVIDIA_DRIVER_CAPABILITIES=compute,utility # Install system dependencies -RUN apt-get update && apt-get install -y --no-install-recommends \ - curl \ - python3 \ - python3-pip \ - && rm -rf /var/lib/apt/lists/* +RUN dnf install -y \ + curl # Install uv package manager RUN curl -LsSf https://astral.sh/uv/install.sh | sh From 90ff9a13410d0d317a7e3c3a5538057ed0e516a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 15:20:35 +0000 Subject: [PATCH 11/12] Dependencies should be filtered by backend We were pulling in cutlass-sycl for CUDA builds because the dependencies were not filtered by backend/framework. --- lib/build.nix | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/lib/build.nix b/lib/build.nix index a8c24ae1..3f50f4ca 100644 --- a/lib/build.nix +++ b/lib/build.nix @@ -22,6 +22,7 @@ let isRocm isXpu ; + inherit (import ./build-variants.nix { inherit lib; }) computeFramework; in rec { resolveDeps = import ./deps.nix { inherit lib; }; @@ -29,22 +30,22 @@ rec { readToml = path: builtins.fromTOML (builtins.readFile path); validateBuildConfig = - buildConfig: + buildToml: let - kernels = lib.attrValues (buildConfig.kernel or { }); - hasOldUniversal = builtins.hasAttr "universal" (buildConfig.torch or { }); + kernels = lib.attrValues (buildToml.kernel or { }); + hasOldUniversal = builtins.hasAttr "universal" (buildToml.torch or { }); hasLanguage = lib.any (kernel: kernel ? 
language) kernels; in assert lib.assertMsg (!hasOldUniversal && !hasLanguage) '' build.toml seems to be of an older version, update it with: build2cmake update-build build.toml''; - buildConfig; + buildToml; backends = - buildConfig: + buildToml: let - kernels = lib.attrValues (buildConfig.kernel or { }); + kernels = lib.attrValues (buildToml.kernel or { }); kernelBackend = kernel: kernel.backend; init = { cuda = false; @@ -66,11 +67,11 @@ rec { # Filter buildsets that are applicable to a given kernel build config. filterApplicableBuildSets = - buildConfig: buildSets: + buildToml: buildSets: let - backends' = backends buildConfig; - minCuda = buildConfig.general.cuda-minver or "11.8"; - maxCuda = buildConfig.general.cuda-maxver or "99.9"; + backends' = backends buildToml; + minCuda = buildToml.general.cuda-minver or "11.8"; + maxCuda = buildToml.general.cuda-maxver or "99.9"; versionBetween = minver: maxver: ver: builtins.compareVersions ver minver >= 0 && builtins.compareVersions ver maxver <= 0; @@ -82,7 +83,7 @@ rec { || (isRocm buildSet.buildConfig && backends'.rocm) || (isMetal buildSet.buildConfig && backends'.metal) || (isXpu buildSet.buildConfig && backends'.xpu) - || (buildConfig.general.universal or false); + || (buildToml.general.universal or false); cudaVersionSupported = !(isCuda buildSet.buildConfig) || versionBetween minCuda maxCuda buildSet.pkgs.cudaPackages.cudaMajorMinorVersion; @@ -111,11 +112,13 @@ rec { }: let inherit (lib) fileset; - buildConfig = readBuildConfig path; - kernels = buildConfig.kernel or { }; + buildToml = readBuildConfig path; + kernels = lib.filterAttrs (_: kernel: computeFramework buildConfig == kernel.backend) ( + buildToml.kernel or { } + ); extraDeps = resolveDeps { inherit pkgs torch; - deps = lib.unique (lib.flatten (lib.mapAttrsToList (_: buildConfig: buildConfig.depends) kernels)); + deps = lib.unique (lib.flatten (lib.mapAttrsToList (_: kernel: kernel.depends) kernels)); }; # Use the mkSourceSet function to get the 
source @@ -125,11 +128,11 @@ rec { listMax = lib.foldl' lib.max 1; nvccThreads = listMax ( lib.mapAttrsToList ( - _: buildConfig: builtins.length (buildConfig.cuda-capabilities or supportedCudaCapabilities) - ) buildConfig.kernel + _: kernel: builtins.length (kernel.cuda-capabilities or supportedCudaCapabilities) + ) buildToml.kernel ); in - if buildConfig.general.universal then + if buildToml.general.universal then # No torch extension sources? Treat it as a noarch package. extension.mkNoArchExtension { @@ -138,7 +141,7 @@ rec { rev doGetKernelCheck ; - extensionName = buildConfig.general.name; + extensionName = buildToml.general.name; } else extension.mkExtension { @@ -151,7 +154,7 @@ rec { rev ; - extensionName = buildConfig.general.name; + extensionName = buildToml.general.name; doAbiCheck = true; }; @@ -198,9 +201,9 @@ rec { ; bundleOnly = true; }; - buildConfig = readBuildConfig path; + buildToml = readBuildConfig path; namePaths = - if buildConfig.general.universal then + if buildToml.general.universal then # Noarch, just get the first extension. { "torch-universal" = builtins.head (builtins.attrValues extensions); } else From a02026453e1541b81d243ba16558392af9b93c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 16 Oct 2025 15:21:17 +0000 Subject: [PATCH 12/12] ROCm: use torch-bin --- versions.nix | 2 -- 1 file changed, 2 deletions(-) diff --git a/versions.nix b/versions.nix index aac8aa8b..f6ee6fb5 100644 --- a/versions.nix +++ b/versions.nix @@ -98,7 +98,6 @@ cxx11Abi = true; systems = [ "x86_64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9"; @@ -106,7 +105,6 @@ cxx11Abi = true; systems = [ "x86_64-linux" ]; bundleBuild = true; - sourceBuild = true; } { torchVersion = "2.9";