From 0b7e009a0be3cbfff121449c8171f4ea5be6bfa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Mon, 13 Oct 2025 10:16:17 +0000 Subject: [PATCH 1/2] Restructure extension handling Before this change the `torch-extension` directory was just a derivation that built an extension for a given configuration + build set. The downside of this approach was that we could not easily get things like - The standard environment to be used by dev shells. - Overrides for caching. For instance, we override ROCm's `clr` and XPU's `oneapi-torch-dev` and `onednn-xpu` to use stdenv with an old glibc. This change modifies `torch-extension` so that we can instantiate it given a build set. A set is returned that can give access to the things mentioned above, as well as mkExtension and mkNoArchExtension functions. We assign this set to the corresponding build set, so that it is directly accessible from the build set. --- lib/build-sets.nix | 2 + lib/build-version.nix | 1 + lib/build.nix | 40 +--- lib/torch-extension/arch.nix | 219 +++++++++++++++++ lib/torch-extension/default.nix | 226 ++---------------- .../no-arch.nix} | 17 +- 6 files changed, 268 insertions(+), 237 deletions(-) create mode 100644 lib/torch-extension/arch.nix rename lib/{torch-extension-noarch/default.nix => torch-extension/no-arch.nix} (95%) diff --git a/lib/build-sets.nix b/lib/build-sets.nix index 602bebe0..88c46041 100644 --- a/lib/build-sets.nix +++ b/lib/build-sets.nix @@ -87,10 +87,12 @@ let torch = pkgs.python3.pkgs."torch_${flattenVersion torchVersion}".override { inherit cxx11Abi; }; + extension = pkgs.callPackage ./torch-extension { inherit torch; }; in { inherit buildConfig + extension pkgs torch bundleBuild diff --git a/lib/build-version.nix b/lib/build-version.nix index 62fe4684..527649c7 100644 --- a/lib/build-version.nix +++ b/lib/build-version.nix @@ -1,5 +1,6 @@ { buildConfig, + extension, pkgs, torch, bundleBuild, diff --git a/lib/build.nix b/lib/build.nix index 121f4795..a8c24ae1 
100644 --- a/lib/build.nix +++ b/lib/build.nix @@ -22,21 +22,6 @@ let isRocm isXpu ; - mkStdenv = - buildSet: oldLinuxCompat: - let - inherit (buildSet) pkgs torch; - in - if pkgs.stdenv.hostPlatform.isDarwin then - pkgs.stdenv - else if oldLinuxCompat then - # Uses CUDA stdenv when we are building for CUDA. - pkgs.stdenvGlibc_2_27 - else if torch.cudaSupport then - torch.cudaPackages.backendStdenv - else - pkgs.stdenv; - in rec { resolveDeps = import ./deps.nix { inherit lib; }; @@ -113,6 +98,7 @@ rec { mkTorchExtension = { buildConfig, + extension, pkgs, torch, bundleBuild, @@ -122,7 +108,6 @@ rec { rev, doGetKernelCheck, stripRPath ? false, - oldLinuxCompat ? false, }: let inherit (lib) fileset; @@ -143,34 +128,32 @@ rec { _: buildConfig: builtins.length (buildConfig.cuda-capabilities or supportedCudaCapabilities) ) buildConfig.kernel ); - stdenv = mkStdenv { inherit pkgs torch; } oldLinuxCompat; in if buildConfig.general.universal then # No torch extension sources? Treat it as a noarch package. - pkgs.callPackage ./torch-extension-noarch ({ + + extension.mkNoArchExtension { inherit src rev - torch doGetKernelCheck ; extensionName = buildConfig.general.name; - }) + } else - pkgs.callPackage ./torch-extension ({ + extension.mkExtension { inherit doGetKernelCheck extraDeps nvccThreads src - stdenv stripRPath - torch rev ; + extensionName = buildConfig.general.name; - doAbiCheck = oldLinuxCompat; - }); + doAbiCheck = true; + }; # Build multiple Torch extensions. 
mkDistTorchExtensions = @@ -189,7 +172,6 @@ rec { value = mkTorchExtension buildSet { inherit path rev doGetKernelCheck; stripRPath = true; - oldLinuxCompat = true; }; }; applicableBuildSets' = @@ -247,8 +229,7 @@ rec { let pkgs = buildSet.pkgs; rocmSupport = pkgs.config.rocmSupport or false; - stdenv = mkStdenv buildSet false; - mkShell = pkgs.mkShell.override { inherit stdenv; }; + mkShell = pkgs.mkShell.override { inherit (buildSet.extension) stdenv; }; in { name = torchBuildVersion buildSet; @@ -288,8 +269,7 @@ rec { pkgs = buildSet.pkgs; rocmSupport = pkgs.config.rocmSupport or false; xpuSupport = pkgs.config.xpuSupport or false; - stdenv = mkStdenv buildSet false; - mkShell = pkgs.mkShell.override { inherit stdenv; }; + mkShell = pkgs.mkShell.override { inherit (buildSet.extension) stdenv; }; in { name = torchBuildVersion buildSet; diff --git a/lib/torch-extension/arch.nix b/lib/torch-extension/arch.nix new file mode 100644 index 00000000..0bee5663 --- /dev/null +++ b/lib/torch-extension/arch.nix @@ -0,0 +1,219 @@ +{ + cudaSupport ? torch.cudaSupport, + rocmSupport ? torch.rocmSupport, + xpuSupport ? torch.xpuSupport, + + lib, + stdenv, + cudaPackages, + cmake, + cmakeNvccThreadsHook, + ninja, + build2cmake, + get-kernel-check, + kernel-abi-check, + python3, + rewrite-nix-paths-macho, + rocmPackages, + writeScriptBin, + xpuPackages, + + apple-sdk_15, + clr, + oneapi-torch-dev, + onednn-xpu, + torch, +}: + +{ + # Whether to do ABI checks. + doAbiCheck ? true, + + # Whether to run get-kernel-check. + doGetKernelCheck ? true, + + extensionName, + + # Extra dependencies (such as CUTLASS). + extraDeps ? [ ], + + nvccThreads, + + # Whether to strip rpath for non-nix use. + stripRPath ? false, + + # Revision to bake into the ops name. + rev, + + src, +}: + +let + # On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders. + # It's not supported by the nixpkgs shim. + xcrunHost = writeScriptBin "xcrunHost" '' + # Use system SDK for Metal files.
+ unset DEVELOPER_DIR + /usr/bin/xcrun $@ + ''; + +in + +stdenv.mkDerivation (prevAttrs: { + name = "${extensionName}-torch-ext"; + + inherit doAbiCheck nvccThreads src; + + # Generate build files. + postPatch = '' + build2cmake generate-torch --backend ${ + if cudaSupport then + "cuda" + else if rocmSupport then + "rocm" + else if xpuSupport then + "xpu" + else + "metal" + } --ops-id ${rev} build.toml + ''; + + # hipify copies files, but its target is run in the CMake build and install + # phases. Since some of the files come from the Nix store, this fails the + # second time around. + preInstall = '' + chmod -R u+w . + ''; + + nativeBuildInputs = [ + kernel-abi-check + cmake + ninja + build2cmake + ] + ++ lib.optionals doGetKernelCheck [ + get-kernel-check + ] + ++ lib.optionals cudaSupport [ + cmakeNvccThreadsHook + cudaPackages.cuda_nvcc + ] + ++ lib.optionals rocmSupport [ + clr + ] + ++ lib.optionals xpuSupport ([ + xpuPackages.ocloc + oneapi-torch-dev + ]) + ++ lib.optionals stdenv.hostPlatform.isDarwin [ + rewrite-nix-paths-macho + ]; + + buildInputs = [ + torch + torch.cxxdev + ] + ++ lib.optionals cudaSupport ( + with cudaPackages; + [ + cuda_cudart + + # Make dependent on build configuration dependencies once + # the Torch dependency is gone. + cuda_cccl + libcublas + libcusolver + libcusparse + ] + ) + ++ lib.optionals rocmSupport ( + with rocmPackages; + [ + hipsparselt + rocwmma-devel + ] + ) + ++ lib.optionals xpuSupport ([ + oneapi-torch-dev + onednn-xpu + ]) + ++ lib.optionals stdenv.hostPlatform.isDarwin [ + apple-sdk_15 + ] + ++ extraDeps; + + env = + lib.optionalAttrs cudaSupport { + CUDAToolkit_ROOT = "${lib.getDev cudaPackages.cuda_nvcc}"; + TORCH_CUDA_ARCH_LIST = + if cudaPackages.cudaOlder "12.8" then + "7.0;7.5;8.0;8.6;8.9;9.0" + else if cudaPackages.cudaOlder "13.0" then + "7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0" + else + # sm_101 has been renamed to sm_110 in CUDA 13. 
+ "7.5;8.0;8.6;8.9;9.0;10.0;11.0;12.0"; + } + // lib.optionalAttrs rocmSupport { + PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs; + } + // lib.optionalAttrs xpuSupport { + MKLROOT = oneapi-torch-dev; + SYCL_ROOT = oneapi-torch-dev; + }; + + # If we use the default setup, CMAKE_CUDA_HOST_COMPILER gets set to nixpkgs g++. + dontSetupCUDAToolkitCompilers = true; + + cmakeFlags = [ + (lib.cmakeFeature "Python_EXECUTABLE" "${python3.withPackages (ps: [ torch ])}/bin/python") + ] + ++ lib.optionals cudaSupport [ + (lib.cmakeFeature "CMAKE_CUDA_HOST_COMPILER" "${stdenv.cc}/bin/g++") + ] + ++ lib.optionals rocmSupport [ + # Make sure that we use HIP from our CLR override and not HIP from + # the symlink-joined ROCm toolkit. + (lib.cmakeFeature "CMAKE_HIP_COMPILER_ROCM_ROOT" "${clr}") + (lib.cmakeFeature "HIP_ROOT_DIR" "${clr}") + ] + ++ lib.optionals xpuSupport [ + (lib.cmakeFeature "ONEDNN_XPU_INCLUDE_DIR" "${onednn-xpu}/include") + ] + ++ lib.optionals stdenv.hostPlatform.isDarwin [ + # Use host compiler for Metal. Not included in the redistributable SDK. + (lib.cmakeFeature "METAL_COMPILER" "${xcrunHost}/bin/xcrunHost") + ]; + + postInstall = '' + ( + cd .. + cp -r torch-ext/${extensionName} $out/ + ) + cp $out/_${extensionName}_*/* $out/${extensionName} + rm -rf $out/_${extensionName}_* + '' + + (lib.optionalString (stripRPath && stdenv.hostPlatform.isLinux)) '' + find $out/${extensionName} -name '*.so' \ + -exec patchelf --set-rpath "" {} \; + '' + + (lib.optionalString (stripRPath && stdenv.hostPlatform.isDarwin)) '' + find $out/${extensionName} -name '*.so' \ + -exec rewrite-nix-paths-macho {} \; + + # Stub some rpath. + find $out/${extensionName} -name '*.so' \ + -exec install_name_tool -add_rpath "@loader_path/lib" {} \; + ''; + + doInstallCheck = true; + + getKernelCheck = extensionName; + + # We need access to the host system on Darwin for the Metal compiler.
+ __noChroot = stdenv.hostPlatform.isDarwin; + + passthru = { + inherit torch; + }; +}) diff --git a/lib/torch-extension/default.nix b/lib/torch-extension/default.nix index 507c9188..850131a2 100644 --- a/lib/torch-extension/default.nix +++ b/lib/torch-extension/default.nix @@ -1,223 +1,47 @@ { - extensionName, - nvccThreads, - rev, - - # Whether to run get-kernel-check. - doGetKernelCheck ? true, - - # Wheter to strip rpath for non-nix use. - stripRPath ? false, - - src, - - config, - cudaSupport ? torch.cudaSupport, - rocmSupport ? torch.rocmSupport, - xpuSupport ? torch.xpuSupport, - - lib, + callPackage, stdenv, - cudaPackages, - cmake, - cmakeNvccThreadsHook, - ninja, - build2cmake, - get-kernel-check, - kernel-abi-check, - python3, - rewrite-nix-paths-macho, + stdenvGlibc_2_27, rocmPackages, writeScriptBin, xpuPackages, - apple-sdk_15, - extraDeps ? [ ], torch, - - doAbiCheck, }: let + effectiveStdenv = if stdenv.hostPlatform.isLinux then stdenvGlibc_2_27 else stdenv; + # CLR that uses the provided stdenv, which can be different from the default # to support old glibc/libstdc++ versions. clr = ( rocmPackages.clr.override { clang = rocmPackages.llvm.clang.override { - inherit stdenv; - bintools = rocmPackages.llvm.bintools.override { libc = stdenv.cc.libc; }; - glibc = stdenv.cc.libc; + stdenv = effectiveStdenv; + bintools = rocmPackages.llvm.bintools.override { libc = effectiveStdenv.cc.libc; }; + glibc = effectiveStdenv.cc.libc; }; } ); - oneapi-torch-dev = xpuPackages.oneapi-torch-dev.override { inherit stdenv; }; - onednn-xpu = xpuPackages.onednn-xpu.override { inherit stdenv oneapi-torch-dev; }; - - # On Darwin, we need the host's xcrun for `xcrun metal` to compile Metal shaders. - # It's not supported by the nixpkgs shim. - xcrunHost = writeScriptBin "xcrunHost" '' - # Use system SDK for Metal files. 
- unset DEVELOPER_DIR - /usr/bin/xcrun $@ - ''; - + oneapi-torch-dev = xpuPackages.oneapi-torch-dev.override { stdenv = effectiveStdenv; }; + onednn-xpu = xpuPackages.onednn-xpu.override { + inherit oneapi-torch-dev; + stdenv = effectiveStdenv; + }; in -stdenv.mkDerivation (prevAttrs: { - name = "${extensionName}-torch-ext"; - - inherit doAbiCheck nvccThreads src; - - # Generate build files. - postPatch = '' - build2cmake generate-torch --backend ${ - if cudaSupport then - "cuda" - else if rocmSupport then - "rocm" - else if xpuSupport then - "xpu" - else - "metal" - } --ops-id ${rev} build.toml - ''; - - # hipify copies files, but its target is run in the CMake build and install - # phases. Since some of the files come from the Nix store, this fails the - # second time around. - preInstall = '' - chmod -R u+w . - ''; - - nativeBuildInputs = [ - kernel-abi-check - cmake - ninja - build2cmake - ] - ++ lib.optionals doGetKernelCheck [ - get-kernel-check - ] - ++ lib.optionals cudaSupport [ - cmakeNvccThreadsHook - cudaPackages.cuda_nvcc - ] - ++ lib.optionals rocmSupport [ - clr - ] - ++ lib.optionals xpuSupport ([ - xpuPackages.ocloc - oneapi-torch-dev - ]) - ++ lib.optionals stdenv.hostPlatform.isDarwin [ - rewrite-nix-paths-macho - ]; - - buildInputs = [ - torch - torch.cxxdev - ] - ++ lib.optionals cudaSupport ( - with cudaPackages; - [ - cuda_cudart - - # Make dependent on build configuration dependencies once - # the Torch dependency is gone. 
- cuda_cccl - libcublas - libcusolver - libcusparse - ] - ) - ++ lib.optionals rocmSupport ( - with rocmPackages; - [ - hipsparselt - rocwmma-devel - ] - ) - ++ lib.optionals xpuSupport ([ - oneapi-torch-dev - onednn-xpu - ]) - ++ lib.optionals stdenv.hostPlatform.isDarwin [ - apple-sdk_15 - ] - ++ extraDeps; - - env = - lib.optionalAttrs cudaSupport { - CUDAToolkit_ROOT = "${lib.getDev cudaPackages.cuda_nvcc}"; - TORCH_CUDA_ARCH_LIST = - if cudaPackages.cudaOlder "12.8" then - "7.0;7.5;8.0;8.6;8.9;9.0" - else if cudaPackages.cudaOlder "13.0" then - "7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0" - else - # sm_101 has been renamed to sm_110 in CUDA 13. - "7.5;8.0;8.6;8.9;9.0;10.0;11.0;12.0"; - } - // lib.optionalAttrs rocmSupport { - PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs; - } - // lib.optionalAttrs xpuSupport { - MKLROOT = oneapi-torch-dev; - SYCL_ROOT = oneapi-torch-dev; - }; - - # If we use the default setup, CMAKE_CUDA_HOST_COMPILER gets set to nixpkgs g++. - dontSetupCUDAToolkitCompilers = true; - - cmakeFlags = [ - (lib.cmakeFeature "Python_EXECUTABLE" "${python3.withPackages (ps: [ torch ])}/bin/python") - ] - ++ lib.optionals cudaSupport [ - (lib.cmakeFeature "CMAKE_CUDA_HOST_COMPILER" "${stdenv.cc}/bin/g++") - ] - ++ lib.optionals rocmSupport [ - # Ensure sure that we use HIP from our CLR override and not HIP from - # the symlink-joined ROCm toolkit. - (lib.cmakeFeature "CMAKE_HIP_COMPILER_ROCM_ROOT" "${clr}") - (lib.cmakeFeature "HIP_ROOT_DIR" "${clr}") - ] - ++ lib.optionals xpuSupport [ - (lib.cmakeFeature "ONEDNN_XPU_INCLUDE_DIR" "${onednn-xpu}/include") - ] - ++ lib.optionals stdenv.hostPlatform.isDarwin [ - # Use host compiler for Metal. Not included in the redistributable SDK. - (lib.cmakeFeature "METAL_COMPILER" "${xcrunHost}/bin/xcrunHost") - ]; - - postInstall = '' - ( - cd .. 
- cp -r torch-ext/${extensionName} $out/ - ) - cp $out/_${extensionName}_*/* $out/${extensionName} - rm -rf $out/_${extensionName}_* - '' - + (lib.optionalString (stripRPath && stdenv.hostPlatform.isLinux)) '' - find $out/${extensionName} -name '*.so' \ - -exec patchelf --set-rpath "" {} \; - '' - + (lib.optionalString (stripRPath && stdenv.hostPlatform.isDarwin)) '' - find $out/${extensionName} -name '*.so' \ - -exec rewrite-nix-paths-macho {} \; - - # Stub some rpath. - find $out/${extensionName} -name '*.so' \ - -exec install_name_tool -add_rpath "@loader_path/lib" {} \; - ''; - - doInstallCheck = true; - - getKernelCheck = extensionName; +{ + mkExtension = callPackage ./arch.nix { + inherit + clr + oneapi-torch-dev + onednn-xpu + torch + ; + stdenv = effectiveStdenv; + }; - # We need access to the host system on Darwin for the Metal compiler. - __noChroot = stdenv.hostPlatform.isDarwin; + mkNoArchExtension = callPackage ./no-arch.nix { inherit torch; }; - passthru = { - inherit torch; - }; -}) + stdenv = effectiveStdenv; +} diff --git a/lib/torch-extension-noarch/default.nix b/lib/torch-extension/no-arch.nix similarity index 95% rename from lib/torch-extension-noarch/default.nix rename to lib/torch-extension/no-arch.nix index f234b95f..7aef4787 100644 --- a/lib/torch-extension-noarch/default.nix +++ b/lib/torch-extension/no-arch.nix @@ -1,15 +1,20 @@ { + lib, stdenv, - extensionName, - rev, - - # Whether to run get-kernel-check. - doGetKernelCheck ? true, - lib, build2cmake, get-kernel-check, torch, +}: + +{ + # Whether to run get-kernel-check. + doGetKernelCheck ? true, + + extensionName, + + # Revision to bake into the ops name. 
+ rev, src, }: From b590c3f38d806b5c04cb5a81395f10514ac87d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Mon, 13 Oct 2025 10:25:57 +0000 Subject: [PATCH 2/2] Cache extra ROCm/XPU build dependencies --- flake.nix | 1 + lib/torch-extension/default.nix | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/flake.nix b/flake.nix index 716dc0a4..1d1f4e66 100644 --- a/flake.nix +++ b/flake.nix @@ -146,6 +146,7 @@ with buildSet.pkgs; ( allOutputs buildSet.torch + ++ lib.concatMap allOutputs buildSet.extension.extraBuildDeps ++ allOutputs build2cmake ++ allOutputs kernel-abi-check ++ allOutputs python3Packages.kernels diff --git a/lib/torch-extension/default.nix b/lib/torch-extension/default.nix index 850131a2..2905e95b 100644 --- a/lib/torch-extension/default.nix +++ b/lib/torch-extension/default.nix @@ -1,4 +1,8 @@ { + rocmSupport ? torch.rocmSupport, + xpuSupport ? torch.xpuSupport, + + lib, callPackage, stdenv, stdenvGlibc_2_27, @@ -31,6 +35,13 @@ let }; in { + extraBuildDeps = + lib.optionals xpuSupport [ + oneapi-torch-dev + onednn-xpu + ] + ++ lib.optionals rocmSupport [ clr ]; + mkExtension = callPackage ./arch.nix { inherit clr