diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 478826c525..9022be4def 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -263,6 +263,7 @@ jobs: } - name: Install cuda.pathfinder extra wheels for testing + if: ${{ !endsWith(matrix.PY_VER, 't') }} # see issue #1820 shell: bash --noprofile --norc -xeuo pipefail {0} run: | pushd cuda_pathfinder @@ -271,6 +272,7 @@ jobs: popd - name: Run cuda.pathfinder tests with all_must_work + if: ${{ !endsWith(matrix.PY_VER, 't') }} # see issue #1820 env: CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS: all_must_work CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS: all_must_work diff --git a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py index 69bb223a3d..e514b2e088 100644 --- a/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py +++ b/cuda_pathfinder/cuda/pathfinder/_dynamic_libs/descriptor_catalog.py @@ -314,7 +314,7 @@ class DescriptorSpec: linux_sonames=("libmathdx.so.0",), windows_dlls=("mathdx64_0.dll",), site_packages_linux=("nvidia/cu13/lib", "nvidia/cu12/lib"), - site_packages_windows=("nvidia/cu13/bin/x86_64", "nvidia/cu12/bin"), + site_packages_windows=("nvidia/cu13/bin", "nvidia/cu12/bin"), dependencies=("nvrtc",), ), DescriptorSpec( diff --git a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py index a46830e4ed..df1e52eb0f 100644 --- a/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py +++ b/cuda_pathfinder/cuda/pathfinder/_headers/header_descriptor_catalog.py @@ -145,6 +145,14 @@ class HeaderDescriptorSpec: conda_targets_layout=False, use_ctk_root_canary=False, ), + HeaderDescriptorSpec( + name="cute", + packaged_with="other", + header_basename="cute/tensor.hpp", + site_packages_dirs=("cutlass_library/source/include",), + conda_targets_layout=False, + use_ctk_root_canary=False, + ), HeaderDescriptorSpec( name="cutensor", packaged_with="other", @@ -153,6 +161,22 @@ class HeaderDescriptorSpec: conda_targets_layout=False, use_ctk_root_canary=False, ), + HeaderDescriptorSpec( + name="cutlass", + packaged_with="other", + header_basename="cutlass/cutlass.h", + site_packages_dirs=("cutlass_library/source/include",), + conda_targets_layout=False, + use_ctk_root_canary=False, + ), + HeaderDescriptorSpec( + name="mathdx", + packaged_with="other", + header_basename="libmathdx.h", + site_packages_dirs=("nvidia/cu13/include", "nvidia/cu12/include"), + conda_targets_layout=False, + use_ctk_root_canary=False, + ), HeaderDescriptorSpec( name="nvshmem", packaged_with="other", diff --git a/cuda_pathfinder/pyproject.toml b/cuda_pathfinder/pyproject.toml index 872d30eb4d..2ccc253ac8 100644 --- a/cuda_pathfinder/pyproject.toml +++ b/cuda_pathfinder/pyproject.toml @@ -38,10 +38,12 @@ cu13 = [ "nvidia-cudss-cu13", "nvidia-cufftmp-cu13; sys_platform != 'win32'", "nvidia-cusparselt-cu13", + "nvidia-libmathdx-cu13", "nvidia-nccl-cu13; sys_platform != 'win32'", "nvidia-nvshmem-cu13; sys_platform != 'win32'", ] host = [ + "nvidia-cutlass", "nvpl-fft; platform_system == 'Linux' and platform_machine == 'aarch64'", ] diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index a47a235b2d..a7b95e167b 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -41,7 +41,10 @@ NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { "cusparseLt": r"^nvidia-cusparselt-.*$", + "cute": r"^nvidia-cutlass$", "cutensor": r"^cutensor-.*$", + "cutlass": r"^nvidia-cutlass$", + "mathdx": r"^nvidia-libmathdx-.*$", "nvshmem": r"^nvidia-nvshmem-.*$", } diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 016acfd25d..36487bd58e 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -90,7 +90,6 @@ def test_known_but_platform_unavailable_libname_raises_dynamic_lib_not_available IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { - "cufftMp": r"^nvidia-cufftmp-.*$", "mathdx": r"^nvidia-libmathdx-.*$", }