From 6df7ef4ae63c258f217fa4a64f1b1411d5cc8762 Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 11:46:04 -0700 Subject: [PATCH 1/8] added cuda toolkit check Signed-off-by: Jaya Venkatesh --- pyproject.toml | 1 + rapids_cli/doctor/checks/cuda_toolkit.py | 145 +++++++++++++++++++++++ rapids_cli/tests/test_cuda_toolkit.py | 131 ++++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 rapids_cli/doctor/checks/cuda_toolkit.py create mode 100644 rapids_cli/tests/test_cuda_toolkit.py diff --git a/pyproject.toml b/pyproject.toml index 10f13aa..91a33f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ gpu_compute_capability = "rapids_cli.doctor.checks.gpu:check_gpu_compute_capabil cuda = "rapids_cli.doctor.checks.cuda_driver:cuda_check" memory_to_gpu_ratio = "rapids_cli.doctor.checks.memory:check_memory_to_gpu_ratio" nvlink_status = "rapids_cli.doctor.checks.nvlink:check_nvlink_status" +cuda_toolkit = "rapids_cli.doctor.checks.cuda_toolkit:cuda_toolkit_check" [project.urls] Homepage = "https://github.com/rapidsai/rapids-cli" diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py new file mode 100644 index 0000000..dad46bf --- /dev/null +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -0,0 +1,145 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Check CUDA toolkit library availability and version consistency.""" + +import os +import re +from pathlib import Path + +import pynvml + +# Core libraries to check for findability. +# cudart: universal — everything needs it. +# nvrtc: JIT compilation — cupy, cudf UDFs. Frequently missing with pip (pre-cupy 14). +# nvvm: numba-cuda JIT — cudf string UDFs. Was moved/renamed in CUDA 13.1. +_CUDA_LIBS = { + "cudart": "libcudart.so", + "nvrtc": "libnvrtc.so", + "nvvm": "libnvvm.so", +} + +_INSTALL_ADVICE = { + "conda": ( + "Update your conda environment with the correct CUDA toolkit version, " + "e.g. 'conda install cuda-toolkit' in your active environment." + ), + "pip": ( + "Update the CUDA pip packages in your environment, " + "e.g. 'pip install --upgrade nvidia-cuda-toolkit'." + ), +} +_DEFAULT_ADVICE = ( + "Install the CUDA Toolkit matching your driver, " + "or use conda which manages CUDA automatically." +) + +_CUDA_SYMLINK = Path("/usr/local/cuda") + +def _get_advice(found_via: str | None) -> str: + """Return install advice based on how cuda-pathfinder found the library.""" + if found_via: + for key, advice in _INSTALL_ADVICE.items(): + if key in found_via: + return advice + return _DEFAULT_ADVICE + + +def _get_toolkit_cuda_major() -> int | None: + """Return the CUDA major version of the toolkit via cuda-pathfinder headers. + + Parses #define CUDA_VERSION from cuda_runtime_version.h. + Returns None if headers are not available. + """ + import cuda.pathfinder + + header_dir = cuda.pathfinder.find_nvidia_header_directory("cudart") + if header_dir is None: + return None + version_file = Path(header_dir) / "cuda_runtime_version.h" + if not version_file.exists(): + return None + match = re.search(r"#define\s+CUDA_VERSION\s+(\d+)", version_file.read_text()) + return int(match.group(1)) // 1000 if match else None + + +def _extract_major_from_cuda_path(path: Path) -> int | None: + """Extract CUDA major version from a path like /usr/local/cuda-12.4 or its version.txt.""" + match = re.search(r"cuda-(\d+)", str(path)) + if match: + return int(match.group(1)) + version_file = path / "version.txt" + if version_file.exists(): + match = re.search(r"(\d+)\.", version_file.read_text()) + if match: + return int(match.group(1)) + return None + + +def cuda_toolkit_check(verbose=False): + """Check CUDA toolkit library availability and version consistency.""" + import cuda.pathfinder + from cuda.pathfinder import DynamicLibNotFoundError + + # Check library findability + found_via = {} + missing = [] + for libname, soname in _CUDA_LIBS.items(): + try: + loaded = cuda.pathfinder.load_nvidia_dynamic_lib(libname) + found_via[libname] = loaded.found_via + except (DynamicLibNotFoundError, RuntimeError): + missing.append(soname) + + if missing: + advice = _get_advice(next(iter(found_via.values()), None)) + raise ValueError( + f"{', '.join(missing)} could not be found. " + f"RAPIDS will not be able to run GPU operations. {advice}" + ) + + # Get driver CUDA major version + try: + pynvml.nvmlInit() + driver_major = pynvml.nvmlSystemGetCudaDriverVersion() // 1000 + except pynvml.NVMLError as e: + raise ValueError( + "Unable to query the GPU driver's CUDA version. " + "RAPIDS requires a working NVIDIA GPU driver." + ) from e + + # Get toolkit CUDA major version and compare to driver + # Only error when toolkit > driver (drivers are backward compatible) + toolkit_major = _get_toolkit_cuda_major() + if toolkit_major is not None and toolkit_major > driver_major: + advice = _get_advice(found_via.get("cudart")) + raise ValueError( + f"CUDA toolkit is version {toolkit_major} but the GPU driver " + f"only supports up to CUDA {driver_major}. {advice}" + ) + + # Check /usr/local/cuda symlink + if _CUDA_SYMLINK.exists(): + sym_major = _extract_major_from_cuda_path(_CUDA_SYMLINK.resolve()) + if sym_major is not None and sym_major > driver_major: + raise ValueError( + f"/usr/local/cuda points to CUDA {sym_major} but the GPU driver " + f"only supports up to CUDA {driver_major}. " + f"Update the symlink to a CUDA {driver_major}.x installation." + ) + + # Check CUDA_HOME / CUDA_PATH + for env_var in ("CUDA_HOME", "CUDA_PATH"): + env_val = os.environ.get(env_var) + if env_val: + env_major = _extract_major_from_cuda_path(Path(env_val)) + if env_major is not None and env_major > driver_major: + raise ValueError( + f"{env_var}={env_val} (CUDA {env_major}) but the GPU driver " + f"only supports up to CUDA {driver_major}. " + f"Set {env_var} to a CUDA {driver_major}.x path." + ) + + if verbose: + version_str = f"CUDA {toolkit_major}" if toolkit_major else "unknown version" + return f"CUDA toolkit OK ({version_str}). Driver supports CUDA {driver_major}." + return True diff --git a/rapids_cli/tests/test_cuda_toolkit.py b/rapids_cli/tests/test_cuda_toolkit.py new file mode 100644 index 0000000..09ade4d --- /dev/null +++ b/rapids_cli/tests/test_cuda_toolkit.py @@ -0,0 +1,131 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +from dataclasses import dataclass +from pathlib import Path +from unittest.mock import patch + +import pynvml +import pytest + +from rapids_cli.doctor.checks.cuda_toolkit import cuda_toolkit_check + + +@dataclass +class FakeLoadedLib: + """Mimics the return value of cuda.pathfinder.load_nvidia_dynamic_lib().""" + + abs_path: str | None = None + found_via: str = "conda" + was_already_loaded_from_elsewhere: bool = False + + +def _fake_loader(overrides=None): + """Build a side_effect for load_nvidia_dynamic_lib. All 3 libs found by default.""" + from cuda.pathfinder import DynamicLibNotFoundError + + defaults = {"cudart": FakeLoadedLib(), "nvrtc": FakeLoadedLib(), "nvvm": FakeLoadedLib()} + if overrides: + defaults.update(overrides) + + def loader(libname): + val = defaults[libname] + if isinstance(val, Exception): + raise val + return val + + return loader + + +def test_check_success(tmp_path): + (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 12040\n") + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + patch.dict("os.environ", {}, clear=True), + ): + result = cuda_toolkit_check(verbose=True) + assert isinstance(result, str) + assert "CUDA 12" in result + + +def test_check_missing_libs(): + from cuda.pathfinder import DynamicLibNotFoundError + + with patch( + "cuda.pathfinder.load_nvidia_dynamic_lib", + side_effect=_fake_loader({ + "cudart": DynamicLibNotFoundError("not found"), + "nvrtc": DynamicLibNotFoundError("not found"), + "nvvm": DynamicLibNotFoundError("not found"), + }), + ): + with pytest.raises(ValueError, match="libcudart.so"): + cuda_toolkit_check() + + +def test_check_driver_query_fails(): + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)), + ): + with pytest.raises(ValueError, match="Unable to query"): + cuda_toolkit_check() + + +def test_check_toolkit_newer_than_driver(tmp_path): + """CUDA 13 toolkit + CUDA 12 driver = error.""" + (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 13000\n") + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + ): + with pytest.raises(ValueError, match="only supports up to CUDA 12"): + cuda_toolkit_check() + + +def test_check_toolkit_older_than_driver_passes(tmp_path): + """CUDA 12 toolkit + CUDA 13 driver = fine (backward compatible).""" + (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 12040\n") + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=13000), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + patch.dict("os.environ", {}, clear=True), + ): + assert cuda_toolkit_check(verbose=False) is True + + +def test_check_cuda_symlink_newer_than_driver(tmp_path): + symlink_target = tmp_path / "cuda-13.0" + symlink_target.mkdir() + symlink_path = tmp_path / "cuda" + symlink_path.symlink_to(symlink_target) + + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), + patch("rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", symlink_path), + patch.dict("os.environ", {}, clear=True), + ): + with pytest.raises(ValueError, match="points to CUDA 13"): + cuda_toolkit_check() + + +def test_check_cuda_home_newer_than_driver(): + with ( + patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch("pynvml.nvmlInit"), + patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), + patch("rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", Path("/nonexistent")), + patch.dict("os.environ", {"CUDA_HOME": "/usr/local/cuda-13.0"}, clear=True), + ): + with pytest.raises(ValueError, match="CUDA_HOME"): + cuda_toolkit_check() From bff8cb3ff76112999b35bec0d1ff4e82fa0c057e Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 16:04:42 -0700 Subject: [PATCH 2/8] fixed formatting and error messages Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 136 ++++++++++++++++------- rapids_cli/tests/test_cuda_toolkit.py | 46 +++++--- 2 files changed, 125 insertions(+), 57 deletions(-) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index dad46bf..d275ffb 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -9,57 +9,109 @@ import pynvml # Core libraries to check for findability. -# cudart: universal — everything needs it. -# nvrtc: JIT compilation — cupy, cudf UDFs. Frequently missing with pip (pre-cupy 14). -# nvvm: numba-cuda JIT — cudf string UDFs. Was moved/renamed in CUDA 13.1. _CUDA_LIBS = { "cudart": "libcudart.so", "nvrtc": "libnvrtc.so", "nvvm": "libnvvm.so", } -_INSTALL_ADVICE = { - "conda": ( - "Update your conda environment with the correct CUDA toolkit version, " - "e.g. 'conda install cuda-toolkit' in your active environment." - ), - "pip": ( - "Update the CUDA pip packages in your environment, " - "e.g. 'pip install --upgrade nvidia-cuda-toolkit'." - ), +_CUDA_SYMLINK = Path("/usr/local/cuda") + +# Maps cuda-pathfinder's found_via values to human-readable source labels. +_SOURCE_LABELS = { + "conda": "conda", + "site-packages": "pip", + "system": "system", + "CUDA_HOME": "CUDA_HOME", } -_DEFAULT_ADVICE = ( - "Install the CUDA Toolkit matching your driver, " - "or use conda which manages CUDA automatically." -) -_CUDA_SYMLINK = Path("/usr/local/cuda") -def _get_advice(found_via: str | None) -> str: - """Return install advice based on how cuda-pathfinder found the library.""" +def _get_source_label(found_via: str | None) -> str | None: + """Map cuda-pathfinder's found_via to a human-readable source label.""" if found_via: - for key, advice in _INSTALL_ADVICE.items(): + for key, label in _SOURCE_LABELS.items(): if key in found_via: - return advice - return _DEFAULT_ADVICE + return label + return None + + +def _format_mismatch_error( + toolkit_major: int, + driver_major: int, + found_via: str | None, + cudart_path: str | None, +) -> str: + """Build a clear error message for toolkit > driver version mismatch.""" + source = _get_source_label(found_via) + + location = f"CUDA {toolkit_major} toolkit" + if source and cudart_path: + location += f" (found via {source} at {cudart_path})" + elif source: + location += f" (found via {source})" + elif cudart_path: + location += f" (at {cudart_path})" + + return ( + f"{location} is newer than what the GPU driver supports (CUDA {driver_major}). " + f"Either update the GPU driver to one that supports CUDA {toolkit_major}, " + f"or recreate your environment with CUDA {driver_major} packages." + ) + + +def _format_missing_error(missing_libs: list[str], found_via: str | None) -> str: + """Build a clear error message for missing CUDA libraries.""" + source = _get_source_label(found_via) + missing_str = ", ".join(missing_libs) + + if source: + return ( + f"A {source} CUDA installation was detected, but {missing_str} could not be found. " + f"Try reinstalling the CUDA packages in your {source} environment." + ) + + return ( + f"Some CUDA libraries ({missing_str}) could not be found. " + "Install the CUDA Toolkit, or use conda/pip which manage CUDA automatically." + ) + +def _get_toolkit_cuda_major(cudart_path: str | None = None) -> int | None: + """Return the CUDA major version of the toolkit. -def _get_toolkit_cuda_major() -> int | None: - """Return the CUDA major version of the toolkit via cuda-pathfinder headers. + Tries two strategies in order: + 1. Parse #define CUDA_VERSION from cuda_runtime_version.h (precise, needs dev headers) + 2. Call cudaRuntimeGetVersion via ctypes on the loaded libcudart.so - Parses #define CUDA_VERSION from cuda_runtime_version.h. - Returns None if headers are not available. + Args: + cudart_path: Absolute path to libcudart.so from cuda-pathfinder, used as fallback. """ + import ctypes + import cuda.pathfinder + # header parsing header_dir = cuda.pathfinder.find_nvidia_header_directory("cudart") - if header_dir is None: - return None - version_file = Path(header_dir) / "cuda_runtime_version.h" - if not version_file.exists(): - return None - match = re.search(r"#define\s+CUDA_VERSION\s+(\d+)", version_file.read_text()) - return int(match.group(1)) // 1000 if match else None + if header_dir is not None: + version_file = Path(header_dir) / "cuda_runtime_version.h" + if version_file.exists(): + match = re.search( + r"#define\s+CUDA_VERSION\s+(\d+)", version_file.read_text() + ) + if match: + return int(match.group(1)) // 1000 + + # if header parsing fails, call cudaRuntimeGetVersion via ctypes + if cudart_path is not None: + try: + libcudart = ctypes.CDLL(cudart_path) + version = ctypes.c_int() + if libcudart.cudaRuntimeGetVersion(ctypes.byref(version)) == 0: + return version.value // 1000 + except OSError: + pass + + return None def _extract_major_from_cuda_path(path: Path) -> int | None: @@ -82,20 +134,20 @@ def cuda_toolkit_check(verbose=False): # Check library findability found_via = {} + cudart_path = None missing = [] for libname, soname in _CUDA_LIBS.items(): try: loaded = cuda.pathfinder.load_nvidia_dynamic_lib(libname) found_via[libname] = loaded.found_via + if libname == "cudart": + cudart_path = loaded.abs_path except (DynamicLibNotFoundError, RuntimeError): missing.append(soname) if missing: - advice = _get_advice(next(iter(found_via.values()), None)) - raise ValueError( - f"{', '.join(missing)} could not be found. " - f"RAPIDS will not be able to run GPU operations. {advice}" - ) + any_found_via = next(iter(found_via.values()), None) + raise ValueError(_format_missing_error(missing, any_found_via)) # Get driver CUDA major version try: @@ -109,12 +161,12 @@ def cuda_toolkit_check(verbose=False): # Get toolkit CUDA major version and compare to driver # Only error when toolkit > driver (drivers are backward compatible) - toolkit_major = _get_toolkit_cuda_major() + toolkit_major = _get_toolkit_cuda_major(cudart_path) if toolkit_major is not None and toolkit_major > driver_major: - advice = _get_advice(found_via.get("cudart")) raise ValueError( - f"CUDA toolkit is version {toolkit_major} but the GPU driver " - f"only supports up to CUDA {driver_major}. {advice}" + _format_mismatch_error( + toolkit_major, driver_major, found_via.get("cudart"), cudart_path + ) ) # Check /usr/local/cuda symlink diff --git a/rapids_cli/tests/test_cuda_toolkit.py b/rapids_cli/tests/test_cuda_toolkit.py index 09ade4d..df8b16f 100644 --- a/rapids_cli/tests/test_cuda_toolkit.py +++ b/rapids_cli/tests/test_cuda_toolkit.py @@ -16,14 +16,16 @@ class FakeLoadedLib: abs_path: str | None = None found_via: str = "conda" - was_already_loaded_from_elsewhere: bool = False def _fake_loader(overrides=None): """Build a side_effect for load_nvidia_dynamic_lib. All 3 libs found by default.""" - from cuda.pathfinder import DynamicLibNotFoundError - defaults = {"cudart": FakeLoadedLib(), "nvrtc": FakeLoadedLib(), "nvvm": FakeLoadedLib()} + defaults = { + "cudart": FakeLoadedLib(abs_path="/usr/lib/libcudart.so.12"), + "nvrtc": FakeLoadedLib(), + "nvvm": FakeLoadedLib(), + } if overrides: defaults.update(overrides) @@ -42,7 +44,9 @@ def test_check_success(tmp_path): patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + patch( + "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) + ), patch.dict("os.environ", {}, clear=True), ): result = cuda_toolkit_check(verbose=True) @@ -55,13 +59,15 @@ def test_check_missing_libs(): with patch( "cuda.pathfinder.load_nvidia_dynamic_lib", - side_effect=_fake_loader({ - "cudart": DynamicLibNotFoundError("not found"), - "nvrtc": DynamicLibNotFoundError("not found"), - "nvvm": DynamicLibNotFoundError("not found"), - }), + side_effect=_fake_loader( + { + "cudart": DynamicLibNotFoundError("not found"), + "nvrtc": DynamicLibNotFoundError("not found"), + "nvvm": DynamicLibNotFoundError("not found"), + } + ), ): - with pytest.raises(ValueError, match="libcudart.so"): + with pytest.raises(ValueError, match="could not be found"): cuda_toolkit_check() @@ -77,13 +83,19 @@ def test_check_driver_query_fails(): def test_check_toolkit_newer_than_driver(tmp_path): """CUDA 13 toolkit + CUDA 12 driver = error.""" (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 13000\n") + cuda13_libs = {"cudart": FakeLoadedLib(abs_path="/usr/lib/libcudart.so.13")} with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch( + "cuda.pathfinder.load_nvidia_dynamic_lib", + side_effect=_fake_loader(cuda13_libs), + ), patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + patch( + "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) + ), ): - with pytest.raises(ValueError, match="only supports up to CUDA 12"): + with pytest.raises(ValueError, match="newer than what the GPU driver supports"): cuda_toolkit_check() @@ -94,7 +106,9 @@ def test_check_toolkit_older_than_driver_passes(tmp_path): patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=13000), - patch("cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path)), + patch( + "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) + ), patch.dict("os.environ", {}, clear=True), ): assert cuda_toolkit_check(verbose=False) is True @@ -124,7 +138,9 @@ def test_check_cuda_home_newer_than_driver(): patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), - patch("rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", Path("/nonexistent")), + patch( + "rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", Path("/nonexistent") + ), patch.dict("os.environ", {"CUDA_HOME": "/usr/local/cuda-13.0"}, clear=True), ): with pytest.raises(ValueError, match="CUDA_HOME"): From f358112405d3a6631adc9290370312dc74db74e9 Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 16:13:26 -0700 Subject: [PATCH 3/8] fixed system_path checks Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 42 ++++++++++++++---------- rapids_cli/tests/test_cuda_toolkit.py | 22 +++++++++++-- 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index d275ffb..14c4ae4 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -169,28 +169,34 @@ def cuda_toolkit_check(verbose=False): ) ) - # Check /usr/local/cuda symlink - if _CUDA_SYMLINK.exists(): - sym_major = _extract_major_from_cuda_path(_CUDA_SYMLINK.resolve()) - if sym_major is not None and sym_major > driver_major: - raise ValueError( - f"/usr/local/cuda points to CUDA {sym_major} but the GPU driver " - f"only supports up to CUDA {driver_major}. " - f"Update the symlink to a CUDA {driver_major}.x installation." - ) - - # Check CUDA_HOME / CUDA_PATH - for env_var in ("CUDA_HOME", "CUDA_PATH"): - env_val = os.environ.get(env_var) - if env_val: - env_major = _extract_major_from_cuda_path(Path(env_val)) - if env_major is not None and env_major > driver_major: + # Only check system paths if CUDA was found via system/CUDA_HOME. + # When found via conda or pip, RAPIDS uses those libs and ignores system paths. + cudart_source = found_via.get("cudart", "") + uses_system_paths = cudart_source not in ("conda", "site-packages") + + if uses_system_paths: + # Check /usr/local/cuda symlink + if _CUDA_SYMLINK.exists(): + sym_major = _extract_major_from_cuda_path(_CUDA_SYMLINK.resolve()) + if sym_major is not None and sym_major > driver_major: raise ValueError( - f"{env_var}={env_val} (CUDA {env_major}) but the GPU driver " + f"/usr/local/cuda points to CUDA {sym_major} but the GPU driver " f"only supports up to CUDA {driver_major}. " - f"Set {env_var} to a CUDA {driver_major}.x path." + f"Update the symlink to a CUDA {driver_major}.x installation." ) + # Check CUDA_HOME / CUDA_PATH + for env_var in ("CUDA_HOME", "CUDA_PATH"): + env_val = os.environ.get(env_var) + if env_val: + env_major = _extract_major_from_cuda_path(Path(env_val)) + if env_major is not None and env_major > driver_major: + raise ValueError( + f"{env_var}={env_val} (CUDA {env_major}) but the GPU driver " + f"only supports up to CUDA {driver_major}. " + f"Set {env_var} to a CUDA {driver_major}.x path." + ) + if verbose: version_str = f"CUDA {toolkit_major}" if toolkit_major else "unknown version" return f"CUDA toolkit OK ({version_str}). Driver supports CUDA {driver_major}." diff --git a/rapids_cli/tests/test_cuda_toolkit.py b/rapids_cli/tests/test_cuda_toolkit.py index df8b16f..0088538 100644 --- a/rapids_cli/tests/test_cuda_toolkit.py +++ b/rapids_cli/tests/test_cuda_toolkit.py @@ -115,13 +115,22 @@ def test_check_toolkit_older_than_driver_passes(tmp_path): def test_check_cuda_symlink_newer_than_driver(tmp_path): + """Only checked when CUDA was found via system paths, not conda/pip.""" symlink_target = tmp_path / "cuda-13.0" symlink_target.mkdir() symlink_path = tmp_path / "cuda" symlink_path.symlink_to(symlink_target) + system_libs = { + "cudart": FakeLoadedLib( + abs_path="/usr/lib/libcudart.so.12", found_via="system-search" + ), + } with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch( + "cuda.pathfinder.load_nvidia_dynamic_lib", + side_effect=_fake_loader(system_libs), + ), patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), @@ -133,8 +142,17 @@ def test_check_cuda_symlink_newer_than_driver(tmp_path): def test_check_cuda_home_newer_than_driver(): + """Only checked when CUDA was found via system paths, not conda/pip.""" + system_libs = { + "cudart": FakeLoadedLib( + abs_path="/usr/lib/libcudart.so.12", found_via="system-search" + ), + } with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), + patch( + "cuda.pathfinder.load_nvidia_dynamic_lib", + side_effect=_fake_loader(system_libs), + ), patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), From 5a4420106a04accfea5b8efc0a2dafb7d476a865 Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 16:43:35 -0700 Subject: [PATCH 4/8] fixed docstring Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index 14c4ae4..3739320 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -79,7 +79,7 @@ def _format_missing_error(missing_libs: list[str], found_via: str | None) -> str def _get_toolkit_cuda_major(cudart_path: str | None = None) -> int | None: """Return the CUDA major version of the toolkit. - Tries two strategies in order: + Tries two different methods: 1. Parse #define CUDA_VERSION from cuda_runtime_version.h (precise, needs dev headers) 2. Call cudaRuntimeGetVersion via ctypes on the loaded libcudart.so From 4885ac910f4ccafa2594574681b609d5044ff906 Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 16:49:22 -0700 Subject: [PATCH 5/8] made the code more modular Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 37 ++++++++++-------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index 3739320..24269f2 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -45,13 +45,10 @@ def _format_mismatch_error( source = _get_source_label(found_via) location = f"CUDA {toolkit_major} toolkit" - if source and cudart_path: - location += f" (found via {source} at {cudart_path})" - elif source: - location += f" (found via {source})" - elif cudart_path: - location += f" (at {cudart_path})" - + details = [v for v in (f"found via {source}" if source else None, + f"at {cudart_path}" if cudart_path else None) if v] + if details: + location += f" ({', '.join(details)})" return ( f"{location} is newer than what the GPU driver supports (CUDA {driver_major}). " f"Either update the GPU driver to one that supports CUDA {toolkit_major}, " @@ -126,6 +123,16 @@ def _extract_major_from_cuda_path(path: Path) -> int | None: return int(match.group(1)) return None +def _check_path_version(label: str, path: Path, driver_major: int) -> None: + """Raise if a CUDA path points to a version newer than the driver supports.""" + major = _extract_major_from_cuda_path(path) + if major is not None and major > driver_major: + raise ValueError( + f"{label} points to CUDA {major} but the GPU driver " + f"only supports up to CUDA {driver_major}. " + f"Update {label} to a CUDA {driver_major}.x installation." + ) + def cuda_toolkit_check(verbose=False): """Check CUDA toolkit library availability and version consistency.""" @@ -177,25 +184,13 @@ def cuda_toolkit_check(verbose=False): if uses_system_paths: # Check /usr/local/cuda symlink if _CUDA_SYMLINK.exists(): - sym_major = _extract_major_from_cuda_path(_CUDA_SYMLINK.resolve()) - if sym_major is not None and sym_major > driver_major: - raise ValueError( - f"/usr/local/cuda points to CUDA {sym_major} but the GPU driver " - f"only supports up to CUDA {driver_major}. " - f"Update the symlink to a CUDA {driver_major}.x installation." - ) + _check_path_version("/usr/local/cuda", _CUDA_SYMLINK.resolve(), driver_major) # Check CUDA_HOME / CUDA_PATH for env_var in ("CUDA_HOME", "CUDA_PATH"): env_val = os.environ.get(env_var) if env_val: - env_major = _extract_major_from_cuda_path(Path(env_val)) - if env_major is not None and env_major > driver_major: - raise ValueError( - f"{env_var}={env_val} (CUDA {env_major}) but the GPU driver " - f"only supports up to CUDA {driver_major}. " - f"Set {env_var} to a CUDA {driver_major}.x path." - ) + _check_path_version(f"{env_var}={env_val}", Path(env_val), driver_major) if verbose: version_str = f"CUDA {toolkit_major}" if toolkit_major else "unknown version" From ea65345eff1c384431203a3054ceeeac56c421ab Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Wed, 11 Mar 2026 16:56:45 -0700 Subject: [PATCH 6/8] fixed formatting Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index 24269f2..366b20f 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -45,8 +45,14 @@ def _format_mismatch_error( source = _get_source_label(found_via) location = f"CUDA {toolkit_major} toolkit" - details = [v for v in (f"found via {source}" if source else None, - f"at {cudart_path}" if cudart_path else None) if v] + details = [ + v + for v in ( + f"found via {source}" if source else None, + f"at {cudart_path}" if cudart_path else None, + ) + if v + ] if details: location += f" ({', '.join(details)})" return ( @@ -123,6 +129,7 @@ def _extract_major_from_cuda_path(path: Path) -> int | None: return int(match.group(1)) return None + def _check_path_version(label: str, path: Path, driver_major: int) -> None: """Raise if a CUDA path points to a version newer than the driver supports.""" major = _extract_major_from_cuda_path(path) @@ -184,7 +191,9 @@ def cuda_toolkit_check(verbose=False): if uses_system_paths: # Check /usr/local/cuda symlink if _CUDA_SYMLINK.exists(): - _check_path_version("/usr/local/cuda", _CUDA_SYMLINK.resolve(), driver_major) + _check_path_version( + "/usr/local/cuda", _CUDA_SYMLINK.resolve(), driver_major + ) # Check CUDA_HOME / CUDA_PATH for env_var in ("CUDA_HOME", "CUDA_PATH"): From 7526e9adb6c8ce917801c6858aa29c6cfcd59ab6 Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Thu, 12 Mar 2026 13:39:32 -0700 Subject: [PATCH 7/8] switched to cuda-core Signed-off-by: Jaya Venkatesh --- dependencies.yaml | 1 + pyproject.toml | 1 + rapids_cli/doctor/checks/cuda_toolkit.py | 124 ++++++++----- rapids_cli/tests/test_cuda.py | 28 ++- rapids_cli/tests/test_cuda_toolkit.py | 223 ++++++++++++----------- 5 files changed, 210 insertions(+), 167 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index afc80b5..d312739 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -61,6 +61,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: + - cuda-core >=0.6.0 - nvidia-ml-py>=12.0 - cuda-pathfinder >=1.2.3 - packaging diff --git a/pyproject.toml b/pyproject.toml index 91a33f8..882cc68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,7 @@ license-files = ["LICENSE"] readme = "README.md" requires-python = ">=3.10" dependencies = [ + "cuda-core >=0.6.0", "cuda-pathfinder >=1.2.3", "importlib-metadata >= 4.13.0; python_version < '3.12'", "nvidia-ml-py>=12.0", diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index 366b20f..7a8efc0 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -4,10 +4,9 @@ import os import re +from dataclasses import dataclass, field from pathlib import Path -import pynvml - # Core libraries to check for findability. _CUDA_LIBS = { "cudart": "libcudart.so", @@ -25,6 +24,16 @@ "CUDA_HOME": "CUDA_HOME", } +@dataclass +class CudaToolkitInfo: + """Gathered CUDA toolkit and driver information for the check to evaluate.""" + + found_libs: dict[str, str] = field(default_factory=dict) # libname -> found_via + cudart_path: str | None = None + missing_libs: list[str] = field(default_factory=list) + driver_major: int | None = None + toolkit_major: int | None = None + def _get_source_label(found_via: str | None) -> str | None: """Map cuda-pathfinder's found_via to a human-readable source label.""" @@ -79,6 +88,20 @@ def _format_missing_error(missing_libs: list[str], found_via: str | None) -> str ) +def _ctypes_cuda_version(cudart_path: str) -> int | None: + """Get CUDA major version by calling cudaRuntimeGetVersion via ctypes.""" + import ctypes + + try: + libcudart = ctypes.CDLL(cudart_path) + version = ctypes.c_int() + if libcudart.cudaRuntimeGetVersion(ctypes.byref(version)) == 0: + return version.value // 1000 + except OSError: + pass + return None + + def _get_toolkit_cuda_major(cudart_path: str | None = None) -> int | None: """Return the CUDA major version of the toolkit. @@ -89,8 +112,6 @@ def _get_toolkit_cuda_major(cudart_path: str | None = None) -> int | None: Args: cudart_path: Absolute path to libcudart.so from cuda-pathfinder, used as fallback. """ - import ctypes - import cuda.pathfinder # header parsing @@ -106,28 +127,15 @@ def _get_toolkit_cuda_major(cudart_path: str | None = None) -> int | None: # if header parsing fails, call cudaRuntimeGetVersion via ctypes if cudart_path is not None: - try: - libcudart = ctypes.CDLL(cudart_path) - version = ctypes.c_int() - if libcudart.cudaRuntimeGetVersion(ctypes.byref(version)) == 0: - return version.value // 1000 - except OSError: - pass + return _ctypes_cuda_version(cudart_path) return None def _extract_major_from_cuda_path(path: Path) -> int | None: - """Extract CUDA major version from a path like /usr/local/cuda-12.4 or its version.txt.""" + """Extract CUDA major version from a path like /usr/local/cuda-12.4.""" match = re.search(r"cuda-(\d+)", str(path)) - if match: - return int(match.group(1)) - version_file = path / "version.txt" - if version_file.exists(): - match = re.search(r"(\d+)\.", version_file.read_text()) - if match: - return int(match.group(1)) - return None + return int(match.group(1)) if match else None def _check_path_version(label: str, path: Path, driver_major: int) -> None: @@ -141,61 +149,81 @@ def _check_path_version(label: str, path: Path, driver_major: int) -> None: ) -def cuda_toolkit_check(verbose=False): - """Check CUDA toolkit library availability and version consistency.""" +def _gather_toolkit_info() -> CudaToolkitInfo: # pragma: no cover + """Gather CUDA toolkit and driver information from the real system.""" import cuda.pathfinder + from cuda.core.system import get_driver_version from cuda.pathfinder import DynamicLibNotFoundError - # Check library findability - found_via = {} - cudart_path = None - missing = [] + info = CudaToolkitInfo() + + # Discover libraries for libname, soname in _CUDA_LIBS.items(): try: loaded = cuda.pathfinder.load_nvidia_dynamic_lib(libname) - found_via[libname] = loaded.found_via + info.found_libs[libname] = loaded.found_via if libname == "cudart": - cudart_path = loaded.abs_path + info.cudart_path = loaded.abs_path except (DynamicLibNotFoundError, RuntimeError): - missing.append(soname) - - if missing: - any_found_via = next(iter(found_via.values()), None) - raise ValueError(_format_missing_error(missing, any_found_via)) + info.missing_libs.append(soname) - # Get driver CUDA major version + # Get driver version try: - pynvml.nvmlInit() - driver_major = pynvml.nvmlSystemGetCudaDriverVersion() // 1000 - except pynvml.NVMLError as e: + info.driver_major = get_driver_version()[0] + except Exception: + info.driver_major = None + + # Get toolkit version + if not info.missing_libs: + info.toolkit_major = _get_toolkit_cuda_major(info.cudart_path) + + return info + + +def cuda_toolkit_check( + verbose=False, *, toolkit_info: CudaToolkitInfo | None = None, **kwargs +): + """Check CUDA toolkit library availability and version consistency.""" + if toolkit_info is None: # pragma: no cover + toolkit_info = _gather_toolkit_info() + + # Check library findability + if toolkit_info.missing_libs: + any_found_via = next(iter(toolkit_info.found_libs.values()), None) + raise ValueError( + _format_missing_error(toolkit_info.missing_libs, any_found_via) + ) + + # Check driver availability + if toolkit_info.driver_major is None: raise ValueError( "Unable to query the GPU driver's CUDA version. " "RAPIDS requires a working NVIDIA GPU driver." - ) from e + ) + + driver_major = toolkit_info.driver_major + toolkit_major = toolkit_info.toolkit_major - # Get toolkit CUDA major version and compare to driver - # Only error when toolkit > driver (drivers are backward compatible) - toolkit_major = _get_toolkit_cuda_major(cudart_path) + # Compare toolkit to driver (only error when toolkit > driver, drivers are backward compatible) if toolkit_major is not None and toolkit_major > driver_major: raise ValueError( _format_mismatch_error( - toolkit_major, driver_major, found_via.get("cudart"), cudart_path + toolkit_major, + driver_major, + toolkit_info.found_libs.get("cudart"), + toolkit_info.cudart_path, ) ) # Only check system paths if CUDA was found via system/CUDA_HOME. # When found via conda or pip, RAPIDS uses those libs and ignores system paths. - cudart_source = found_via.get("cudart", "") - uses_system_paths = cudart_source not in ("conda", "site-packages") - - if uses_system_paths: - # Check /usr/local/cuda symlink + cudart_source = toolkit_info.found_libs.get("cudart", "") + if cudart_source not in ("conda", "site-packages"): if _CUDA_SYMLINK.exists(): _check_path_version( "/usr/local/cuda", _CUDA_SYMLINK.resolve(), driver_major ) - # Check CUDA_HOME / CUDA_PATH for env_var in ("CUDA_HOME", "CUDA_PATH"): env_val = os.environ.get(env_var) if env_val: diff --git a/rapids_cli/tests/test_cuda.py b/rapids_cli/tests/test_cuda.py index 70097b2..c6d4525 100644 --- a/rapids_cli/tests/test_cuda.py +++ b/rapids_cli/tests/test_cuda.py @@ -2,25 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 from unittest.mock import patch -from rapids_cli.doctor.checks.cuda_driver import cuda_check - +import pynvml +import pytest -def mock_cuda_version(): - return 12050 +from rapids_cli.doctor.checks.cuda_driver import cuda_check -def test_get_cuda_version_success(): +def test_cuda_check_success(): with ( patch("pynvml.nvmlInit"), patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), ): - version = mock_cuda_version() - assert version + assert cuda_check(verbose=True) == 12050 -def test_cuda_check_success(capfd): +def test_cuda_check_init_fails(): + with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)): + with pytest.raises(ValueError, match="Unable to look up CUDA version"): + cuda_check() + + +def test_cuda_check_version_query_fails(): with ( patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050), + patch( + "pynvml.nvmlSystemGetCudaDriverVersion", + side_effect=pynvml.NVMLError(1), + ), ): - assert cuda_check(verbose=True) + with pytest.raises(ValueError, match="Unable to look up CUDA version"): + cuda_check() diff --git a/rapids_cli/tests/test_cuda_toolkit.py b/rapids_cli/tests/test_cuda_toolkit.py index 0088538..8d1a19a 100644 --- a/rapids_cli/tests/test_cuda_toolkit.py +++ b/rapids_cli/tests/test_cuda_toolkit.py @@ -1,117 +1,128 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -from dataclasses import dataclass from pathlib import Path from unittest.mock import patch -import pynvml import pytest -from rapids_cli.doctor.checks.cuda_toolkit import cuda_toolkit_check +from rapids_cli.doctor.checks.cuda_toolkit import ( + CudaToolkitInfo, + _ctypes_cuda_version, + _get_toolkit_cuda_major, + cuda_toolkit_check, +) -@dataclass -class FakeLoadedLib: - """Mimics the return value of cuda.pathfinder.load_nvidia_dynamic_lib().""" - - abs_path: str | None = None - found_via: str = "conda" - - -def _fake_loader(overrides=None): - """Build a side_effect for load_nvidia_dynamic_lib. All 3 libs found by default.""" - +def _make_info(**overrides): + """Build a CudaToolkitInfo with sensible defaults. Override any field.""" defaults = { - "cudart": FakeLoadedLib(abs_path="/usr/lib/libcudart.so.12"), - "nvrtc": FakeLoadedLib(), - "nvvm": FakeLoadedLib(), + "found_libs": {"cudart": "conda", "nvrtc": "conda", "nvvm": "conda"}, + "cudart_path": "/usr/lib/libcudart.so", + "missing_libs": [], + "driver_major": 12, + "toolkit_major": 12, } - if overrides: - defaults.update(overrides) + defaults.update(overrides) + return CudaToolkitInfo(**defaults) - def loader(libname): - val = defaults[libname] - if isinstance(val, Exception): - raise val - return val - return loader +# Version detection tests -def test_check_success(tmp_path): +def test_get_toolkit_version_from_headers(tmp_path): (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 12040\n") + with patch( + "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) + ): + assert _get_toolkit_cuda_major() == 12 + + +def test_get_toolkit_version_no_headers_falls_back_to_ctypes(): + """When headers unavailable, falls through to ctypes cudaRuntimeGetVersion.""" + import ctypes + + version_obj = ctypes.c_int(0) with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch( - "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) - ), - patch.dict("os.environ", {}, clear=True), + patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), + patch("ctypes.CDLL") as mock_cdll, + patch("ctypes.c_int", return_value=version_obj), ): - result = cuda_toolkit_check(verbose=True) - assert isinstance(result, str) - assert "CUDA 12" in result + mock_lib = mock_cdll.return_value + def fake_get_version(ref): + version_obj.value = 13000 + return 0 -def test_check_missing_libs(): - from cuda.pathfinder import DynamicLibNotFoundError + mock_lib.cudaRuntimeGetVersion = fake_get_version + assert _get_toolkit_cuda_major("/usr/lib/libcudart.so") == 13 - with patch( - "cuda.pathfinder.load_nvidia_dynamic_lib", - side_effect=_fake_loader( - { - "cudart": DynamicLibNotFoundError("not found"), - "nvrtc": DynamicLibNotFoundError("not found"), - "nvvm": DynamicLibNotFoundError("not found"), - } + +def test_get_toolkit_version_returns_none_when_unavailable(): + with patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None): + assert _get_toolkit_cuda_major() is None + + +def test_ctypes_cuda_version_oserror(): + """ctypes returns None when the library can't be loaded.""" + with patch("ctypes.CDLL", side_effect=OSError("not found")): + assert _ctypes_cuda_version("/nonexistent/libcudart.so") is None + + +# Check function tests + + +def test_check_success(): + info = _make_info() + result = cuda_toolkit_check(verbose=True, toolkit_info=info) + assert isinstance(result, str) + assert "CUDA 12" in result + + +@pytest.mark.parametrize( + "found_libs, missing_libs, expected_match", + [ + ({}, ["libcudart.so", "libnvrtc.so", "libnvvm.so"], "could not be found"), + ( + {"cudart": "conda", "nvrtc": "conda"}, + ["libnvvm.so"], + "conda CUDA installation", ), - ): - with pytest.raises(ValueError, match="could not be found"): - cuda_toolkit_check() + ], + ids=["all_missing", "partial_missing"], +) +def test_check_missing_libs(found_libs, missing_libs, expected_match): + info = _make_info( + found_libs=found_libs, + missing_libs=missing_libs, + cudart_path=None if not found_libs else "/usr/lib/libcudart.so", + toolkit_major=None if not found_libs else 12, + ) + with pytest.raises(ValueError, match=expected_match): + cuda_toolkit_check(toolkit_info=info) def test_check_driver_query_fails(): - with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), - patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)), - ): - with pytest.raises(ValueError, match="Unable to query"): - cuda_toolkit_check() + info = _make_info(driver_major=None) + with pytest.raises(ValueError, match="Unable to query"): + cuda_toolkit_check(toolkit_info=info) -def test_check_toolkit_newer_than_driver(tmp_path): +def test_check_toolkit_newer_than_driver(): """CUDA 13 toolkit + CUDA 12 driver = error.""" - (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 13000\n") - cuda13_libs = {"cudart": FakeLoadedLib(abs_path="/usr/lib/libcudart.so.13")} - with ( - patch( - "cuda.pathfinder.load_nvidia_dynamic_lib", - side_effect=_fake_loader(cuda13_libs), - ), - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch( - "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) - ), - ): - with pytest.raises(ValueError, match="newer than what the GPU driver supports"): - cuda_toolkit_check() + info = _make_info( + found_libs={"cudart": "conda", "nvrtc": "conda", "nvvm": "conda"}, + cudart_path="/usr/lib/libcudart.so.13", + toolkit_major=13, + driver_major=12, + ) + with pytest.raises(ValueError, match="newer than what the GPU driver supports"): + cuda_toolkit_check(toolkit_info=info) -def test_check_toolkit_older_than_driver_passes(tmp_path): +def test_check_toolkit_older_than_driver_passes(): """CUDA 12 toolkit + CUDA 13 driver = fine (backward compatible).""" - (tmp_path / "cuda_runtime_version.h").write_text("#define CUDA_VERSION 12040\n") - with ( - patch("cuda.pathfinder.load_nvidia_dynamic_lib", side_effect=_fake_loader()), - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=13000), - patch( - "cuda.pathfinder.find_nvidia_header_directory", return_value=str(tmp_path) - ), - patch.dict("os.environ", {}, clear=True), - ): - assert cuda_toolkit_check(verbose=False) is True + info = _make_info(toolkit_major=12, driver_major=13) + assert cuda_toolkit_check(verbose=False, toolkit_info=info) is True def test_check_cuda_symlink_newer_than_driver(tmp_path): @@ -121,45 +132,39 @@ def test_check_cuda_symlink_newer_than_driver(tmp_path): symlink_path = tmp_path / "cuda" symlink_path.symlink_to(symlink_target) - system_libs = { - "cudart": FakeLoadedLib( - abs_path="/usr/lib/libcudart.so.12", found_via="system-search" - ), - } + info = _make_info( + found_libs={ + "cudart": "system-search", + "nvrtc": "system-search", + "nvvm": "system-search", + }, + toolkit_major=12, + driver_major=12, + ) with ( - patch( - "cuda.pathfinder.load_nvidia_dynamic_lib", - side_effect=_fake_loader(system_libs), - ), - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), patch("rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", symlink_path), patch.dict("os.environ", {}, clear=True), ): with pytest.raises(ValueError, match="points to CUDA 13"): - cuda_toolkit_check() + cuda_toolkit_check(toolkit_info=info) def test_check_cuda_home_newer_than_driver(): """Only checked when CUDA was found via system paths, not conda/pip.""" - system_libs = { - "cudart": FakeLoadedLib( - abs_path="/usr/lib/libcudart.so.12", found_via="system-search" - ), - } + info = _make_info( + found_libs={ + "cudart": "system-search", + "nvrtc": "system-search", + "nvvm": "system-search", + }, + toolkit_major=12, + driver_major=12, + ) with ( - patch( - "cuda.pathfinder.load_nvidia_dynamic_lib", - side_effect=_fake_loader(system_libs), - ), - patch("pynvml.nvmlInit"), - patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040), - patch("cuda.pathfinder.find_nvidia_header_directory", return_value=None), patch( "rapids_cli.doctor.checks.cuda_toolkit._CUDA_SYMLINK", Path("/nonexistent") ), patch.dict("os.environ", {"CUDA_HOME": "/usr/local/cuda-13.0"}, clear=True), ): with pytest.raises(ValueError, match="CUDA_HOME"): - cuda_toolkit_check() + cuda_toolkit_check(toolkit_info=info) From 9de27b68bcbd918b7fe2af06916585988966e17e Mon Sep 17 00:00:00 2001 From: Jaya Venkatesh Date: Thu, 12 Mar 2026 13:40:56 -0700 Subject: [PATCH 8/8] fixed formatting Signed-off-by: Jaya Venkatesh --- rapids_cli/doctor/checks/cuda_toolkit.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rapids_cli/doctor/checks/cuda_toolkit.py b/rapids_cli/doctor/checks/cuda_toolkit.py index 7a8efc0..5de953f 100644 --- a/rapids_cli/doctor/checks/cuda_toolkit.py +++ b/rapids_cli/doctor/checks/cuda_toolkit.py @@ -24,6 +24,7 @@ "CUDA_HOME": "CUDA_HOME", } + @dataclass class CudaToolkitInfo: """Gathered CUDA toolkit and driver information for the check to evaluate."""