diff --git a/cuda_core/cuda/core/_graph/_graph_builder.pyx b/cuda_core/cuda/core/_graph/_graph_builder.pyx index f1a11b5ded..58b1d93b9a 100644 --- a/cuda_core/cuda/core/_graph/_graph_builder.pyx +++ b/cuda_core/cuda/core/_graph/_graph_builder.pyx @@ -11,10 +11,10 @@ from cuda.core._graph._utils cimport _attach_host_callback_to_graph from cuda.core._resource_handles cimport as_cu from cuda.core._stream cimport Stream from cuda.core._utils.cuda_utils cimport HANDLE_RETURN +from cuda.core._utils.version cimport cy_binding_version, cy_driver_version + from cuda.core._utils.cuda_utils import ( driver, - get_binding_version, - get_driver_version, handle_return, ) @@ -169,7 +169,7 @@ def _instantiate_graph(h_graph, options: GraphCompleteOptions | None = None) -> elif params.result_out == driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED: raise RuntimeError("Instantiation for device launch failed due to the nodes belonging to different contexts.") elif ( - get_binding_version() >= (12, 8) + cy_binding_version() >= (12, 8, 0) and params.result_out == driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_CONDITIONAL_HANDLE_UNUSED ): raise RuntimeError("One or more conditional handles are not associated with conditional builders.") @@ -449,10 +449,10 @@ class GraphBuilder: The newly created conditional handle. """ - if get_driver_version() < 12030: - raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional handles") - if get_binding_version() < (12, 3): - raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional handles") + if cy_driver_version() < (12, 3, 0): + raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional handles") + if cy_binding_version() < (12, 3, 0): + raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional handles") if default_value is not None: flags = driver.CU_GRAPH_COND_ASSIGN_DEFAULT else: @@ -522,10 +522,10 @@ class GraphBuilder: The newly created conditional graph builder. """ - if get_driver_version() < 12030: - raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional if") - if get_binding_version() < (12, 3): - raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional if") + if cy_driver_version() < (12, 3, 0): + raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional if") + if cy_binding_version() < (12, 3, 0): + raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional if") node_params = driver.CUgraphNodeParams() node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL node_params.conditional.handle = handle @@ -553,10 +553,10 @@ class GraphBuilder: A tuple of two new graph builders, one for the if branch and one for the else branch. """ - if get_driver_version() < 12080: - raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional if-else") - if get_binding_version() < (12, 8): - raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional if-else") + if cy_driver_version() < (12, 8, 0): + raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional if-else") + if cy_binding_version() < (12, 8, 0): + raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional if-else") node_params = driver.CUgraphNodeParams() node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL node_params.conditional.handle = handle @@ -587,10 +587,10 @@ class GraphBuilder: A tuple of new graph builders, one for each branch. """ - if get_driver_version() < 12080: - raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional switch") - if get_binding_version() < (12, 8): - raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional switch") + if cy_driver_version() < (12, 8, 0): + raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional switch") + if cy_binding_version() < (12, 8, 0): + raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional switch") node_params = driver.CUgraphNodeParams() node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL node_params.conditional.handle = handle @@ -618,10 +618,10 @@ class GraphBuilder: The newly created while loop graph builder. """ - if get_driver_version() < 12030: - raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional while loop") - if get_binding_version() < (12, 3): - raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional while loop") + if cy_driver_version() < (12, 3, 0): + raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional while loop") + if cy_binding_version() < (12, 3, 0): + raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional while loop") node_params = driver.CUgraphNodeParams() node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL node_params.conditional.handle = handle @@ -649,12 +649,6 @@ class GraphBuilder: child_graph : :obj:`~_graph.GraphBuilder` The child graph builder. Must have finished building. """ - if (get_driver_version() < 12000) or (get_binding_version() < (12, 0)): - raise NotImplementedError( - f"Launching child graphs is not implemented for versions older than CUDA 12." - f"Found driver version is {get_driver_version()} and binding version is {get_binding_version()}" - ) - if not child_graph._building_ended: raise ValueError("Child graph has not finished building.") diff --git a/cuda_core/cuda/core/_graph/_graphdef.pyx b/cuda_core/cuda/core/_graph/_graphdef.pyx index dd4ee22ae1..e924540281 100644 --- a/cuda_core/cuda/core/_graph/_graphdef.pyx +++ b/cuda_core/cuda/core/_graph/_graphdef.pyx @@ -94,8 +94,8 @@ cdef bint _version_checked = False cdef bint _check_node_get_params(): global _has_cuGraphNodeGetParams, _version_checked if not _version_checked: - ver = handle_return(driver.cuDriverGetVersion()) - _has_cuGraphNodeGetParams = ver >= 13020 + from cuda.core._utils.version import driver_version + _has_cuGraphNodeGetParams = driver_version() >= (13, 2, 0) _version_checked = True return _has_cuGraphNodeGetParams diff --git a/cuda_core/cuda/core/_launch_config.pyx b/cuda_core/cuda/core/_launch_config.pyx index 798df71d9e..0970ea36c7 100644 --- a/cuda_core/cuda/core/_launch_config.pyx +++ b/cuda_core/cuda/core/_launch_config.pyx @@ -4,49 +4,16 @@ from libc.string cimport memset -from cuda.core._utils.cuda_utils cimport ( - HANDLE_RETURN, -) - -import threading - from cuda.core._device import Device from cuda.core._utils.cuda_utils import ( CUDAError, cast_to_3_tuple, driver, - get_binding_version, ) - -cdef bint _inited = False -cdef bint _use_ex = False -cdef object _lock = threading.Lock() - -# Attribute names for identity comparison and representation _LAUNCH_CONFIG_ATTRS = ('grid', 'cluster', 'block', 'shmem_size', 'cooperative_launch') -cdef int _lazy_init() except?-1: - global _inited, _use_ex - if _inited: - return 0 - - cdef tuple _py_major_minor - cdef int _driver_ver - with _lock: - if _inited: - return 0 - - # binding availability depends on cuda-python version - _py_major_minor = get_binding_version() - HANDLE_RETURN(cydriver.cuDriverGetVersion(&_driver_ver)) - _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8)) - _inited = True - - return 0 - - cdef class LaunchConfig: """Customizable launch options. @@ -99,8 +66,6 @@ cdef class LaunchConfig: cooperative_launch : bool, optional Whether to launch as cooperative kernel (default: False) """ - _lazy_init() - # Convert and validate grid and block dimensions self.grid = cast_to_3_tuple("LaunchConfig.grid", grid) self.block = cast_to_3_tuple("LaunchConfig.block", block) @@ -110,10 +75,6 @@ cdef class LaunchConfig: # device compute capability or attributes. # thread block clusters are supported starting H100 if cluster is not None: - if not _use_ex: - err, drvers = driver.cuDriverGetVersion() - drvers_fmt = f" (got driver version {drvers})" if err == driver.CUresult.CUDA_SUCCESS else "" - raise CUDAError(f"thread block clusters require cuda.bindings & driver 11.8+{drvers_fmt}") cc = Device().compute_capability if cc < (9, 0): raise CUDAError( @@ -153,7 +114,6 @@ cdef class LaunchConfig: return hash(self._identity()) cdef cydriver.CUlaunchConfig _to_native_launch_config(self): - _lazy_init() cdef cydriver.CUlaunchConfig drv_cfg cdef cydriver.CUlaunchAttribute attr memset(&drv_cfg, 0, sizeof(drv_cfg)) @@ -201,8 +161,6 @@ cpdef object _to_native_launch_config(LaunchConfig config): driver.CUlaunchConfig Native CUDA driver launch configuration """ - _lazy_init() - cdef object drv_cfg = driver.CUlaunchConfig() cdef list attrs cdef object attr diff --git a/cuda_core/cuda/core/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx index ce5f7339e0..f8189d95ed 100644 --- a/cuda_core/cuda/core/_launcher.pyx +++ b/cuda_core/cuda/core/_launcher.pyx @@ -15,39 +15,9 @@ from cuda.core._utils.cuda_utils cimport ( check_or_create_options, HANDLE_RETURN, ) - -import threading - from cuda.core._module import Kernel from cuda.core._stream import Stream -from cuda.core._utils.cuda_utils import ( - _reduce_3_tuple, - get_binding_version, -) - - -cdef bint _inited = False -cdef bint _use_ex = False -cdef object _lock = threading.Lock() - - -cdef int _lazy_init() except?-1: - global _inited, _use_ex - if _inited: - return 0 - - cdef int _driver_ver - with _lock: - if _inited: - return 0 - - # binding availability depends on cuda-python version - _py_major_minor = get_binding_version() - HANDLE_RETURN(cydriver.cuDriverGetVersion(&_driver_ver)) - _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8)) - _inited = True - - return 0 +from math import prod def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kernel: Kernel, *kernel_args): @@ -70,7 +40,6 @@ def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kern """ cdef Stream s = Stream_accept(stream, allow_stream_protocol=True) - _lazy_init() cdef LaunchConfig conf = check_or_create_options(LaunchConfig, config, "launch config") # TODO: can we ensure kernel_args is valid/safe to use here? @@ -78,41 +47,24 @@ def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kern cdef ParamHolder ker_args = ParamHolder(kernel_args) cdef void** args_ptr = (ker_args.ptr) - # Note: We now use CUkernel handles exclusively (CUDA 12+), but they can be cast to - # CUfunction for use with cuLaunchKernel, as both handle types are interchangeable - # for kernel launch purposes. cdef Kernel ker = kernel cdef cydriver.CUfunction func_handle = as_cu(ker._h_kernel) - # Note: CUkernel can still be launched via cuLaunchKernel (not just cuLaunchKernelEx). - # We check both binding & driver versions here mainly to see if the "Ex" API is - # available and if so we use it, as it's more feature rich. - if _use_ex: - drv_cfg = conf._to_native_launch_config() - drv_cfg.hStream = as_cu(s._h_stream) - if conf.cooperative_launch: - _check_cooperative_launch(kernel, conf, s) - with nogil: - HANDLE_RETURN(cydriver.cuLaunchKernelEx(&drv_cfg, func_handle, args_ptr, NULL)) - else: - # TODO: check if config has any unsupported attrs - HANDLE_RETURN( - cydriver.cuLaunchKernel( - func_handle, - conf.grid[0], conf.grid[1], conf.grid[2], - conf.block[0], conf.block[1], conf.block[2], - conf.shmem_size, as_cu(s._h_stream), args_ptr, NULL - ) - ) + drv_cfg = conf._to_native_launch_config() + drv_cfg.hStream = as_cu(s._h_stream) + if conf.cooperative_launch: + _check_cooperative_launch(kernel, conf, s) + with nogil: + HANDLE_RETURN(cydriver.cuLaunchKernelEx(&drv_cfg, func_handle, args_ptr, NULL)) cdef _check_cooperative_launch(kernel: Kernel, config: LaunchConfig, stream: Stream): dev = stream.device num_sm = dev.properties.multiprocessor_count max_grid_size = ( - kernel.occupancy.max_active_blocks_per_multiprocessor(_reduce_3_tuple(config.block), config.shmem_size) * num_sm + kernel.occupancy.max_active_blocks_per_multiprocessor(prod(config.block), config.shmem_size) * num_sm ) - if _reduce_3_tuple(config.grid) > max_grid_size: + if prod(config.grid) > max_grid_size: # For now let's try not to be smart and adjust the grid size behind users' back. # We explicitly ask users to adjust. x, y, z = config.grid diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx index ce7c6e4528..cde117b1bb 100644 --- a/cuda_core/cuda/core/_linker.pyx +++ b/cuda_core/cuda/core/_linker.pyx @@ -37,7 +37,6 @@ from cuda.core._utils.cuda_utils import ( CUDAError, check_or_create_options, driver, - handle_return, is_sequence, ) @@ -620,9 +619,8 @@ cdef inline void Linker_annotate_error_log(Linker self, object e): # TODO: revisit this treatment for py313t builds _driver = None # populated if nvJitLink cannot be used -_driver_ver = None _inited = False -_use_nvjitlink_backend = False # set by _decide_nvjitlink_or_driver() +_use_nvjitlink_backend = None # set by _decide_nvjitlink_or_driver() # Input type mappings populated by _lazy_init() with C-level enum ints. _nvjitlink_input_types = None @@ -637,13 +635,10 @@ def _nvjitlink_has_version_symbol(nvjitlink) -> bool: # Note: this function is reused in the tests def _decide_nvjitlink_or_driver() -> bool: """Return True if falling back to the cuLink* driver APIs.""" - global _driver_ver, _driver, _use_nvjitlink_backend - if _driver_ver is not None: + global _driver, _use_nvjitlink_backend + if _use_nvjitlink_backend is not None: return not _use_nvjitlink_backend - _driver_ver = handle_return(driver.cuDriverGetVersion()) - _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10) - warn_txt_common = ( "the driver APIs will be used instead, which do not support" " minor version compatibility or linking LTO IRs." @@ -668,6 +663,7 @@ def _decide_nvjitlink_or_driver() -> bool: ) warn(warn_txt, stacklevel=2, category=RuntimeWarning) + _use_nvjitlink_backend = False _driver = driver return True diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py index 7aff5709b7..7d952e102f 100644 --- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -16,11 +16,11 @@ Transaction, check_or_create_options, driver, - get_binding_version, ) from cuda.core._utils.cuda_utils import ( _check_driver_error as raise_if_driver_error, ) +from cuda.core._utils.version import binding_version __all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] @@ -99,8 +99,7 @@ class VirtualMemoryResourceOptions: _t = driver.CUmemAllocationType # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED} # noqa: RUF012 - ver_major, ver_minor = get_binding_version() - if ver_major >= 13: + if binding_version() >= (13, 0, 0): _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED @staticmethod diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index 4e8f810619..2eaff7fb11 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -6,8 +6,6 @@ from __future__ import annotations from libc.stddef cimport size_t -import functools -import threading from collections import namedtuple from cuda.core._device import Device @@ -33,110 +31,12 @@ from cuda.core._utils.clear_error_support import ( raise_code_path_meant_to_be_unreachable, ) from cuda.core._utils.cuda_utils cimport HANDLE_RETURN -from cuda.core._utils.cuda_utils import driver, get_binding_version +from cuda.core._utils.version cimport cy_driver_version +from cuda.core._utils.cuda_utils import driver from cuda.bindings cimport cydriver __all__ = ["Kernel", "ObjectCode"] -# Lazy initialization state and synchronization -# For Python 3.13t (free-threaded builds), we use a lock to ensure thread-safe initialization. -# For regular Python builds with GIL, the lock overhead is minimal and the code remains safe. -cdef object _init_lock = threading.Lock() -cdef bint _inited = False -cdef int _py_major_ver = 0 -cdef int _py_minor_ver = 0 -cdef int _driver_ver = 0 -cdef tuple _kernel_ctypes = None -cdef bint _paraminfo_supported = False - - -cdef int _lazy_init() except -1: - """ - Initialize module-level state in a thread-safe manner. - - This function is thread-safe and suitable for both: - - Regular Python builds (with GIL) - - Python 3.13t free-threaded builds (without GIL) - - Uses double-checked locking pattern for performance: - - Fast path: check without lock if already initialized - - Slow path: acquire lock and initialize if needed - """ - global _inited - # Fast path: already initialized (no lock needed for read) - if _inited: - return 0 - - cdef int drv_ver - # Slow path: acquire lock and initialize - with _init_lock: - # Double-check: another thread might have initialized while we waited - if _inited: - return 0 - - global _py_major_ver, _py_minor_ver, _driver_ver, _kernel_ctypes, _paraminfo_supported - # binding availability depends on cuda-python version - _py_major_ver, _py_minor_ver = get_binding_version() - _kernel_ctypes = (driver.CUkernel,) - with nogil: - HANDLE_RETURN(cydriver.cuDriverGetVersion(&drv_ver)) - _driver_ver = drv_ver - _paraminfo_supported = _driver_ver >= 12040 - - # Mark as initialized (must be last to ensure all state is set) - _inited = True - - return 0 - - -# Auto-initializing accessors (cdef for internal use) -cdef inline int _get_py_major_ver() except -1: - """Get the Python binding major version, initializing if needed.""" - _lazy_init() - return _py_major_ver - - -cdef inline int _get_py_minor_ver() except -1: - """Get the Python binding minor version, initializing if needed.""" - _lazy_init() - return _py_minor_ver - - -cdef inline int _get_driver_ver() except -1: - """Get the CUDA driver version, initializing if needed.""" - _lazy_init() - return _driver_ver - - -cdef inline tuple _get_kernel_ctypes(): - """Get the kernel ctypes tuple, initializing if needed.""" - _lazy_init() - return _kernel_ctypes - - -cdef inline bint _is_paraminfo_supported() except -1: - """Return True if cuKernelGetParamInfo is available (driver >= 12.4).""" - _lazy_init() - return _paraminfo_supported - - -@functools.cache -def _is_cukernel_get_library_supported() -> bool: - """Return True when cuKernelGetLibrary is available for inverse kernel-to-library lookup. - - Requires cuda-python bindings >= 12.5 and driver >= 12.5. - """ - return ( - (_get_py_major_ver(), _get_py_minor_ver()) >= (12, 5) - and _get_driver_ver() >= 12050 - and hasattr(driver, "cuKernelGetLibrary") - ) - - -cdef inline LibraryHandle _make_empty_library_handle(): - """Create an empty LibraryHandle to indicate no library loaded.""" - return LibraryHandle() # Empty shared_ptr - cdef class KernelAttributes: """Provides access to kernel attributes.""" @@ -149,7 +49,6 @@ cdef class KernelAttributes: cdef KernelAttributes self = KernelAttributes.__new__(KernelAttributes) self._h_kernel = h_kernel self._cache = {} - _lazy_init() return self cdef int _get_cached_attribute(self, int device_id, cydriver.CUfunction_attribute attribute) except? -1: @@ -508,11 +407,10 @@ cdef class Kernel: return self._attributes cdef tuple _get_arguments_info(self, bint param_info=False): - if not _is_paraminfo_supported(): - driver_ver = _get_driver_ver() + if cy_driver_version() < (12, 4, 0): raise NotImplementedError( "Driver version 12.4 or newer is required for this function. " - f"Using driver version {driver_ver // 1000}.{(driver_ver % 1000) // 10}" + f"Using driver version {'.'.join(map(str, cy_driver_version()))}" ) cdef size_t arg_pos = 0 cdef list param_info_data = [] @@ -650,7 +548,6 @@ cdef class ObjectCode: # _h_library is assigned during _lazy_load_module self._h_library = LibraryHandle() # Empty handle - _lazy_init() self._code_type = code_type self._module = module diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index 96d7aa0567..194ef6da53 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -13,7 +13,7 @@ from dataclasses import dataclass import threading from warnings import warn -from cuda.bindings import driver, nvrtc +from cuda.bindings import nvrtc from cuda.pathfinder._optional_cuda_import import _optional_cuda_import from libcpp.vector cimport vector @@ -34,11 +34,11 @@ from cuda.core._utils.cuda_utils import ( CUDAError, _handle_boolean_option, check_or_create_options, - get_binding_version, handle_return, is_nested_sequence, is_sequence, ) +from cuda.core._utils.version import binding_version, driver_version __all__ = ["Program", "ProgramOptions"] @@ -520,10 +520,10 @@ def _get_nvvm_module(): _nvvm_import_attempted = True try: - version = get_binding_version() - if version < (12, 9): + version = binding_version() + if version < (12, 9, 0): raise RuntimeError( - f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. " + f"NVVM bindings require cuda-bindings >= 12.9.0, but found {'.'.join(map(str, version))}. " "Please update cuda-bindings to use NVVM features." ) @@ -579,9 +579,9 @@ cdef inline void _process_define_macro(list options, object macro) except *: cpdef bint _can_load_generated_ptx() except? -1: """Check if the driver can load PTX generated by the current NVRTC version.""" - driver_ver = handle_return(driver.cuDriverGetVersion()) + drv = driver_version() nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion()) - return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver + return (nvrtc_major, nvrtc_minor, 0) <= drv cdef inline object _translate_program_options(object options): diff --git a/cuda_core/cuda/core/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx index ec6c587f3f..867d066ce2 100644 --- a/cuda_core/cuda/core/_utils/cuda_utils.pyx +++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx @@ -4,7 +4,6 @@ import functools from functools import partial -import importlib.metadata import multiprocessing import platform import warnings @@ -61,10 +60,6 @@ def cast_to_3_tuple(label, cfg): return cfg + (1,) * (3 - len(cfg)) -def _reduce_3_tuple(t: tuple): - return t[0] * t[1] * t[2] - - cdef int HANDLE_RETURN(cydriver.CUresult err) except?-1 nogil: if err != cydriver.CUresult.CUDA_SUCCESS: return _check_driver_error(err) @@ -298,18 +293,6 @@ def is_nested_sequence(obj): return is_sequence(obj) and any(is_sequence(elem) for elem in obj) -@functools.cache -def get_binding_version(): - try: - major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2] - except importlib.metadata.PackageNotFoundError: - major_minor = importlib.metadata.version("cuda-python").split(".")[:2] - return tuple(int(v) for v in major_minor) - -@functools.cache -def get_driver_version(): - return handle_return(driver.cuDriverGetVersion()) - class Transaction: """ diff --git a/cuda_core/cuda/core/_utils/version.pxd b/cuda_core/cuda/core/_utils/version.pxd new file mode 100644 index 0000000000..2746d463db --- /dev/null +++ b/cuda_core/cuda/core/_utils/version.pxd @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +cdef tuple cy_binding_version() +cdef tuple cy_driver_version() diff --git a/cuda_core/cuda/core/_utils/version.pyx b/cuda_core/cuda/core/_utils/version.pyx new file mode 100644 index 0000000000..09ea585242 --- /dev/null +++ b/cuda_core/cuda/core/_utils/version.pyx @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + +import functools +import importlib.metadata + +from cuda.core._utils.cuda_utils import driver, handle_return + + +@functools.cache +def binding_version() -> tuple[int, int, int]: + """Return the cuda-bindings version as a (major, minor, patch) triple.""" + try: + parts = importlib.metadata.version("cuda-bindings").split(".")[:3] + except importlib.metadata.PackageNotFoundError: + parts = importlib.metadata.version("cuda-python").split(".")[:3] + return tuple(int(v) for v in parts) + + +@functools.cache +def driver_version() -> tuple[int, int, int]: + """Return the CUDA driver version as a (major, minor, patch) triple.""" + cdef int ver = handle_return(driver.cuDriverGetVersion()) + return (ver // 1000, (ver // 10) % 100, ver % 10) + + +cdef tuple _cached_binding_version = None +cdef tuple _cached_driver_version = None + + +cdef tuple cy_binding_version(): + global _cached_binding_version + if _cached_binding_version is None: + _cached_binding_version = binding_version() + return _cached_binding_version + + +cdef tuple cy_driver_version(): + global _cached_driver_version + if _cached_driver_version is None: + _cached_driver_version = driver_version() + return _cached_driver_version diff --git a/cuda_core/tests/graph/test_explicit.py b/cuda_core/tests/graph/test_explicit.py index ab023f5ffa..33826cb5fd 100644 --- a/cuda_core/tests/graph/test_explicit.py +++ b/cuda_core/tests/graph/test_explicit.py @@ -48,18 +48,18 @@ def _skip_if_no_managed_mempool(): def _driver_has_node_get_params(): - from cuda.bindings import driver as drv + from cuda.core._utils.version import driver_version - return drv.cuDriverGetVersion()[1] >= 13020 + return driver_version() >= (13, 2, 0) _HAS_NODE_GET_PARAMS = _driver_has_node_get_params() def _bindings_major_version(): - from cuda.core._utils.cuda_utils import get_binding_version + from cuda.core._utils.version import binding_version - return get_binding_version()[0] + return binding_version()[0] _BINDINGS_MAJOR = _bindings_major_version() diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py index 04670b96f2..f218182766 100644 --- a/cuda_core/tests/test_cuda_utils.py +++ b/cuda_core/tests/test_cuda_utils.py @@ -21,7 +21,9 @@ def test_driver_cu_result_explanations_health(): assert code in expl_dict known_codes.add(code) - if cuda_utils.get_binding_version() >= (13, 0): + from cuda.core._utils.version import binding_version + + if binding_version() >= (13, 0, 0): # Ensure expl_dict has no codes not known as a CUresult enum extra_expl = sorted(set(expl_dict.keys()) - known_codes) assert not extra_expl @@ -37,7 +39,9 @@ def test_runtime_cuda_error_explanations_health(): assert code in expl_dict known_codes.add(code) - if cuda_utils.get_binding_version() >= (13, 0): + from cuda.core._utils.version import binding_version + + if binding_version() >= (13, 0, 0): # Ensure expl_dict has no codes not known as a cudaError_t enum extra_expl = sorted(set(expl_dict.keys()) - known_codes) assert not extra_expl diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py index 95e47ce8d9..c4e1e9931f 100644 --- a/cuda_core/tests/test_device.py +++ b/cuda_core/tests/test_device.py @@ -10,7 +10,8 @@ import cuda.core from cuda.core import Device -from cuda.core._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return +from cuda.core._utils.cuda_utils import ComputeCapability, handle_return +from cuda.core._utils.version import binding_version, driver_version def test_device_init_disabled(): @@ -18,14 +19,6 @@ def test_device_init_disabled(): cuda.core._device.DeviceProperties() # Ensure back door is locked. -@pytest.fixture(scope="module") -def cuda_version(): - # binding availability depends on cuda-python version - _py_major_ver, _ = get_binding_version() - _driver_ver = handle_return(driver.cuDriverGetVersion()) - return _py_major_ver, _driver_ver - - def test_to_system_device(deinit_cuda): from cuda.core.system import _system @@ -115,8 +108,8 @@ def test_pci_bus_id(): def test_uuid(): device = Device() - driver_ver = handle_return(driver.cuDriverGetVersion()) - if driver_ver < 13000: + drv_ver = driver_version() + if drv_ver < (13, 0, 0): uuid = handle_return(driver.cuDeviceGetUuid_v2(device.device_id)) else: uuid = handle_return(driver.cuDeviceGetUuid(device.device_id)) @@ -306,8 +299,8 @@ def test_arch(): ("only_partial_host_native_atomic_supported", bool), ] -version = get_binding_version() -if version[0] >= 13: +version = binding_version() +if version >= (13, 0, 0): cuda_base_properties += cuda_13_properties @@ -324,7 +317,7 @@ def test_device_properties_complete(): excluded_props = set() # Exclude CUDA 13+ specific properties when not available - if version[0] < 13: + if version < (13, 0, 0): excluded_props.update({prop[0] for prop in cuda_13_properties}) filtered_tab_props = tab_props - excluded_props diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py index 2bc7e25d21..598b46ac7a 100644 --- a/cuda_core/tests/test_module.py +++ b/cuda_core/tests/test_module.py @@ -10,7 +10,8 @@ import cuda.core from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions from cuda.core._program import _can_load_generated_ptx -from cuda.core._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return +from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return +from cuda.core._utils.version import binding_version, driver_version try: import numba @@ -34,11 +35,7 @@ @pytest.fixture(scope="module") def cuda12_4_prerequisite_check(): - # binding availability depends on cuda-python version - # and version of underlying CUDA toolkit - _py_major_ver, _ = get_binding_version() - _driver_ver = handle_return(driver.cuDriverGetVersion()) - return _py_major_ver >= 12 and _driver_ver >= 12040 + return binding_version() >= (12, 0, 0) and driver_version() >= (12, 4, 0) def test_kernel_attributes_init_disabled(): diff --git a/cuda_core/tests/test_optional_dependency_imports.py b/cuda_core/tests/test_optional_dependency_imports.py index ebdd10e4a7..730c6e7834 100644 --- a/cuda_core/tests/test_optional_dependency_imports.py +++ b/cuda_core/tests/test_optional_dependency_imports.py @@ -2,8 +2,6 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -import types - import pytest from cuda.core import _linker, _program @@ -14,38 +12,26 @@ def restore_optional_import_state(): saved_nvvm_module = _program._nvvm_module saved_nvvm_attempted = _program._nvvm_import_attempted saved_driver = _linker._driver - saved_driver_ver = _linker._driver_ver saved_inited = _linker._inited saved_use_nvjitlink = _linker._use_nvjitlink_backend _program._nvvm_module = None _program._nvvm_import_attempted = False _linker._driver = None - _linker._driver_ver = None _linker._inited = False - _linker._use_nvjitlink_backend = False + _linker._use_nvjitlink_backend = None yield _program._nvvm_module = saved_nvvm_module _program._nvvm_import_attempted = saved_nvvm_attempted _linker._driver = saved_driver - _linker._driver_ver = saved_driver_ver _linker._inited = saved_inited _linker._use_nvjitlink_backend = saved_use_nvjitlink -def _patch_driver_version(monkeypatch, version=13000): - monkeypatch.setattr( - _linker, - "driver", - types.SimpleNamespace(cuDriverGetVersion=lambda: version), - ) - monkeypatch.setattr(_linker, "handle_return", lambda value: value) - - def test_get_nvvm_module_reraises_nested_module_not_found(monkeypatch): - monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9)) + monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0)) def fake__optional_cuda_import(modname, probe_function=None): assert modname == "cuda.bindings.nvvm" @@ -62,7 +48,7 @@ def fake__optional_cuda_import(modname, probe_function=None): def test_get_nvvm_module_reports_missing_nvvm_module(monkeypatch): - monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9)) + monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0)) def fake__optional_cuda_import(modname, probe_function=None): assert modname == "cuda.bindings.nvvm" @@ -76,7 +62,7 @@ def fake__optional_cuda_import(modname, probe_function=None): def test_get_nvvm_module_handles_missing_libnvvm(monkeypatch): - monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9)) + monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0)) def fake__optional_cuda_import(modname, probe_function=None): assert modname == "cuda.bindings.nvvm" @@ -90,8 +76,6 @@ def fake__optional_cuda_import(modname, probe_function=None): def test_decide_nvjitlink_or_driver_reraises_nested_module_not_found(monkeypatch): - _patch_driver_version(monkeypatch) - def fake__optional_cuda_import(modname, probe_function=None): assert modname == "cuda.bindings.nvjitlink" assert probe_function is not None @@ -107,8 +91,6 @@ def fake__optional_cuda_import(modname, probe_function=None): def test_decide_nvjitlink_or_driver_falls_back_when_module_missing(monkeypatch): - _patch_driver_version(monkeypatch) - def fake__optional_cuda_import(modname, probe_function=None): assert modname == "cuda.bindings.nvjitlink" assert probe_function is not None diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 2507b82f0d..ac40fb735d 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE +import contextlib import re import warnings @@ -11,11 +12,10 @@ from cuda.core._device import Device from cuda.core._module import Kernel, ObjectCode from cuda.core._program import Program, ProgramOptions -from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return +from cuda.core._utils.cuda_utils import CUDAError, handle_return pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",) -cuda_driver_version = handle_return(driver.cuDriverGetVersion()) is_culink_backend = _linker._decide_nvjitlink_or_driver() @@ -34,12 +34,8 @@ def _is_nvvm_available(): not _is_nvvm_available(), reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)" ) -try: - from cuda.core._utils.cuda_utils import driver, handle_return, nvrtc - - _cuda_driver_version = handle_return(driver.cuDriverGetVersion()) -except Exception: - _cuda_driver_version = 0 +with contextlib.suppress(Exception): + from cuda.core._utils.cuda_utils import nvrtc def _get_nvrtc_version_for_tests():