diff --git a/cuda_core/cuda/core/_graph/_graph_builder.pyx b/cuda_core/cuda/core/_graph/_graph_builder.pyx
index f1a11b5ded..58b1d93b9a 100644
--- a/cuda_core/cuda/core/_graph/_graph_builder.pyx
+++ b/cuda_core/cuda/core/_graph/_graph_builder.pyx
@@ -11,10 +11,10 @@ from cuda.core._graph._utils cimport _attach_host_callback_to_graph
 from cuda.core._resource_handles cimport as_cu
 from cuda.core._stream cimport Stream
 from cuda.core._utils.cuda_utils cimport HANDLE_RETURN
+from cuda.core._utils.version cimport cy_binding_version, cy_driver_version
+
 from cuda.core._utils.cuda_utils import (
     driver,
-    get_binding_version,
-    get_driver_version,
     handle_return,
 )
 
@@ -169,7 +169,7 @@ def _instantiate_graph(h_graph, options: GraphCompleteOptions | None = None) ->
     elif params.result_out == driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED:
         raise RuntimeError("Instantiation for device launch failed due to the nodes belonging to different contexts.")
     elif (
-        get_binding_version() >= (12, 8)
+        cy_binding_version() >= (12, 8, 0)
         and params.result_out == driver.CUgraphInstantiateResult.CUDA_GRAPH_INSTANTIATE_CONDITIONAL_HANDLE_UNUSED
     ):
         raise RuntimeError("One or more conditional handles are not associated with conditional builders.")
@@ -449,10 +449,10 @@ class GraphBuilder:
             The newly created conditional handle.
 
         """
-        if get_driver_version() < 12030:
-            raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional handles")
-        if get_binding_version() < (12, 3):
-            raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional handles")
+        if cy_driver_version() < (12, 3, 0):
+            raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional handles")
+        if cy_binding_version() < (12, 3, 0):
+            raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional handles")
         if default_value is not None:
             flags = driver.CU_GRAPH_COND_ASSIGN_DEFAULT
         else:
@@ -522,10 +522,10 @@ class GraphBuilder:
             The newly created conditional graph builder.
 
         """
-        if get_driver_version() < 12030:
-            raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional if")
-        if get_binding_version() < (12, 3):
-            raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional if")
+        if cy_driver_version() < (12, 3, 0):
+            raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional if")
+        if cy_binding_version() < (12, 3, 0):
+            raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional if")
         node_params = driver.CUgraphNodeParams()
         node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
         node_params.conditional.handle = handle
@@ -553,10 +553,10 @@ class GraphBuilder:
             A tuple of two new graph builders, one for the if branch and one for the else branch.
 
         """
-        if get_driver_version() < 12080:
-            raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional if-else")
-        if get_binding_version() < (12, 8):
-            raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional if-else")
+        if cy_driver_version() < (12, 8, 0):
+            raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional if-else")
+        if cy_binding_version() < (12, 8, 0):
+            raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional if-else")
         node_params = driver.CUgraphNodeParams()
         node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
         node_params.conditional.handle = handle
@@ -587,10 +587,10 @@ class GraphBuilder:
             A tuple of new graph builders, one for each branch.
 
         """
-        if get_driver_version() < 12080:
-            raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional switch")
-        if get_binding_version() < (12, 8):
-            raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional switch")
+        if cy_driver_version() < (12, 8, 0):
+            raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional switch")
+        if cy_binding_version() < (12, 8, 0):
+            raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional switch")
         node_params = driver.CUgraphNodeParams()
         node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
         node_params.conditional.handle = handle
@@ -618,10 +618,10 @@ class GraphBuilder:
             The newly created while loop graph builder.
 
         """
-        if get_driver_version() < 12030:
-            raise RuntimeError(f"Driver version {get_driver_version()} does not support conditional while loop")
-        if get_binding_version() < (12, 3):
-            raise RuntimeError(f"Binding version {get_binding_version()} does not support conditional while loop")
+        if cy_driver_version() < (12, 3, 0):
+            raise RuntimeError(f"Driver version {'.'.join(map(str, cy_driver_version()))} does not support conditional while loop")
+        if cy_binding_version() < (12, 3, 0):
+            raise RuntimeError(f"Binding version {'.'.join(map(str, cy_binding_version()))} does not support conditional while loop")
         node_params = driver.CUgraphNodeParams()
         node_params.type = driver.CUgraphNodeType.CU_GRAPH_NODE_TYPE_CONDITIONAL
         node_params.conditional.handle = handle
@@ -649,12 +649,6 @@ class GraphBuilder:
         child_graph : :obj:`~_graph.GraphBuilder`
             The child graph builder. Must have finished building.
         """
-        if (get_driver_version() < 12000) or (get_binding_version() < (12, 0)):
-            raise NotImplementedError(
-                f"Launching child graphs is not implemented for versions older than CUDA 12."
-                f"Found driver version is {get_driver_version()} and binding version is {get_binding_version()}"
-            )
-
         if not child_graph._building_ended:
             raise ValueError("Child graph has not finished building.")
 
diff --git a/cuda_core/cuda/core/_graph/_graphdef.pyx b/cuda_core/cuda/core/_graph/_graphdef.pyx
index dd4ee22ae1..e924540281 100644
--- a/cuda_core/cuda/core/_graph/_graphdef.pyx
+++ b/cuda_core/cuda/core/_graph/_graphdef.pyx
@@ -94,8 +94,8 @@ cdef bint _version_checked = False
 cdef bint _check_node_get_params():
     global _has_cuGraphNodeGetParams, _version_checked
     if not _version_checked:
-        ver = handle_return(driver.cuDriverGetVersion())
-        _has_cuGraphNodeGetParams = ver >= 13020
+        from cuda.core._utils.version import driver_version
+        _has_cuGraphNodeGetParams = driver_version() >= (13, 2, 0)
         _version_checked = True
     return _has_cuGraphNodeGetParams
 
diff --git a/cuda_core/cuda/core/_launch_config.pyx b/cuda_core/cuda/core/_launch_config.pyx
index 798df71d9e..0970ea36c7 100644
--- a/cuda_core/cuda/core/_launch_config.pyx
+++ b/cuda_core/cuda/core/_launch_config.pyx
@@ -4,49 +4,16 @@
 
 from libc.string cimport memset
 
-from cuda.core._utils.cuda_utils cimport (
-    HANDLE_RETURN,
-)
-
-import threading
-
 from cuda.core._device import Device
 from cuda.core._utils.cuda_utils import (
     CUDAError,
     cast_to_3_tuple,
     driver,
-    get_binding_version,
 )
 
-
-cdef bint _inited = False
-cdef bint _use_ex = False
-cdef object _lock = threading.Lock()
-
-# Attribute names for identity comparison and representation
 _LAUNCH_CONFIG_ATTRS = ('grid', 'cluster', 'block', 'shmem_size', 'cooperative_launch')
 
 
-cdef int _lazy_init() except?-1:
-    global _inited, _use_ex
-    if _inited:
-        return 0
-
-    cdef tuple _py_major_minor
-    cdef int _driver_ver
-    with _lock:
-        if _inited:
-            return 0
-
-        # binding availability depends on cuda-python version
-        _py_major_minor = get_binding_version()
-        HANDLE_RETURN(cydriver.cuDriverGetVersion(&_driver_ver))
-        _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
-        _inited = True
-
-    return 0
-
-
 cdef class LaunchConfig:
     """Customizable launch options.
 
@@ -99,8 +66,6 @@ cdef class LaunchConfig:
         cooperative_launch : bool, optional
             Whether to launch as cooperative kernel (default: False)
         """
-        _lazy_init()
-
         # Convert and validate grid and block dimensions
         self.grid = cast_to_3_tuple("LaunchConfig.grid", grid)
         self.block = cast_to_3_tuple("LaunchConfig.block", block)
@@ -110,10 +75,6 @@ cdef class LaunchConfig:
         # device compute capability or attributes.
         # thread block clusters are supported starting H100
         if cluster is not None:
-            if not _use_ex:
-                err, drvers = driver.cuDriverGetVersion()
-                drvers_fmt = f" (got driver version {drvers})" if err == driver.CUresult.CUDA_SUCCESS else ""
-                raise CUDAError(f"thread block clusters require cuda.bindings & driver 11.8+{drvers_fmt}")
             cc = Device().compute_capability
             if cc < (9, 0):
                 raise CUDAError(
@@ -153,7 +114,6 @@ cdef class LaunchConfig:
         return hash(self._identity())
 
     cdef cydriver.CUlaunchConfig _to_native_launch_config(self):
-        _lazy_init()
         cdef cydriver.CUlaunchConfig drv_cfg
         cdef cydriver.CUlaunchAttribute attr
         memset(&drv_cfg, 0, sizeof(drv_cfg))
@@ -201,8 +161,6 @@ cpdef object _to_native_launch_config(LaunchConfig config):
     driver.CUlaunchConfig
         Native CUDA driver launch configuration
     """
-    _lazy_init()
-
     cdef object drv_cfg = driver.CUlaunchConfig()
     cdef list attrs
     cdef object attr
diff --git a/cuda_core/cuda/core/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx
index ce5f7339e0..f8189d95ed 100644
--- a/cuda_core/cuda/core/_launcher.pyx
+++ b/cuda_core/cuda/core/_launcher.pyx
@@ -15,39 +15,9 @@ from cuda.core._utils.cuda_utils cimport (
     check_or_create_options,
     HANDLE_RETURN,
 )
-
-import threading
-
 from cuda.core._module import Kernel
 from cuda.core._stream import Stream
-from cuda.core._utils.cuda_utils import (
-    _reduce_3_tuple,
-    get_binding_version,
-)
-
-
-cdef bint _inited = False
-cdef bint _use_ex = False
-cdef object _lock = threading.Lock()
-
-
-cdef int _lazy_init() except?-1:
-    global _inited, _use_ex
-    if _inited:
-        return 0
-
-    cdef int _driver_ver
-    with _lock:
-        if _inited:
-            return 0
-
-        # binding availability depends on cuda-python version
-        _py_major_minor = get_binding_version()
-        HANDLE_RETURN(cydriver.cuDriverGetVersion(&_driver_ver))
-        _use_ex = (_driver_ver >= 11080) and (_py_major_minor >= (11, 8))
-        _inited = True
-
-    return 0
+from math import prod
 
 
 def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kernel: Kernel, *kernel_args):
@@ -70,7 +40,6 @@ def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kern
 
     """
     cdef Stream s = Stream_accept(stream, allow_stream_protocol=True)
-    _lazy_init()
     cdef LaunchConfig conf = check_or_create_options(LaunchConfig, config, "launch config")
 
     # TODO: can we ensure kernel_args is valid/safe to use here?
@@ -78,41 +47,24 @@ def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kern
     cdef ParamHolder ker_args = ParamHolder(kernel_args)
     cdef void** args_ptr = <void**><uintptr_t>(ker_args.ptr)
 
-    # Note: We now use CUkernel handles exclusively (CUDA 12+), but they can be cast to
-    # CUfunction for use with cuLaunchKernel, as both handle types are interchangeable
-    # for kernel launch purposes.
     cdef Kernel ker = <Kernel>kernel
     cdef cydriver.CUfunction func_handle = <cydriver.CUfunction>as_cu(ker._h_kernel)
 
-    # Note: CUkernel can still be launched via cuLaunchKernel (not just cuLaunchKernelEx).
-    # We check both binding & driver versions here mainly to see if the "Ex" API is
-    # available and if so we use it, as it's more feature rich.
-    if _use_ex:
-        drv_cfg = conf._to_native_launch_config()
-        drv_cfg.hStream = as_cu(s._h_stream)
-        if conf.cooperative_launch:
-            _check_cooperative_launch(kernel, conf, s)
-        with nogil:
-            HANDLE_RETURN(cydriver.cuLaunchKernelEx(&drv_cfg, func_handle, args_ptr, NULL))
-    else:
-        # TODO: check if config has any unsupported attrs
-        HANDLE_RETURN(
-            cydriver.cuLaunchKernel(
-                func_handle,
-                conf.grid[0], conf.grid[1], conf.grid[2],
-                conf.block[0], conf.block[1], conf.block[2],
-                conf.shmem_size, as_cu(s._h_stream), args_ptr, NULL
-            )
-        )
+    drv_cfg = conf._to_native_launch_config()
+    drv_cfg.hStream = as_cu(s._h_stream)
+    if conf.cooperative_launch:
+        _check_cooperative_launch(kernel, conf, s)
+    with nogil:
+        HANDLE_RETURN(cydriver.cuLaunchKernelEx(&drv_cfg, func_handle, args_ptr, NULL))
 
 
 cdef _check_cooperative_launch(kernel: Kernel, config: LaunchConfig, stream: Stream):
     dev = stream.device
     num_sm = dev.properties.multiprocessor_count
     max_grid_size = (
-        kernel.occupancy.max_active_blocks_per_multiprocessor(_reduce_3_tuple(config.block), config.shmem_size) * num_sm
+        kernel.occupancy.max_active_blocks_per_multiprocessor(prod(config.block), config.shmem_size) * num_sm
     )
-    if _reduce_3_tuple(config.grid) > max_grid_size:
+    if prod(config.grid) > max_grid_size:
         # For now let's try not to be smart and adjust the grid size behind users' back.
         # We explicitly ask users to adjust.
         x, y, z = config.grid
diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx
index ce7c6e4528..cde117b1bb 100644
--- a/cuda_core/cuda/core/_linker.pyx
+++ b/cuda_core/cuda/core/_linker.pyx
@@ -37,7 +37,6 @@ from cuda.core._utils.cuda_utils import (
     CUDAError,
     check_or_create_options,
     driver,
-    handle_return,
     is_sequence,
 )
 
@@ -620,9 +619,8 @@ cdef inline void Linker_annotate_error_log(Linker self, object e):
 
 # TODO: revisit this treatment for py313t builds
 _driver = None  # populated if nvJitLink cannot be used
-_driver_ver = None
 _inited = False
-_use_nvjitlink_backend = False  # set by _decide_nvjitlink_or_driver()
+_use_nvjitlink_backend = None  # set by _decide_nvjitlink_or_driver()
 
 # Input type mappings populated by _lazy_init() with C-level enum ints.
 _nvjitlink_input_types = None
@@ -637,13 +635,10 @@ def _nvjitlink_has_version_symbol(nvjitlink) -> bool:
 # Note: this function is reused in the tests
 def _decide_nvjitlink_or_driver() -> bool:
     """Return True if falling back to the cuLink* driver APIs."""
-    global _driver_ver, _driver, _use_nvjitlink_backend
-    if _driver_ver is not None:
+    global _driver, _use_nvjitlink_backend
+    if _use_nvjitlink_backend is not None:
         return not _use_nvjitlink_backend
 
-    _driver_ver = handle_return(driver.cuDriverGetVersion())
-    _driver_ver = (_driver_ver // 1000, (_driver_ver % 1000) // 10)
-
     warn_txt_common = (
         "the driver APIs will be used instead, which do not support"
         " minor version compatibility or linking LTO IRs."
@@ -668,6 +663,7 @@ def _decide_nvjitlink_or_driver() -> bool:
         )
 
     warn(warn_txt, stacklevel=2, category=RuntimeWarning)
+    _use_nvjitlink_backend = False
     _driver = driver
     return True
 
diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py
index 7aff5709b7..7d952e102f 100644
--- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py
+++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py
@@ -16,11 +16,11 @@
     Transaction,
     check_or_create_options,
     driver,
-    get_binding_version,
 )
 from cuda.core._utils.cuda_utils import (
     _check_driver_error as raise_if_driver_error,
 )
+from cuda.core._utils.version import binding_version
 
 __all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"]
 
@@ -99,8 +99,7 @@ class VirtualMemoryResourceOptions:
     _t = driver.CUmemAllocationType
     # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not
     _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED}  # noqa: RUF012
-    ver_major, ver_minor = get_binding_version()
-    if ver_major >= 13:
+    if binding_version() >= (13, 0, 0):
         _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED
 
     @staticmethod
diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx
index 4e8f810619..2eaff7fb11 100644
--- a/cuda_core/cuda/core/_module.pyx
+++ b/cuda_core/cuda/core/_module.pyx
@@ -6,8 +6,6 @@ from __future__ import annotations
 
 from libc.stddef cimport size_t
 
-import functools
-import threading
 from collections import namedtuple
 
 from cuda.core._device import Device
@@ -33,110 +31,12 @@ from cuda.core._utils.clear_error_support import (
     raise_code_path_meant_to_be_unreachable,
 )
 from cuda.core._utils.cuda_utils cimport HANDLE_RETURN
-from cuda.core._utils.cuda_utils import driver, get_binding_version
+from cuda.core._utils.version cimport cy_driver_version
+from cuda.core._utils.cuda_utils import driver
 from cuda.bindings cimport cydriver
 
 __all__ = ["Kernel", "ObjectCode"]
 
-# Lazy initialization state and synchronization
-# For Python 3.13t (free-threaded builds), we use a lock to ensure thread-safe initialization.
-# For regular Python builds with GIL, the lock overhead is minimal and the code remains safe.
-cdef object _init_lock = threading.Lock()
-cdef bint _inited = False
-cdef int _py_major_ver = 0
-cdef int _py_minor_ver = 0
-cdef int _driver_ver = 0
-cdef tuple _kernel_ctypes = None
-cdef bint _paraminfo_supported = False
-
-
-cdef int _lazy_init() except -1:
-    """
-    Initialize module-level state in a thread-safe manner.
-
-    This function is thread-safe and suitable for both:
-    - Regular Python builds (with GIL)
-    - Python 3.13t free-threaded builds (without GIL)
-
-    Uses double-checked locking pattern for performance:
-    - Fast path: check without lock if already initialized
-    - Slow path: acquire lock and initialize if needed
-    """
-    global _inited
-    # Fast path: already initialized (no lock needed for read)
-    if _inited:
-        return 0
-
-    cdef int drv_ver
-    # Slow path: acquire lock and initialize
-    with _init_lock:
-        # Double-check: another thread might have initialized while we waited
-        if _inited:
-            return 0
-
-        global _py_major_ver, _py_minor_ver, _driver_ver, _kernel_ctypes, _paraminfo_supported
-        # binding availability depends on cuda-python version
-        _py_major_ver, _py_minor_ver = get_binding_version()
-        _kernel_ctypes = (driver.CUkernel,)
-        with nogil:
-            HANDLE_RETURN(cydriver.cuDriverGetVersion(&drv_ver))
-        _driver_ver = drv_ver
-        _paraminfo_supported = _driver_ver >= 12040
-
-        # Mark as initialized (must be last to ensure all state is set)
-        _inited = True
-
-    return 0
-
-
-# Auto-initializing accessors (cdef for internal use)
-cdef inline int _get_py_major_ver() except -1:
-    """Get the Python binding major version, initializing if needed."""
-    _lazy_init()
-    return _py_major_ver
-
-
-cdef inline int _get_py_minor_ver() except -1:
-    """Get the Python binding minor version, initializing if needed."""
-    _lazy_init()
-    return _py_minor_ver
-
-
-cdef inline int _get_driver_ver() except -1:
-    """Get the CUDA driver version, initializing if needed."""
-    _lazy_init()
-    return _driver_ver
-
-
-cdef inline tuple _get_kernel_ctypes():
-    """Get the kernel ctypes tuple, initializing if needed."""
-    _lazy_init()
-    return _kernel_ctypes
-
-
-cdef inline bint _is_paraminfo_supported() except -1:
-    """Return True if cuKernelGetParamInfo is available (driver >= 12.4)."""
-    _lazy_init()
-    return _paraminfo_supported
-
-
-@functools.cache
-def _is_cukernel_get_library_supported() -> bool:
-    """Return True when cuKernelGetLibrary is available for inverse kernel-to-library lookup.
-
-    Requires cuda-python bindings >= 12.5 and driver >= 12.5.
-    """
-    return (
-        (_get_py_major_ver(), _get_py_minor_ver()) >= (12, 5)
-        and _get_driver_ver() >= 12050
-        and hasattr(driver, "cuKernelGetLibrary")
-    )
-
-
-cdef inline LibraryHandle _make_empty_library_handle():
-    """Create an empty LibraryHandle to indicate no library loaded."""
-    return LibraryHandle()  # Empty shared_ptr
-
 
 cdef class KernelAttributes:
     """Provides access to kernel attributes."""
@@ -149,7 +49,6 @@ cdef class KernelAttributes:
         cdef KernelAttributes self = KernelAttributes.__new__(KernelAttributes)
         self._h_kernel = h_kernel
         self._cache = {}
-        _lazy_init()
         return self
 
     cdef int _get_cached_attribute(self, int device_id, cydriver.CUfunction_attribute attribute) except? -1:
@@ -508,11 +407,10 @@ cdef class Kernel:
         return self._attributes
 
     cdef tuple _get_arguments_info(self, bint param_info=False):
-        if not _is_paraminfo_supported():
-            driver_ver = _get_driver_ver()
+        if cy_driver_version() < (12, 4, 0):
             raise NotImplementedError(
                 "Driver version 12.4 or newer is required for this function. "
-                f"Using driver version {driver_ver // 1000}.{(driver_ver % 1000) // 10}"
+                f"Using driver version {'.'.join(map(str, cy_driver_version()))}"
             )
         cdef size_t arg_pos = 0
         cdef list param_info_data = []
@@ -650,7 +548,6 @@ cdef class ObjectCode:
 
         # _h_library is assigned during _lazy_load_module
         self._h_library = LibraryHandle()  # Empty handle
-        _lazy_init()
 
         self._code_type = code_type
         self._module = module
diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx
index 96d7aa0567..194ef6da53 100644
--- a/cuda_core/cuda/core/_program.pyx
+++ b/cuda_core/cuda/core/_program.pyx
@@ -13,7 +13,7 @@ from dataclasses import dataclass
 import threading
 from warnings import warn
 
-from cuda.bindings import driver, nvrtc
+from cuda.bindings import nvrtc
 from cuda.pathfinder._optional_cuda_import import _optional_cuda_import
 
 from libcpp.vector cimport vector
@@ -34,11 +34,11 @@ from cuda.core._utils.cuda_utils import (
     CUDAError,
     _handle_boolean_option,
     check_or_create_options,
-    get_binding_version,
     handle_return,
     is_nested_sequence,
     is_sequence,
 )
+from cuda.core._utils.version import binding_version, driver_version
 
 __all__ = ["Program", "ProgramOptions"]
 
@@ -520,10 +520,10 @@ def _get_nvvm_module():
     _nvvm_import_attempted = True
 
     try:
-        version = get_binding_version()
-        if version < (12, 9):
+        version = binding_version()
+        if version < (12, 9, 0):
             raise RuntimeError(
-                f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. "
+                f"NVVM bindings require cuda-bindings >= 12.9.0, but found {'.'.join(map(str, version))}. "
                 "Please update cuda-bindings to use NVVM features."
             )
 
@@ -579,9 +579,9 @@ cdef inline void _process_define_macro(list options, object macro) except *:
 
 cpdef bint _can_load_generated_ptx() except? -1:
     """Check if the driver can load PTX generated by the current NVRTC version."""
-    driver_ver = handle_return(driver.cuDriverGetVersion())
+    drv = driver_version()
     nvrtc_major, nvrtc_minor = handle_return(nvrtc.nvrtcVersion())
-    return nvrtc_major * 1000 + nvrtc_minor * 10 <= driver_ver
+    return (nvrtc_major, nvrtc_minor, 0) <= drv
 
 
 cdef inline object _translate_program_options(object options):
diff --git a/cuda_core/cuda/core/_utils/cuda_utils.pyx b/cuda_core/cuda/core/_utils/cuda_utils.pyx
index ec6c587f3f..867d066ce2 100644
--- a/cuda_core/cuda/core/_utils/cuda_utils.pyx
+++ b/cuda_core/cuda/core/_utils/cuda_utils.pyx
@@ -4,7 +4,6 @@
 
 import functools
 from functools import partial
-import importlib.metadata
 import multiprocessing
 import platform
 import warnings
@@ -61,10 +60,6 @@ def cast_to_3_tuple(label, cfg):
     return cfg + (1,) * (3 - len(cfg))
 
 
-def _reduce_3_tuple(t: tuple):
-    return t[0] * t[1] * t[2]
-
-
 cdef int HANDLE_RETURN(cydriver.CUresult err) except?-1 nogil:
     if err != cydriver.CUresult.CUDA_SUCCESS:
         return _check_driver_error(err)
@@ -298,18 +293,6 @@ def is_nested_sequence(obj):
     return is_sequence(obj) and any(is_sequence(elem) for elem in obj)
 
 
-@functools.cache
-def get_binding_version():
-    try:
-        major_minor = importlib.metadata.version("cuda-bindings").split(".")[:2]
-    except importlib.metadata.PackageNotFoundError:
-        major_minor = importlib.metadata.version("cuda-python").split(".")[:2]
-    return tuple(int(v) for v in major_minor)
-
-@functools.cache
-def get_driver_version():
-    return handle_return(driver.cuDriverGetVersion())
-
 
 class Transaction:
     """
diff --git a/cuda_core/cuda/core/_utils/version.pxd b/cuda_core/cuda/core/_utils/version.pxd
new file mode 100644
index 0000000000..2746d463db
--- /dev/null
+++ b/cuda_core/cuda/core/_utils/version.pxd
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cdef tuple cy_binding_version()
+cdef tuple cy_driver_version()
diff --git a/cuda_core/cuda/core/_utils/version.pyx b/cuda_core/cuda/core/_utils/version.pyx
new file mode 100644
index 0000000000..09ea585242
--- /dev/null
+++ b/cuda_core/cuda/core/_utils/version.pyx
@@ -0,0 +1,83 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import functools
+import importlib.metadata
+import re
+
+from cuda.core._utils.cuda_utils import driver, handle_return
+
+# Matches the numeric prefix of one dot-separated version field (e.g. the "0" in "0rc1").
+_LEADING_DIGITS = re.compile(r"\d+")
+
+
+def _parse_version_triple(version):
+    """Convert a version string into a ``(major, minor, patch)`` tuple of ints.
+
+    Only the leading numeric release fields are used; parsing stops at the
+    first field carrying a non-numeric suffix (``0rc1``, ``dev45+gabc``, ...),
+    and missing fields are filled with ``0`` so the result always has three
+    elements and compares correctly against ``(X, Y, Z)`` thresholds.
+
+    Raises ``ValueError`` if the string does not start with a number.
+    """
+    release = []
+    for field in version.split(".")[:3]:
+        found = _LEADING_DIGITS.match(field)
+        if found is None:
+            break
+        release.append(int(found.group()))
+        if found.end() != len(field):
+            # A suffix such as "rc1" ends the numeric release segment.
+            break
+    if not release:
+        raise ValueError(f"Cannot parse version string {version!r}")
+    release.extend([0] * (3 - len(release)))
+    return tuple(release)
+
+
+@functools.cache
+def binding_version() -> tuple[int, int, int]:
+    """Return the cuda-bindings version as a (major, minor, patch) triple.
+
+    Falls back to the ``cuda-python`` distribution when ``cuda-bindings`` is
+    not installed. The result is cached by ``functools.cache``.
+    """
+    try:
+        raw = importlib.metadata.version("cuda-bindings")
+    except importlib.metadata.PackageNotFoundError:
+        raw = importlib.metadata.version("cuda-python")
+    return _parse_version_triple(raw)
+
+
+@functools.cache
+def driver_version() -> tuple[int, int, int]:
+    """Return the CUDA driver version as a (major, minor, patch) triple.
+
+    ``cuDriverGetVersion`` reports a single integer which is decoded here as
+    ``major * 1000 + minor * 10 + patch``. The result is cached.
+    """
+    cdef int ver = handle_return(driver.cuDriverGetVersion())
+    return (ver // 1000, (ver // 10) % 100, ver % 10)
+
+
+# Module-level C variables that hold the version tuples after the first cy_* call.
+cdef tuple _cached_binding_version = None
+cdef tuple _cached_driver_version = None
+
+
+cdef tuple cy_binding_version():
+    """Return :func:`binding_version`, memoized in a module-level C variable."""
+    global _cached_binding_version
+    if _cached_binding_version is None:
+        _cached_binding_version = binding_version()
+    return _cached_binding_version
+
+
+cdef tuple cy_driver_version():
+    """Return :func:`driver_version`, memoized in a module-level C variable."""
+    global _cached_driver_version
+    if _cached_driver_version is None:
+        _cached_driver_version = driver_version()
+    return _cached_driver_version
diff --git a/cuda_core/tests/graph/test_explicit.py b/cuda_core/tests/graph/test_explicit.py
index ab023f5ffa..33826cb5fd 100644
--- a/cuda_core/tests/graph/test_explicit.py
+++ b/cuda_core/tests/graph/test_explicit.py
@@ -48,18 +48,18 @@ def _skip_if_no_managed_mempool():
 
 
 def _driver_has_node_get_params():
-    from cuda.bindings import driver as drv
+    from cuda.core._utils.version import driver_version
 
-    return drv.cuDriverGetVersion()[1] >= 13020
+    return driver_version() >= (13, 2, 0)
 
 
 _HAS_NODE_GET_PARAMS = _driver_has_node_get_params()
 
 
 def _bindings_major_version():
-    from cuda.core._utils.cuda_utils import get_binding_version
+    from cuda.core._utils.version import binding_version
 
-    return get_binding_version()[0]
+    return binding_version()[0]
 
 
 _BINDINGS_MAJOR = _bindings_major_version()
diff --git a/cuda_core/tests/test_cuda_utils.py b/cuda_core/tests/test_cuda_utils.py
index 04670b96f2..f218182766 100644
--- a/cuda_core/tests/test_cuda_utils.py
+++ b/cuda_core/tests/test_cuda_utils.py
@@ -21,7 +21,9 @@ def test_driver_cu_result_explanations_health():
         assert code in expl_dict
         known_codes.add(code)
 
-    if cuda_utils.get_binding_version() >= (13, 0):
+    from cuda.core._utils.version import binding_version
+
+    if binding_version() >= (13, 0, 0):
         # Ensure expl_dict has no codes not known as a CUresult enum
         extra_expl = sorted(set(expl_dict.keys()) - known_codes)
         assert not extra_expl
@@ -37,7 +39,9 @@ def test_runtime_cuda_error_explanations_health():
         assert code in expl_dict
         known_codes.add(code)
 
-    if cuda_utils.get_binding_version() >= (13, 0):
+    from cuda.core._utils.version import binding_version
+
+    if binding_version() >= (13, 0, 0):
         # Ensure expl_dict has no codes not known as a cudaError_t enum
         extra_expl = sorted(set(expl_dict.keys()) - known_codes)
         assert not extra_expl
diff --git a/cuda_core/tests/test_device.py b/cuda_core/tests/test_device.py
index 95e47ce8d9..c4e1e9931f 100644
--- a/cuda_core/tests/test_device.py
+++ b/cuda_core/tests/test_device.py
@@ -10,7 +10,8 @@
 
 import cuda.core
 from cuda.core import Device
-from cuda.core._utils.cuda_utils import ComputeCapability, get_binding_version, handle_return
+from cuda.core._utils.cuda_utils import ComputeCapability, handle_return
+from cuda.core._utils.version import binding_version, driver_version
 
 
 def test_device_init_disabled():
@@ -18,14 +19,6 @@ def test_device_init_disabled():
         cuda.core._device.DeviceProperties()  # Ensure back door is locked.
 
 
-@pytest.fixture(scope="module")
-def cuda_version():
-    # binding availability depends on cuda-python version
-    _py_major_ver, _ = get_binding_version()
-    _driver_ver = handle_return(driver.cuDriverGetVersion())
-    return _py_major_ver, _driver_ver
-
-
 def test_to_system_device(deinit_cuda):
     from cuda.core.system import _system
 
@@ -115,8 +108,8 @@ def test_pci_bus_id():
 
 def test_uuid():
     device = Device()
-    driver_ver = handle_return(driver.cuDriverGetVersion())
-    if driver_ver < 13000:
+    drv_ver = driver_version()
+    if drv_ver < (13, 0, 0):
         uuid = handle_return(driver.cuDeviceGetUuid_v2(device.device_id))
     else:
         uuid = handle_return(driver.cuDeviceGetUuid(device.device_id))
@@ -306,8 +299,8 @@ def test_arch():
     ("only_partial_host_native_atomic_supported", bool),
 ]
 
-version = get_binding_version()
-if version[0] >= 13:
+version = binding_version()
+if version >= (13, 0, 0):
     cuda_base_properties += cuda_13_properties
 
 
@@ -324,7 +317,7 @@ def test_device_properties_complete():
 
     excluded_props = set()
     # Exclude CUDA 13+ specific properties when not available
-    if version[0] < 13:
+    if version < (13, 0, 0):
         excluded_props.update({prop[0] for prop in cuda_13_properties})
 
     filtered_tab_props = tab_props - excluded_props
diff --git a/cuda_core/tests/test_module.py b/cuda_core/tests/test_module.py
index 2bc7e25d21..598b46ac7a 100644
--- a/cuda_core/tests/test_module.py
+++ b/cuda_core/tests/test_module.py
@@ -10,7 +10,8 @@
 import cuda.core
 from cuda.core import Device, Kernel, ObjectCode, Program, ProgramOptions
 from cuda.core._program import _can_load_generated_ptx
-from cuda.core._utils.cuda_utils import CUDAError, driver, get_binding_version, handle_return
+from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return
+from cuda.core._utils.version import binding_version, driver_version
 
 try:
     import numba
@@ -34,11 +35,7 @@
 
 @pytest.fixture(scope="module")
 def cuda12_4_prerequisite_check():
-    # binding availability depends on cuda-python version
-    # and version of underlying CUDA toolkit
-    _py_major_ver, _ = get_binding_version()
-    _driver_ver = handle_return(driver.cuDriverGetVersion())
-    return _py_major_ver >= 12 and _driver_ver >= 12040
+    return binding_version() >= (12, 0, 0) and driver_version() >= (12, 4, 0)  # True only if both minimums are met
 
 
 def test_kernel_attributes_init_disabled():
diff --git a/cuda_core/tests/test_optional_dependency_imports.py b/cuda_core/tests/test_optional_dependency_imports.py
index ebdd10e4a7..730c6e7834 100644
--- a/cuda_core/tests/test_optional_dependency_imports.py
+++ b/cuda_core/tests/test_optional_dependency_imports.py
@@ -2,8 +2,6 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
-import types
-
 import pytest
 
 from cuda.core import _linker, _program
@@ -14,38 +12,26 @@ def restore_optional_import_state():
     saved_nvvm_module = _program._nvvm_module
     saved_nvvm_attempted = _program._nvvm_import_attempted
     saved_driver = _linker._driver
-    saved_driver_ver = _linker._driver_ver
     saved_inited = _linker._inited
     saved_use_nvjitlink = _linker._use_nvjitlink_backend
 
     _program._nvvm_module = None
     _program._nvvm_import_attempted = False
     _linker._driver = None
-    _linker._driver_ver = None
     _linker._inited = False
-    _linker._use_nvjitlink_backend = False
+    _linker._use_nvjitlink_backend = None
 
     yield
 
     _program._nvvm_module = saved_nvvm_module
     _program._nvvm_import_attempted = saved_nvvm_attempted
     _linker._driver = saved_driver
-    _linker._driver_ver = saved_driver_ver
     _linker._inited = saved_inited
     _linker._use_nvjitlink_backend = saved_use_nvjitlink
 
 
-def _patch_driver_version(monkeypatch, version=13000):
-    monkeypatch.setattr(
-        _linker,
-        "driver",
-        types.SimpleNamespace(cuDriverGetVersion=lambda: version),
-    )
-    monkeypatch.setattr(_linker, "handle_return", lambda value: value)
-
-
 def test_get_nvvm_module_reraises_nested_module_not_found(monkeypatch):
-    monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0))
 
     def fake__optional_cuda_import(modname, probe_function=None):
         assert modname == "cuda.bindings.nvvm"
@@ -62,7 +48,7 @@ def fake__optional_cuda_import(modname, probe_function=None):
 
 
 def test_get_nvvm_module_reports_missing_nvvm_module(monkeypatch):
-    monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0))
 
     def fake__optional_cuda_import(modname, probe_function=None):
         assert modname == "cuda.bindings.nvvm"
@@ -76,7 +62,7 @@ def fake__optional_cuda_import(modname, probe_function=None):
 
 
 def test_get_nvvm_module_handles_missing_libnvvm(monkeypatch):
-    monkeypatch.setattr(_program, "get_binding_version", lambda: (12, 9))
+    monkeypatch.setattr(_program, "binding_version", lambda: (12, 9, 0))
 
     def fake__optional_cuda_import(modname, probe_function=None):
         assert modname == "cuda.bindings.nvvm"
@@ -90,8 +76,6 @@ def fake__optional_cuda_import(modname, probe_function=None):
 
 
 def test_decide_nvjitlink_or_driver_reraises_nested_module_not_found(monkeypatch):
-    _patch_driver_version(monkeypatch)
-
     def fake__optional_cuda_import(modname, probe_function=None):
         assert modname == "cuda.bindings.nvjitlink"
         assert probe_function is not None
@@ -107,8 +91,6 @@ def fake__optional_cuda_import(modname, probe_function=None):
 
 
 def test_decide_nvjitlink_or_driver_falls_back_when_module_missing(monkeypatch):
-    _patch_driver_version(monkeypatch)
-
     def fake__optional_cuda_import(modname, probe_function=None):
         assert modname == "cuda.bindings.nvjitlink"
         assert probe_function is not None
diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py
index 2507b82f0d..ac40fb735d 100644
--- a/cuda_core/tests/test_program.py
+++ b/cuda_core/tests/test_program.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
 
+import contextlib
 import re
 import warnings
 
@@ -11,11 +12,10 @@
 from cuda.core._device import Device
 from cuda.core._module import Kernel, ObjectCode
 from cuda.core._program import Program, ProgramOptions
-from cuda.core._utils.cuda_utils import CUDAError, driver, handle_return
+from cuda.core._utils.cuda_utils import CUDAError, handle_return
 
 pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",)
 
-cuda_driver_version = handle_return(driver.cuDriverGetVersion())
 is_culink_backend = _linker._decide_nvjitlink_or_driver()
 
 
@@ -34,12 +34,8 @@ def _is_nvvm_available():
     not _is_nvvm_available(), reason="NVVM not available (libNVVM not found or cuda-bindings < 12.9.0)"
 )
 
-try:
-    from cuda.core._utils.cuda_utils import driver, handle_return, nvrtc
-
-    _cuda_driver_version = handle_return(driver.cuDriverGetVersion())
-except Exception:
-    _cuda_driver_version = 0
+with contextlib.suppress(ImportError):  # best-effort import; on failure `nvrtc` is simply left unbound
+    from cuda.core._utils.cuda_utils import nvrtc
 
 
 def _get_nvrtc_version_for_tests():