Skip to content

Commit e36f1c1

Browse files
authored
Merge branch 'main' into ipc-mempool-channel
2 parents 948af33 + 85ff9c2 commit e36f1c1

File tree

21 files changed

+204
-101
lines changed

21 files changed

+204
-101
lines changed

.bandit

Lines changed: 0 additions & 5 deletions
This file was deleted.

.github/workflows/bandit.yml

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,27 @@ jobs:
1919
permissions:
2020
security-events: write
2121
steps:
22-
- name: Perform Bandit Analysis
23-
# KEEP IN SYNC WITH bandit rev in .pre-commit-config.yaml
24-
# Current runner uses Python 3.8, so the action installs bandit==1.7.10
25-
# via `pip install bandit[sarif]`. If runner Python moves to >=3.9,
26-
# the action will resolve to 1.8.x and you'll need to bump pre-commit.
27-
# (Bandit >=1.8.0 dropped Python 3.8 via Requires-Python metadata.)
28-
uses: PyCQA/bandit-action@8a1b30610f61f3f792fe7556e888c9d7dffa52de # v1.0.0
22+
- name: Checkout
23+
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
24+
25+
- name: Install uv
26+
uses: astral-sh/setup-uv@b75a909f75acd358c2196fb9a5f1299a9a8868a4 # v6.7.0
27+
28+
- name: Get ignore codes
29+
id: ignore-codes
30+
# These are computed so that we can run only the `S` (bandit)
31+
# checks. Passing --select to ruff overrides any config files
32+
# (ruff.toml, pyproject.toml, etc), so to avoid having to keep everything
33+
# in sync we grab them from the TOML programmatically
34+
run: |
35+
set -euxo pipefail
36+
37+
echo "codes=$(uvx toml2json ./ruff.toml | jq -r '.lint.ignore | map(select(test("^S\\d+"))) | join(",")')" >> "$GITHUB_OUTPUT"
38+
- name: Perform Bandit Analysis using Ruff
39+
uses: astral-sh/ruff-action@57714a7c8a2e59f32539362ba31877a1957dded1 # v3.5.1
40+
with:
41+
args: "check --select S --ignore ${{ steps.ignore-codes.outputs.codes }} --output-format sarif --output-file results.sarif"
42+
- name: Upload SARIF file
43+
uses: github/codeql-action/upload-sarif@v3
44+
with:
45+
sarif_file: results.sarif

.pre-commit-config.yaml

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ ci:
1515
# pre-commit autoupdate --freeze
1616
repos:
1717
- repo: https://github.com/astral-sh/ruff-pre-commit
18-
rev: 0b19ef1fd6ad680ed7752d6daba883ce1265a6de # frozen: v0.12.2
18+
rev: f298305809c552671cc47e0fec0ba43e96c146a2 # frozen: v0.13.2
1919
hooks:
2020
- id: ruff
2121
args: [--fix, --show-fixes]
@@ -40,7 +40,7 @@ repos:
4040

4141
# Standard hooks
4242
- repo: https://github.com/pre-commit/pre-commit-hooks
43-
rev: "v5.0.0"
43+
rev: "3e8a8703264a2f4a69428a0aa4dcb512790b2c8c" # frozen: v6.0.0
4444
hooks:
4545
- id: check-added-large-files
4646
- id: check-case-conflict
@@ -58,22 +58,14 @@ repos:
5858

5959
# Checking for common mistakes
6060
- repo: https://github.com/pre-commit/pygrep-hooks
61-
rev: "v1.10.0"
61+
rev: "3a6eb0fadf60b3cccfd80bad9dbb6fae7e47b316" # frozen: v1.10.0
6262
hooks:
6363
- id: rst-backticks
6464
- id: rst-directive-colons
6565
- id: rst-inline-touching-normal
6666

67-
- repo: https://github.com/PyCQA/bandit
68-
rev: "36fd65054fc8864b4037d0918904f9331512feb5" # frozen: 1.7.10 KEEP IN SYNC WITH .github/workflows/bandit.yml
69-
hooks:
70-
- id: bandit
71-
args:
72-
- --ini
73-
- .bandit
74-
7567
- repo: https://github.com/pre-commit/mirrors-mypy
76-
rev: 0f86793af5ef5f6dc63c8d04a3cabfa3ea8f9c6a # frozen: v1.16.1
68+
rev: 9f70dc58c23dfcca1b97af99eaeee3140a807c7e # frozen: v1.18.2
7769
hooks:
7870
- id: mypy
7971
name: mypy-pathfinder

cuda_bindings/cuda/bindings/utils/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3-
43
from typing import Any, Callable
54

65
from ._ptx_utils import get_minimal_required_cuda_ver_from_ptx_ver, get_ptx_ver

cuda_bindings/tests/test_cuda.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ def test_get_error_name_and_string():
653653
@pytest.mark.skipif(not callableBinary("nvidia-smi"), reason="Binary existance needed")
654654
def test_device_get_name():
655655
# TODO: Refactor this test once we have nvml bindings to avoid the use of subprocess
656-
import subprocess # nosec B404
656+
import subprocess
657657

658658
(err,) = cuda.cuInit(0)
659659
assert err == cuda.CUresult.CUDA_SUCCESS
@@ -663,8 +663,10 @@ def test_device_get_name():
663663
assert err == cuda.CUresult.CUDA_SUCCESS
664664

665665
p = subprocess.check_output(
666-
["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], shell=False, stderr=subprocess.PIPE
667-
) # nosec B603, B607
666+
["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], # noqa: S607
667+
shell=False,
668+
stderr=subprocess.PIPE,
669+
)
668670

669671
delimiter = b"\r\n" if platform.system() == "Windows" else b"\n"
670672
expect = p.split(delimiter)

cuda_bindings/tests/test_nvvm.py

Lines changed: 17 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,12 @@
44

55
import binascii
66
import re
7-
import textwrap
87
from contextlib import contextmanager
98

109
import pytest
1110
from cuda.bindings import nvvm
1211

13-
MINIMAL_NVVMIR_FIXTURE_PARAMS = ["txt", "bitcode_static"]
14-
try:
15-
import llvmlite.binding as llvmlite_binding # Optional test dependency.
16-
except ImportError:
17-
llvmlite_binding = None
18-
else:
19-
MINIMAL_NVVMIR_FIXTURE_PARAMS.append("bitcode_dynamic")
20-
21-
MINIMAL_NVVMIR_TXT = b"""\
12+
MINIMAL_NVVMIR_TXT_TEMPLATE = b"""\
2213
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
2314
2415
target triple = "nvptx64-nvidia-cuda"
@@ -130,43 +121,24 @@
130121
"6e673e0000000000",
131122
}
132123

133-
MINIMAL_NVVMIR_CACHE = {}
134-
135124

136-
@pytest.fixture(params=MINIMAL_NVVMIR_FIXTURE_PARAMS)
125+
@pytest.fixture(params=("txt", "bitcode_static"))
137126
def minimal_nvvmir(request):
138-
for pass_counter in range(2):
139-
nvvmir = MINIMAL_NVVMIR_CACHE.get(request.param, -1)
140-
if nvvmir != -1:
141-
if nvvmir is None:
142-
pytest.skip(f"UNAVAILABLE: {request.param}")
143-
return nvvmir
144-
if pass_counter:
145-
raise AssertionError("This code path is meant to be unreachable.")
146-
# Build cache entries, then try again (above).
147-
major, minor, debug_major, debug_minor = nvvm.ir_version()
148-
txt = MINIMAL_NVVMIR_TXT % (major, debug_major)
149-
if llvmlite_binding is None:
150-
bitcode_dynamic = None
151-
else:
152-
bitcode_dynamic = llvmlite_binding.parse_assembly(txt.decode()).as_bitcode()
153-
bitcode_static = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major))
154-
if bitcode_static is not None:
155-
bitcode_static = binascii.unhexlify(bitcode_static)
156-
MINIMAL_NVVMIR_CACHE["txt"] = txt
157-
MINIMAL_NVVMIR_CACHE["bitcode_dynamic"] = bitcode_dynamic
158-
MINIMAL_NVVMIR_CACHE["bitcode_static"] = bitcode_static
159-
if bitcode_static is None:
160-
if bitcode_dynamic is None:
161-
raise RuntimeError("Please `pip install llvmlite` to generate `bitcode_static` (see PR #443)")
162-
bitcode_hex = binascii.hexlify(bitcode_dynamic).decode("ascii")
163-
print("\n\nMINIMAL_NVVMIR_BITCODE_STATIC = { # PLEASE ADD TO test_nvvm.py")
164-
print(f" ({major}, {debug_major}): # (major, debug_major)")
165-
lines = textwrap.wrap(bitcode_hex, width=80)
166-
for line in lines[:-1]:
167-
print(f' "{line}"')
168-
print(f' "{lines[-1]}",')
169-
print("}\n", flush=True)
127+
major, minor, debug_major, debug_minor = nvvm.ir_version()
128+
129+
if request.param == "txt":
130+
return MINIMAL_NVVMIR_TXT_TEMPLATE % (major, debug_major)
131+
132+
bitcode_static_binascii = MINIMAL_NVVMIR_BITCODE_STATIC.get((major, debug_major))
133+
if bitcode_static_binascii:
134+
return binascii.unhexlify(bitcode_static_binascii)
135+
raise RuntimeError(
136+
"Static bitcode for NVVM IR version "
137+
f"{major}.{debug_major} is not available in this test.\n"
138+
"Maintainers: Please run the helper script to generate it and add the "
139+
"output to the MINIMAL_NVVMIR_BITCODE_STATIC dict:\n"
140+
" ../../toolshed/build_static_bitcode_input.py"
141+
)
170142

171143

172144
@pytest.fixture(params=[nvvm.compile_program, nvvm.verify_program])

cuda_bindings/tests/test_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import platform
55
import random
6-
import subprocess # nosec B404
6+
import subprocess
77
import sys
88
from pathlib import Path
99

@@ -72,7 +72,7 @@ def test_ptx_utils(kernel, actual_ptx_ver, min_cuda_ver):
7272
),
7373
)
7474
def test_get_handle(target):
75-
ptr = random.randint(1, 1024)
75+
ptr = random.randint(1, 1024) # noqa: S311
7676
obj = target(ptr)
7777
handle = get_cuda_native_handle(obj)
7878
assert handle == ptr
@@ -105,6 +105,6 @@ def test_get_handle_error(target):
105105
],
106106
)
107107
def test_cyclical_imports(module):
108-
subprocess.check_call( # nosec B603
108+
subprocess.check_call( # noqa: S603
109109
[sys.executable, Path(__file__).parent / "utils" / "check_cyclical_import.py", f"cuda.bindings.{module}"],
110110
)

cuda_core/cuda/core/experimental/_event.pyx

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ from cuda.core.experimental._utils.cuda_utils import (
1818
driver,
1919
handle_return,
2020
)
21-
21+
import sys
2222
if TYPE_CHECKING:
2323
import cuda.bindings
2424
from cuda.core.experimental._device import Device
@@ -108,15 +108,20 @@ cdef class Event:
108108
self._ctx_handle = ctx_handle
109109
return self
110110

111-
cpdef close(self):
112-
"""Destroy the event."""
111+
cdef _shutdown_safe_close(self, is_shutting_down=sys.is_finalizing):
112+
if is_shutting_down and is_shutting_down():
113+
return
113114
if self._handle is not None:
114115
err, = driver.cuEventDestroy(self._handle)
115116
self._handle = None
116117
raise_if_driver_error(err)
117118

119+
cpdef close(self):
120+
"""Destroy the event."""
121+
self._shutdown_safe_close(is_shutting_down=None)
122+
118123
def __del__(self):
119-
self.close()
124+
self._shutdown_safe_close()
120125

121126
def __isub__(self, other):
122127
return NotImplemented

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ from cuda.core.experimental._utils.cuda_utils cimport (
99
_check_driver_error as raise_if_driver_error,
1010
check_or_create_options,
1111
)
12+
import sys
1213

1314
from dataclasses import dataclass
1415
from typing import Optional, TypeVar, Union, TYPE_CHECKING
@@ -72,7 +73,16 @@ cdef class Buffer:
7273
return self
7374

7475
def __del__(self):
75-
self.close()
76+
self._shutdown_safe_close()
77+
78+
cdef _shutdown_safe_close(self, stream: Stream = None, is_shutting_down=sys.is_finalizing):
79+
if is_shutting_down and is_shutting_down():
80+
return
81+
if self._ptr and self._mr is not None:
82+
self._mr.deallocate(self._ptr, self._size, stream)
83+
self._ptr = 0
84+
self._mr = None
85+
self._ptr_obj = None
7686

7787
def __reduce__(self):
7888
return Buffer.from_ipc_descriptor, (self.memory_resource, self.get_ipc_descriptor())
@@ -89,11 +99,7 @@ cdef class Buffer:
8999
The stream object to use for asynchronous deallocation. If None,
90100
the behavior depends on the underlying memory resource.
91101
"""
92-
if self._ptr and self._mr is not None:
93-
self._mr.deallocate(self._ptr, self._size, stream)
94-
self._ptr = 0
95-
self._mr = None
96-
self._ptr_obj = None
102+
self._shutdown_safe_close(stream, is_shutting_down=None)
97103

98104
@property
99105
def handle(self) -> DevicePointerT:

cuda_core/cuda/core/experimental/_memoryview.pyx

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,23 @@ cdef class StridedMemoryView:
105105
else:
106106
pass
107107

108+
def __dealloc__(self):
109+
if self.dl_tensor == NULL:
110+
return
111+
112+
if cpython.PyCapsule_IsValid(
113+
self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME):
114+
data = cpython.PyCapsule_GetPointer(
115+
self.metadata, DLPACK_VERSIONED_TENSOR_USED_NAME)
116+
dlm_tensor_ver = <DLManagedTensorVersioned*>data
117+
dlm_tensor_ver.deleter(dlm_tensor_ver)
118+
elif cpython.PyCapsule_IsValid(
119+
self.metadata, DLPACK_TENSOR_USED_NAME):
120+
data = cpython.PyCapsule_GetPointer(
121+
self.metadata, DLPACK_TENSOR_USED_NAME)
122+
dlm_tensor = <DLManagedTensor*>data
123+
dlm_tensor.deleter(dlm_tensor)
124+
108125
@property
109126
def shape(self) -> tuple[int]:
110127
if self._shape is None and self.exporting_obj is not None:

0 commit comments

Comments
 (0)