Skip to content

Commit 467f108

Browse files
authored
Merge branch 'main' into faster-conversion
2 parents 0baf1fa + 73e2a79 commit 467f108

File tree

9 files changed

+204
-10
lines changed

9 files changed

+204
-10
lines changed

ci/tools/run-tests

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ elif [[ "${test_module}" == "bindings" ]]; then
4747
pip install $(ls "${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl)[all] --group test
4848
fi
4949
echo "Running bindings tests"
50-
${SANITIZER_CMD} pytest -rxXs -v --durations=0 tests/
50+
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
5151
if [[ "${SKIP_CYTHON_TEST}" == 0 ]]; then
52-
${SANITIZER_CMD} pytest -rxXs -v --durations=0 tests/cython
52+
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython
5353
fi
5454
popd
5555
elif [[ "${test_module}" == "core" ]]; then
@@ -80,11 +80,11 @@ elif [[ "${test_module}" == "core" ]]; then
8080
pip install $(ls "${CUDA_CORE_ARTIFACTS_DIR}"/*.whl)["cu${TEST_CUDA_MAJOR}"] --group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}"
8181
fi
8282
echo "Running core tests"
83-
${SANITIZER_CMD} pytest -rxXs -v --durations=0 tests/
83+
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/
8484
# Currently our CI always installs the latest bindings (from either major version).
8585
# This is not compatible with the test requirements.
8686
if [[ "${SKIP_CYTHON_TEST}" == 0 ]]; then
87-
${SANITIZER_CMD} pytest -rxXs -v --durations=0 tests/cython
87+
${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython
8888
fi
8989
popd
9090
fi

cuda_bindings/cuda/bindings/driver.pyx.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7952,7 +7952,7 @@ cdef class CUgraphConditionalHandle:
79527952

79537953
{{if 'CUlaunchAttributeID_enum' in found_types}}
79547954

7955-
class CUlaunchAttributeID(_FastEnum):
7955+
class CUkernelNodeAttrID(_FastEnum):
79567956
"""
79577957
Launch attributes enum; used as id field of
79587958
:py:obj:`~.CUlaunchAttribute`
@@ -8184,7 +8184,7 @@ class CUlaunchAttributeID(_FastEnum):
81848184
{{endif}}
81858185
{{if 'CUlaunchAttributeID_enum' in found_types}}
81868186

8187-
class CUlaunchAttributeID(_FastEnum):
8187+
class CUstreamAttrID(_FastEnum):
81888188
"""
81898189
Launch attributes enum; used as id field of
81908190
:py:obj:`~.CUlaunchAttribute`

cuda_bindings/cuda/bindings/runtime.pyx.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6260,7 +6260,7 @@ class cudaGLMapFlags(_FastEnum):
62606260
{{endif}}
62616261
{{if 'cudaLaunchAttributeID' in found_types}}
62626262

6263-
class cudaLaunchAttributeID(_FastEnum):
6263+
class cudaStreamAttrID(_FastEnum):
62646264
"""
62656265
Launch attributes enum; used as id field of
62666266
:py:obj:`~.cudaLaunchAttribute`
@@ -6491,7 +6491,7 @@ class cudaLaunchAttributeID(_FastEnum):
64916491
{{endif}}
64926492
{{if 'cudaLaunchAttributeID' in found_types}}
64936493

6494-
class cudaLaunchAttributeID(_FastEnum):
6494+
class cudaKernelNodeAttrID(_FastEnum):
64956495
"""
64966496
Launch attributes enum; used as id field of
64976497
:py:obj:`~.cudaLaunchAttribute`

cuda_core/cuda/core/_memory/_managed_memory_resource.pyx

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

55
from __future__ import annotations
66

77
from cuda.bindings cimport cydriver
8+
89
from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions
910
from cuda.core._utils.cuda_utils cimport (
11+
HANDLE_RETURN,
1012
check_or_create_options,
1113
)
1214

1315
from dataclasses import dataclass
16+
import threading
17+
import warnings
1418

1519
__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions']
1620

@@ -91,6 +95,7 @@ cdef class ManagedMemoryResource(_MemPool):
9195
opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED
9296

9397
super().__init__(device_id, opts_base)
98+
_check_concurrent_managed_access()
9499
ELSE:
95100
raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")
96101

@@ -103,3 +108,47 @@ cdef class ManagedMemoryResource(_MemPool):
103108
def is_host_accessible(self) -> bool:
104109
"""Return True. This memory resource provides host-accessible buffers."""
105110
return True
111+
112+
113+
# Module-wide state for the one-time concurrent-managed-access warning.
# The flag records that the check has already completed; the lock serializes
# the first check so only one thread queries the driver and warns.
cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()


cdef inline _check_concurrent_managed_access():
    """Warn at most once if concurrent managed memory access is unsupported.

    The first completed call queries
    ``CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS`` on device 0 and issues a
    ``UserWarning`` when the attribute is zero. The module-level flag is then
    set, whether or not a warning was issued, so later calls do nothing.
    """
    global _concurrent_access_warned
    cdef int supported = 0

    # Cheap unlocked test first; the flag is tested again under the lock so
    # threads racing through the first call cannot both warn.
    if not _concurrent_access_warned:
        with _concurrent_access_lock:
            if not _concurrent_access_warned:
                # concurrent_managed_access is a system-level attribute for
                # sm_60 and later, so any device will do.
                # NOTE(review): HANDLE_RETURN presumably raises on a driver
                # error, in which case the flag stays unset -- confirm.
                with nogil:
                    HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                        &supported,
                        cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                        0))

                if supported == 0:
                    warnings.warn(
                        "This platform does not support concurrent managed memory access "
                        "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                        "allocation is forbidden while any GPU kernel is in flight, even "
                        "if the kernel does not touch that allocation. Failing to "
                        "synchronize before host access will cause a segfault. "
                        "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                        "index.html#gpu-exclusive-access-to-managed-memory",
                        UserWarning,
                        stacklevel=3
                    )

                _concurrent_access_warned = True
149+
150+
151+
def reset_concurrent_access_warning():
    """Re-arm the one-time concurrent managed access warning.

    Clears the module-level ``_concurrent_access_warned`` flag so that the
    next check runs again. Intended for use by tests.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False

cuda_core/tests/test_build_hooks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@
2424

2525
import pytest
2626

27-
# build_hooks.py imports Cython at the top level, so skip if not available
27+
# build_hooks.py imports Cython and setuptools at the top level, so skip if either is unavailable
2828
pytest.importorskip("Cython")
29+
pytest.importorskip("setuptools")
2930

3031

3132
def _load_build_hooks():
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
"""
5+
Test that a warning is emitted when ManagedMemoryResource is created on a
6+
platform without concurrent managed memory access.
7+
8+
These tests only run on affected platforms (concurrent_managed_access is False).
9+
"""
10+
11+
import warnings
12+
13+
import cuda.bindings
14+
import pytest
15+
from cuda.core import Device, ManagedMemoryResource, ManagedMemoryResourceOptions
16+
from cuda.core._memory._managed_memory_resource import reset_concurrent_access_warning
17+
18+
# Major component of the installed cuda.bindings version string.
_cuda_major = int(cuda.bindings.__version__.partition(".")[0])

# Marker that skips a test when cuda.bindings is older than major version 13.
requires_cuda_13 = pytest.mark.skipif(
    _cuda_major < 13,
    reason="ManagedMemoryResource requires CUDA 13.0 or later",
)
24+
25+
26+
def _make_managed_mr(device_id):
    """Build a ManagedMemoryResource whose preferred location is ``device_id``."""
    options = ManagedMemoryResourceOptions(preferred_location=device_id)
    return ManagedMemoryResource(options=options)
29+
30+
31+
@pytest.fixture
def device_without_concurrent_managed_access(init_cuda):
    """Yield-free fixture: the current device, if it lacks concurrent managed access.

    The test is skipped when the device has no memory pool support, or when
    it does support concurrent managed access (no warning is expected then).
    """
    dev = Device()
    dev.set_current()

    props = dev.properties
    # Order matters for the reported skip reason: pool support is checked first.
    if not props.memory_pools_supported:
        pytest.skip("Device does not support memory pools")
    if props.concurrent_managed_access:
        pytest.skip("Device supports concurrent managed access; warning not applicable")

    return dev
44+
45+
46+
@requires_cuda_13
def test_warning_emitted(device_without_concurrent_managed_access):
    """ManagedMemoryResource emits a warning when concurrent managed access is unsupported."""
    dev_id = device_without_concurrent_managed_access.device_id
    # The warning fires once per process; re-arm it so this test observes it
    # regardless of which tests ran earlier.
    reset_concurrent_access_warning()

    with warnings.catch_warnings(record=True) as w:
        # "always" defeats the default once-per-location filter.
        warnings.simplefilter("always")
        mr = _make_managed_mr(dev_id)

    # Close the resource even if an assertion fails, so a failing test does
    # not leak the memory pool into later tests.
    try:
        concurrent_warnings = [
            warning for warning in w if "concurrent managed memory access" in str(warning.message).lower()
        ]
        assert len(concurrent_warnings) == 1
        assert concurrent_warnings[0].category is UserWarning
        assert "segfault" in str(concurrent_warnings[0].message).lower()
    finally:
        mr.close()
64+
65+
66+
@requires_cuda_13
def test_warning_emitted_only_once(device_without_concurrent_managed_access):
    """Warning fires only once even when multiple ManagedMemoryResources are created."""
    dev_id = device_without_concurrent_managed_access.device_id
    # Re-arm the one-time warning so the count below starts from zero.
    reset_concurrent_access_warning()

    # Track every resource that was successfully created so that all of them
    # are closed even if the second construction or the assertion fails.
    resources = []
    try:
        with warnings.catch_warnings(record=True) as w:
            # "always" defeats the default once-per-location filter, so a
            # second warning would be recorded if the module emitted one.
            warnings.simplefilter("always")
            resources.append(_make_managed_mr(dev_id))
            resources.append(_make_managed_mr(dev_id))

        concurrent_warnings = [
            warning for warning in w if "concurrent managed memory access" in str(warning.message).lower()
        ]
        assert len(concurrent_warnings) == 1
    finally:
        # Close in creation order, matching the original teardown order.
        for mr in resources:
            mr.close()

0 commit comments

Comments
 (0)