Skip to content

Commit 51e90e0

Browse files
authored
Merge branch 'main' into ipc_events2
2 parents dfc96c3 + ef4a9cc commit 51e90e0

File tree

15 files changed

+132
-109
lines changed

15 files changed

+132
-109
lines changed

.github/workflows/build-wheel.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ jobs:
3333
- "3.11"
3434
- "3.12"
3535
- "3.13"
36-
- "3.13t"
3736
- "3.14"
3837
- "3.14t"
3938
name: py${{ matrix.python-version }}
@@ -287,9 +286,7 @@ jobs:
287286
cuda-path: "./cuda_toolkit_prev"
288287

289288
- name: Download cuda.bindings build artifacts from the prior branch
290-
if: ${{ matrix.python-version == '3.13t'
291-
|| matrix.python-version == '3.14'
292-
|| matrix.python-version == '3.14t' }}
289+
if: startsWith(matrix.python-version, '3.14')
293290
env:
294291
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
295292
run: |

.github/workflows/ci.yml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,4 +202,28 @@ jobs:
202202
- doc
203203
steps:
204204
- name: Exit
205-
run: exit 0
205+
run: |
206+
# if any dependencies were cancelled, that's a failure
207+
#
208+
# see https://docs.github.com/en/actions/reference/workflows-and-actions/expressions#always
209+
# and https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/troubleshooting-required-status-checks#handling-skipped-but-required-checks
210+
# for why this cannot be encoded in the job-level `if:` field
211+
#
212+
# TL; DR: `$REASONS`
213+
#
214+
# The intersection of skipped-as-success and required status checks
215+
# creates a scenario where if you DON'T `always()` run this job, the
216+
# status check UI will block merging and if you DO `always()` run and
217+
# a dependency is _cancelled_ (due to a critical failure, which is
218+
# somehow not considered a failure ¯\_(ツ)_/¯) then the critically
219+
# failing job(s) will time out, causing a cancellation here and the
220+
# build to succeed which we don't want (originally this was just
221+
# 'exit 0')
222+
if ${{ needs.test-linux-64.result == 'cancelled' ||
223+
needs.test-linux-aarch64.result == 'cancelled' ||
224+
needs.test-windows.result == 'cancelled' ||
225+
needs.doc.result == 'cancelled' }}; then
226+
exit 1
227+
else
228+
exit 0
229+
fi

ci/test-matrix.json

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
1515
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
1616
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
17-
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
1817
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
1918
{ "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
2019
{ "ARCH": "arm64", "PY_VER": "3.9", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
@@ -27,7 +26,6 @@
2726
{ "ARCH": "arm64", "PY_VER": "3.12", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
2827
{ "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "a100", "DRIVER": "latest" },
2928
{ "ARCH": "arm64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
30-
{ "ARCH": "arm64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
3129
{ "ARCH": "arm64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" },
3230
{ "ARCH": "arm64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "a100", "DRIVER": "latest" }
3331
],
@@ -95,8 +93,6 @@
9593
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" },
9694
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
9795
{ "ARCH": "amd64", "PY_VER": "3.13", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
98-
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
99-
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
10096
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
10197
{ "ARCH": "amd64", "PY_VER": "3.14", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
10298
{ "ARCH": "amd64", "PY_VER": "3.14t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
@@ -106,9 +102,7 @@
106102
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "0", "GPU": "l4", "DRIVER": "latest" },
107103
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "11.8.0", "LOCAL_CTK": "1", "GPU": "t4", "DRIVER": "latest" },
108104
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
109-
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" },
110-
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "0", "GPU": "t4", "DRIVER": "latest" },
111-
{ "ARCH": "amd64", "PY_VER": "3.13t", "CUDA_VER": "13.0.2", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }
105+
{ "ARCH": "amd64", "PY_VER": "3.12", "CUDA_VER": "12.9.1", "LOCAL_CTK": "1", "GPU": "l4", "DRIVER": "latest" }
112106
]
113107
}
114108
}

cuda_bindings/docs/source/conf.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@
99

1010
# -- Path setup --------------------------------------------------------------
1111

12-
# If extensions (or modules to document with autodoc) are in another directory,
13-
# add these directories to sys.path here. If the directory is relative to the
14-
# documentation root, use os.path.abspath to make it absolute, like shown here.
1512
import os
13+
import sys
14+
from pathlib import Path
1615

17-
# import sys
18-
# sys.path.insert(0, os.path.abspath('.'))
16+
sys.path.insert(0, str((Path(__file__).parents[3] / "cuda_python" / "docs" / "exts").absolute()))
1917

2018

2119
# -- Project information -----------------------------------------------------
@@ -41,6 +39,7 @@
4139
"myst_nb",
4240
"enum_tools.autoenum",
4341
"sphinx_copybutton",
42+
"release_toc",
4443
]
4544

4645
nb_execution_mode = "off"

cuda_bindings/docs/source/release.rst

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,6 @@ Release Notes
66

77
.. toctree::
88
:maxdepth: 3
9+
:glob:
910

10-
13.0.3 <release/13.0.3-notes.rst>
11-
13.0.2 <release/13.0.2-notes.rst>
12-
13.0.1 <release/13.0.1-notes.rst>
13-
13.0.0 <release/13.0.0-notes.rst>
14-
12.9.4 <release/12.9.4-notes.rst>
15-
12.9.3 <release/12.9.3-notes.rst>
16-
12.9.2 <release/12.9.2-notes.rst>
17-
12.9.1 <release/12.9.1-notes.rst>
18-
12.9.0 <release/12.9.0-notes.rst>
19-
12.8.0 <release/12.8.0-notes.rst>
20-
12.6.2 <release/12.6.2-notes.rst>
21-
12.6.1 <release/12.6.1-notes.rst>
22-
12.6.0 <release/12.6.0-notes.rst>
23-
12.5.0 <release/12.5.0-notes.rst>
24-
12.4.0 <release/12.4.0-notes.rst>
25-
12.3.0 <release/12.3.0-notes.rst>
26-
12.2.1 <release/12.2.1-notes.rst>
27-
12.2.0 <release/12.2.0-notes.rst>
28-
12.1.0 <release/12.1.0-notes.rst>
29-
12.0.0 <release/12.0.0-notes.rst>
30-
11.8.7 <release/11.8.7-notes.rst>
31-
11.8.6 <release/11.8.6-notes.rst>
32-
11.8.5 <release/11.8.5-notes.rst>
33-
11.8.4 <release/11.8.4-notes.rst>
34-
11.8.3 <release/11.8.3-notes.rst>
35-
11.8.2 <release/11.8.2-notes.rst>
36-
11.8.1 <release/11.8.1-notes.rst>
37-
11.8.0 <release/11.8.0-notes.rst>
38-
11.7.1 <release/11.7.1-notes.rst>
39-
11.7.0 <release/11.7.0-notes.rst>
40-
11.6.1 <release/11.6.1-notes.rst>
41-
11.6.0 <release/11.6.0-notes.rst>
42-
11.5.0 <release/11.5.0-notes.rst>
43-
11.4.0 <release/11.4.0-notes.rst>
11+
release/*[0-9]-notes

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1132,7 +1132,7 @@ class VirtualMemoryResourceOptions:
11321132
location_type: VirtualMemoryLocationTypeT = "device"
11331133
handle_type: VirtualMemoryHandleTypeT = "posix_fd"
11341134
granularity: VirtualMemoryGranularityT = "recommended"
1135-
gpu_direct_rdma: bool = True
1135+
gpu_direct_rdma: bool = False
11361136
addr_hint: Optional[int] = 0
11371137
addr_align: Optional[int] = None
11381138
peers: Iterable[int] = field(default_factory=tuple)
@@ -1211,6 +1211,11 @@ class VirtualMemoryResource(MemoryResource):
12111211
if platform.system() == "Windows":
12121212
raise NotImplementedError("VirtualMemoryResource is not supported on Windows")
12131213

1214+
# Validate RDMA support if requested
1215+
if self.config.gpu_direct_rdma and self.device is not None:
1216+
if not self.device.properties.gpu_direct_rdma_supported:
1217+
raise RuntimeError("GPU Direct RDMA is not supported on this device")
1218+
12141219
@staticmethod
12151220
def _align_up(size: int, gran: int) -> int:
12161221
"""

cuda_core/docs/source/conf.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,11 @@
99

1010
# -- Path setup --------------------------------------------------------------
1111

12-
# If extensions (or modules to document with autodoc) are in another directory,
13-
# add these directories to sys.path here. If the directory is relative to the
14-
# documentation root, use os.path.abspath to make it absolute, like shown here.
1512
import os
13+
import sys
14+
from pathlib import Path
1615

17-
# sys.path.insert(0, os.path.abspath('.'))
16+
sys.path.insert(0, str((Path(__file__).parents[3] / "cuda_python" / "docs" / "exts").absolute()))
1817

1918

2019
# -- Project information -----------------------------------------------------
@@ -41,6 +40,7 @@
4140
"enum_tools.autoenum",
4241
"sphinx_copybutton",
4342
"sphinx_toolbox.more_autodoc.autoprotocol",
43+
"release_toc",
4444
]
4545

4646
# Add any paths that contain templates here, relative to this directory.

cuda_core/docs/source/release.rst

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,6 @@ Release Notes
66

77
.. toctree::
88
:maxdepth: 3
9+
:glob:
910

10-
0.4.0 <release/0.4.0-notes>
11-
0.3.2 <release/0.3.2-notes>
12-
0.3.1 <release/0.3.1-notes>
13-
0.3.0 <release/0.3.0-notes>
14-
0.2.0 <release/0.2.0-notes>
15-
0.1.1 <release/0.1.1-notes>
16-
0.1.0 <release/0.1.0-notes>
11+
release/*[0-9]-notes

cuda_core/examples/thread_block_cluster.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
launch,
2323
)
2424

25+
if np.lib.NumpyVersion(np.__version__) < "2.2.5":
26+
print("This example requires NumPy 2.2.5 or later", file=sys.stderr)
27+
sys.exit(0)
28+
2529
# prepare include
2630
cuda_path = os.environ.get("CUDA_PATH", os.environ.get("CUDA_HOME"))
2731
if cuda_path is None:

cuda_core/tests/test_memory.py

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from cuda.core.experimental.utils import StridedMemoryView
3030
from helpers.buffers import DummyUnifiedMemoryResource
3131

32-
from cuda_python_test_helpers import IS_WSL, supports_ipc_mempool
32+
from cuda_python_test_helpers import supports_ipc_mempool
3333

3434
POOL_SIZE = 2097152 # 2MB size
3535

@@ -299,13 +299,13 @@ def test_vmm_allocator_basic_allocation():
299299
This test verifies that VirtualMemoryResource can allocate memory
300300
using CUDA VMM APIs with default configuration.
301301
"""
302-
if platform.system() == "Windows":
303-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
304-
if IS_WSL:
305-
pytest.skip("VirtualMemoryResource is not supported on WSL")
306-
307302
device = Device()
308303
device.set_current()
304+
305+
# Skip if virtual memory management is not supported
306+
if not device.properties.virtual_memory_management_supported:
307+
pytest.skip("Virtual memory management is not supported on this device")
308+
309309
options = VirtualMemoryResourceOptions()
310310
# Create VMM allocator with default config
311311
vmm_mr = VirtualMemoryResource(device, config=options)
@@ -338,13 +338,17 @@ def test_vmm_allocator_policy_configuration():
338338
with different allocation policies and that the configuration affects
339339
the allocation behavior.
340340
"""
341-
if platform.system() == "Windows":
342-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
343-
if IS_WSL:
344-
pytest.skip("VirtualMemoryResource is not supported on WSL")
345341
device = Device()
346342
device.set_current()
347343

344+
# Skip if virtual memory management is not supported
345+
if not device.properties.virtual_memory_management_supported:
346+
pytest.skip("Virtual memory management is not supported on this device")
347+
348+
# Skip if GPU Direct RDMA is not supported (the guard below requires RDMA support)
349+
if not device.properties.gpu_direct_rdma_supported:
350+
pytest.skip("This test requires a device that doesn't support GPU Direct RDMA")
351+
348352
# Test with custom VMM config
349353
custom_config = VirtualMemoryResourceOptions(
350354
allocation_type="pinned",
@@ -397,13 +401,13 @@ def test_vmm_allocator_grow_allocation():
397401
This test verifies that VirtualMemoryResource can grow existing
398402
allocations while preserving the base pointer when possible.
399403
"""
400-
if platform.system() == "Windows":
401-
pytest.skip("VirtualMemoryResource is not supported on Windows TCC")
402-
if IS_WSL:
403-
pytest.skip("VirtualMemoryResource is not supported on WSL")
404404
device = Device()
405405
device.set_current()
406406

407+
# Skip if virtual memory management is not supported (we need it for VMM)
408+
if not device.properties.virtual_memory_management_supported:
409+
pytest.skip("Virtual memory management is not supported on this device")
410+
407411
options = VirtualMemoryResourceOptions()
408412

409413
vmm_mr = VirtualMemoryResource(device, config=options)
@@ -435,6 +439,29 @@ def test_vmm_allocator_grow_allocation():
435439
grown_buffer.close()
436440

437441

442+
def test_vmm_allocator_rdma_unsupported_exception():
443+
"""Test that VirtualMemoryResource throws an exception when RDMA is requested but device doesn't support it.
444+
445+
This test verifies that the VirtualMemoryResource constructor throws a RuntimeError
446+
when gpu_direct_rdma=True is requested but the device doesn't support GPU Direct RDMA.
447+
"""
448+
device = Device()
449+
device.set_current()
450+
451+
# Skip if virtual memory management is not supported (we need it for VMM)
452+
if not device.properties.virtual_memory_management_supported:
453+
pytest.skip("Virtual memory management is not supported on this device")
454+
455+
# Skip if GPU Direct RDMA is supported (we want to test the unsupported case)
456+
if device.properties.gpu_direct_rdma_supported:
457+
pytest.skip("This test requires a device that doesn't support GPU Direct RDMA")
458+
459+
# Test that requesting RDMA on an unsupported device throws an exception
460+
options = VirtualMemoryResourceOptions(gpu_direct_rdma=True)
461+
with pytest.raises(RuntimeError, match="GPU Direct RDMA is not supported on this device"):
462+
VirtualMemoryResource(device, config=options)
463+
464+
438465
def test_mempool(mempool_device):
439466
device = mempool_device
440467

0 commit comments

Comments
 (0)