Skip to content

Commit d316f8f

Browse files
authored
Merge branch 'main' into fix-docs-build-on-tag-trigger
2 parents e016d1c + e44a83c commit d316f8f

File tree

9 files changed

+40
-17
lines changed

9 files changed

+40
-17
lines changed

cuda_bindings/cuda/bindings/runtime.pyx.in

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21465,7 +21465,7 @@ def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData):
2146521465

2146621466
if err != cyruntime.cudaSuccess:
2146721467
return (_cudaError_t(err), None)
21468-
return (_cudaError_t(err), callback)
21468+
return (_cudaError_t_SUCCESS, callback)
2146921469
{{endif}}
2147021470

2147121471
{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}

cuda_core/cuda/core/_context.pyx

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,10 @@ cdef class Context:
3939
return None
4040
return as_py(self._h_context)
4141

42+
@property
43+
def _handle(self):
44+
return self.handle
45+
4246
def __eq__(self, other):
4347
if not isinstance(other, Context):
4448
return NotImplemented

cuda_core/cuda/core/_cpp/resource_handles.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -750,7 +750,8 @@ LibraryHandle create_library_handle_from_file(const char* path) {
750750
new LibraryBox{library},
751751
[](const LibraryBox* b) {
752752
GILReleaseGuard gil;
753-
p_cuLibraryUnload(b->resource);
753+
// TODO: re-enable once LibraryBox tracks its owning context
754+
// p_cuLibraryUnload(b->resource);
754755
delete b;
755756
}
756757
);
@@ -768,7 +769,8 @@ LibraryHandle create_library_handle_from_data(const void* data) {
768769
new LibraryBox{library},
769770
[](const LibraryBox* b) {
770771
GILReleaseGuard gil;
771-
p_cuLibraryUnload(b->resource);
772+
// TODO: re-enable once LibraryBox tracks its owning context
773+
// p_cuLibraryUnload(b->resource);
772774
delete b;
773775
}
774776
);

cuda_core/cuda/core/_event.pyx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -295,9 +295,9 @@ cdef class IPCEventDescriptor:
295295
def __init__(self, *arg, **kwargs):
296296
raise RuntimeError("IPCEventDescriptor objects cannot be instantiated directly. Please use Event APIs.")
297297

298-
@classmethod
299-
def _init(cls, reserved: bytes, busy_waited: cython.bint):
300-
cdef IPCEventDescriptor self = IPCEventDescriptor.__new__(cls)
298+
@staticmethod
299+
def _init(reserved: bytes, busy_waited: cython.bint):
300+
cdef IPCEventDescriptor self = IPCEventDescriptor.__new__(IPCEventDescriptor)
301301
self._reserved = reserved
302302
self._busy_waited = busy_waited
303303
return self
@@ -307,7 +307,7 @@ cdef class IPCEventDescriptor:
307307
return self._reserved == rhs._reserved
308308

309309
def __reduce__(self):
310-
return self._init, (self._reserved, self._busy_waited)
310+
return IPCEventDescriptor._init, (self._reserved, self._busy_waited)
311311

312312

313313
def _reduce_event(event):

cuda_core/cuda/core/_memory/_buffer.pyx

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -123,9 +123,13 @@ cdef class Buffer:
123123
self._mem_attrs_inited = False
124124
return self
125125

126+
@staticmethod
127+
def _reduce_helper(mr, ipc_descriptor):
128+
return Buffer.from_ipc_descriptor(mr, ipc_descriptor)
129+
126130
def __reduce__(self):
127131
# Must not serialize the parent's stream!
128-
return Buffer.from_ipc_descriptor, (self.memory_resource, self.get_ipc_descriptor())
132+
return Buffer._reduce_helper, (self.memory_resource, self.get_ipc_descriptor())
129133

130134
@staticmethod
131135
def from_handle(

cuda_core/cuda/core/_memory/_ipc.pyx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,15 +81,15 @@ cdef class IPCBufferDescriptor:
8181
def __init__(self, *arg, **kwargs):
8282
raise RuntimeError("IPCBufferDescriptor objects cannot be instantiated directly. Please use MemoryResource APIs.")
8383

84-
@classmethod
85-
def _init(cls, reserved: bytes, size: int):
86-
cdef IPCBufferDescriptor self = IPCBufferDescriptor.__new__(cls)
84+
@staticmethod
85+
def _init(reserved: bytes, size: int):
86+
cdef IPCBufferDescriptor self = IPCBufferDescriptor.__new__(IPCBufferDescriptor)
8787
self._payload = reserved
8888
self._size = size
8989
return self
9090

9191
def __reduce__(self):
92-
return self._init, (self._payload, self._size)
92+
return IPCBufferDescriptor._init, (self._payload, self._size)
9393

9494
@property
9595
def size(self):

cuda_core/cuda/core/_module.pyx

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -561,6 +561,10 @@ cdef class Kernel:
561561
"""
562562
return as_py(self._h_kernel)
563563

564+
@property
565+
def _handle(self):
566+
return self.handle
567+
564568
@staticmethod
565569
def from_handle(handle, mod: ObjectCode = None) -> Kernel:
566570
"""Creates a new :obj:`Kernel` object from a foreign kernel handle.
@@ -657,10 +661,9 @@ cdef class ObjectCode:
657661

658662
return self
659663

660-
@classmethod
661-
def _reduce_helper(cls, module, code_type, name, symbol_mapping):
662-
# just for forwarding kwargs
663-
return cls._init(module, code_type, name=name if name else "", symbol_mapping=symbol_mapping)
664+
@staticmethod
665+
def _reduce_helper(module, code_type, name, symbol_mapping):
666+
return ObjectCode._init(module, code_type, name=name if name else "", symbol_mapping=symbol_mapping)
664667

665668
def __reduce__(self):
666669
return ObjectCode._reduce_helper, (self._module, self._code_type, self._name, self._sym_map)

cuda_core/docs/nv-versions.json

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,10 @@
33
"version": "latest",
44
"url": "https://nvidia.github.io/cuda-python/cuda-core/latest/"
55
},
6+
{
7+
"version": "0.6.0",
8+
"url": "https://nvidia.github.io/cuda-python/cuda-core/0.6.0/"
9+
},
610
{
711
"version": "0.5.1",
812
"url": "https://nvidia.github.io/cuda-python/cuda-core/0.5.1/"

cuda_core/tests/test_utils.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,11 @@
1818
except ImportError:
1919
torch = None
2020
import cuda.core
21-
import ml_dtypes
21+
22+
try:
23+
import ml_dtypes
24+
except ImportError:
25+
ml_dtypes = None
2226
import numpy as np
2327
import pytest
2428
from cuda.core import Device
@@ -545,6 +549,7 @@ def test_from_array_interface_unsupported_strides(init_cuda):
545549
param((slice(None, None, 2), slice(1, None, 2)), id="strided"),
546550
],
547551
)
552+
@pytest.mark.skipif(ml_dtypes is None, reason="ml_dtypes is not installed")
548553
@pytest.mark.skipif(cp is None, reason="CuPy is not installed")
549554
@pytest.mark.skipif(cp is not None and _get_cupy_version_major() < 14, reason="CuPy version is less than 14.0.0")
550555
def test_ml_dtypes_bfloat16_dlpack(init_cuda, slices):
@@ -575,6 +580,7 @@ def test_ml_dtypes_bfloat16_dlpack(init_cuda, slices):
575580
param((slice(None, None, 2), slice(1, None, 2)), id="strided"),
576581
],
577582
)
583+
@pytest.mark.skipif(ml_dtypes is None, reason="ml_dtypes is not installed")
578584
@pytest.mark.skipif(torch is None, reason="PyTorch is not installed")
579585
def test_ml_dtypes_bfloat16_torch_dlpack(init_cuda, slices):
580586
a = torch.tensor([1, 2, 3, 4, 5, 6], dtype=torch.bfloat16, device="cuda").reshape(2, 3)[slices]

0 commit comments

Comments
 (0)