Skip to content

Commit 3fb5e20

Browse files
[doc-only] Address review feedback on v0.6.0 release notes
Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 0382a28 commit 3fb5e20

File tree

4 files changed

+53
-27
lines changed

4 files changed

+53
-27
lines changed

cuda_core/cuda/core/_memoryview.pyx

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -152,33 +152,48 @@ cdef class StridedMemoryView:
152152

153153
@classmethod
154154
def from_dlpack(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
155+
"""Create a view from an object supporting the DLPack_ protocol.
156+
157+
Parameters
158+
----------
159+
obj : object
160+
An object implementing the DLPack_ protocol (via ``__dlpack__``).
161+
stream_ptr : int, optional
162+
Stream pointer for synchronization. If ``None``, no synchronization is performed.
163+
164+
.. _DLPack: https://dmlc.github.io/dlpack/latest/
165+
"""
155166
cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
156167
view_as_dlpack(obj, stream_ptr, buf)
157168
return buf
158169

159170
@classmethod
160171
def from_cuda_array_interface(cls, obj: object, stream_ptr: int | None=None) -> StridedMemoryView:
161-
"""Create a view from an object supporting the ``__cuda_array_interface__`` protocol.
172+
"""Create a view from an object supporting the `__cuda_array_interface__`_ protocol.
162173

163174
Parameters
164175
----------
165176
obj : object
166-
An object implementing the ``__cuda_array_interface__`` protocol.
177+
An object implementing the `__cuda_array_interface__`_ protocol.
167178
stream_ptr : int, optional
168179
Stream pointer for synchronization. If ``None``, no synchronization is performed.
180+
181+
.. _`__cuda_array_interface__`: https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html
169182
"""
170183
cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
171184
view_as_cai(obj, stream_ptr, buf)
172185
return buf
173186

174187
@classmethod
175188
def from_array_interface(cls, obj: object) -> StridedMemoryView:
176-
"""Create a view from an object supporting the ``__array_interface__`` protocol.
189+
"""Create a view from an object supporting the `__array_interface__`_ protocol.
177190

178191
Parameters
179192
----------
180193
obj : object
181-
An object implementing the ``__array_interface__`` protocol (e.g., a numpy array).
194+
An object implementing the `__array_interface__`_ protocol (e.g., a NumPy array).
195+
196+
.. _`__array_interface__`: https://numpy.org/doc/stable/reference/arrays.interface.html
182197
"""
183198
cdef StridedMemoryView buf = StridedMemoryView.__new__(cls)
184199
view_as_array_interface(obj, buf)
@@ -188,14 +203,17 @@ cdef class StridedMemoryView:
188203
def from_any_interface(cls, obj: object, stream_ptr: int | None = None) -> StridedMemoryView:
189204
"""Create a view by automatically selecting the best available protocol.
190205

191-
Tries DLPack first, then falls back to ``__cuda_array_interface__``.
206+
Tries DLPack_ first, then falls back to `__cuda_array_interface__`_.
192207

193208
Parameters
194209
----------
195210
obj : object
196-
An object implementing DLPack or ``__cuda_array_interface__``.
211+
An object implementing DLPack_ or `__cuda_array_interface__`_.
197212
stream_ptr : int, optional
198213
Stream pointer for synchronization. If ``None``, no synchronization is performed.
214+
215+
.. _DLPack: https://dmlc.github.io/dlpack/latest/
216+
.. _`__cuda_array_interface__`: https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html
199217
"""
200218
if check_has_dlpack(obj):
201219
return cls.from_dlpack(obj, stream_ptr)

cuda_core/cuda/core/_program.pyx

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,13 @@ class ProgramOptions:
306306
Default: False
307307
extra_sources : list of 2-tuples or tuple of 2-tuples, optional
308308
Additional NVVM IR modules to compile together with the main program, specified as
309-
``((name1, source1), (name2, source2), ...)``. Each source can be a string (textual
310-
LLVM IR), bytes, or bytearray. Only supported for the NVVM backend.
309+
``((name1, source1), (name2, source2), ...)``. Each name is a string identifier used
310+
in diagnostic messages. Each source can be a string (textual LLVM IR) or bytes/bytearray
311+
(LLVM bitcode). Only supported for the NVVM backend.
311312
Default: None
312313
use_libdevice : bool, optional
313-
Load NVIDIA's libdevice math builtins library during NVVM compilation. Only
314-
supported for the NVVM backend.
314+
Load NVIDIA's `libdevice <https://docs.nvidia.com/cuda/libdevice-users-guide/>`_
315+
math builtins library. Only supported for the NVVM backend.
315316
Default: False
316317
"""
317318

cuda_core/docs/source/api.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ CUDA runtime
2323
Graph
2424
GraphBuilder
2525
launch
26-
LEGACY_DEFAULT_STREAM
27-
PER_THREAD_DEFAULT_STREAM
2826

2927
:template: autosummary/cyclass.rst
3028

@@ -51,6 +49,18 @@ CUDA runtime
5149
LaunchConfig
5250
VirtualMemoryResourceOptions
5351

52+
.. data:: LEGACY_DEFAULT_STREAM
53+
54+
The legacy default CUDA stream. All devices share the same legacy default
55+
stream, and work launched on it implicitly synchronizes with work on every other
56+
blocking stream (streams created as non-blocking are exempt).
57+
58+
.. data:: PER_THREAD_DEFAULT_STREAM
59+
60+
The per-thread default CUDA stream. Each host thread has its own per-thread
61+
default stream, and work launched on it can execute concurrently with work
62+
on other non-blocking streams.
63+
5464

5565
CUDA compilation toolchain
5666
--------------------------

cuda_core/docs/source/release/0.6.0-notes.rst

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ New features
3535

3636
- Added ``from_array_interface`` constructor for creating views from NumPy arrays.
3737
- Improved structured dtype array support.
38-
- Relaxed the power-of-two itemsize check in ``StridedLayout``.
3938
- Added bfloat16 DLPack support when the optional ``ml_dtypes`` package is installed.
4039

4140
- Added public access to default CUDA streams via module-level constants
@@ -56,32 +55,30 @@ New features
5655
``cuda.core`` and the installed ``cuda-bindings`` version.
5756

5857

59-
New examples
60-
------------
61-
62-
None.
63-
64-
6558
Fixes and enhancements
6659
----------------------
6760

68-
- Reduced wheel and installed package sizes by excluding Cython source files and build
69-
artifacts from distribution packages.
61+
- Eliminated spurious CUDA driver errors during interpreter shutdown by ensuring
62+
resources are destroyed in the correct order.
63+
64+
- Fixed a bug preventing weak references to core API objects.
65+
66+
- Fixed zero-sized allocations in legacy memory resources, which previously failed on
67+
certain platforms.
7068

7169
- Improved performance by Cythonizing :class:`Program` and :class:`ObjectCode` internals.
7270

7371
- Reduced :class:`~utils.StridedMemoryView` construction overhead.
7472

75-
- Legacy and per-thread default streams are now singletons, ensuring consistent identity
76-
across the application.
77-
7873
- ``__hash__`` and ``__eq__`` on core API classes no longer require a CUDA context.
7974

8075
- Device attribute queries now gracefully handle unsupported attributes on older CUDA
8176
drivers, returning sensible defaults instead of raising errors.
8277

83-
- Fixed zero-sized allocations in legacy memory resources, which previously failed on
84-
certain platforms.
85-
8678
- Added a warning when :class:`ManagedMemoryResource` is created on platforms without
8779
concurrent managed access support.
80+
81+
- Reduced wheel and installed package sizes by excluding Cython source files and build
82+
artifacts from distribution packages.
83+
84+
- Slightly improved typing support.

0 commit comments

Comments
 (0)