Skip to content

Commit dfe930e

Browse files
committed
Merge branch 'main' into abhilash1910→nvvm_enhance
2 parents 404963b + 463ed53 commit dfe930e

File tree

24 files changed

+11915
-8540
lines changed

24 files changed

+11915
-8540
lines changed

.git_archival.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
node: $Format:%H$
2+
node-date: $Format:%cI$
3+
ref-names: $Format:%D$

.gitattributes

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
cuda/_version.py export-subst
2-
31
* text eol=lf
42
*.cmd text eol=crlf
53

@@ -16,3 +14,9 @@ cuda_core/cuda/core/_cpp/*.hpp -binary text diff
1614
*.svg binary
1715
# SCM syntax highlighting & preventing 3-way merges
1816
pixi.lock merge=binary linguist-language=YAML linguist-generated=true
17+
18+
# "export-subst" specifies that this file will get filled in with the current
19+
# commit and tag information when running `git archive`. This allows
20+
# `setuptools-scm` to correctly determine the version when building from a git
21+
# archive. See #1609.
22+
.git_archival.txt export-subst

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ repos:
2929
language: python
3030
additional_dependencies:
3131
- https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl
32-
exclude: '.*pixi\.lock'
32+
exclude: '(.*pixi\.lock)|(\.git_archival\.txt)'
3333
args: ["--fix"]
3434

3535
- id: no-markdown-in-docs-source

ci/tools/run-tests

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ if [[ "${test_module}" == "pathfinder" ]]; then
3434
"FH:${CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS} " \
3535
"BC:${CUDA_PATHFINDER_TEST_FIND_NVIDIA_BITCODE_LIB_STRICTNESS}"
3636
pytest -ra -s -v --durations=0 tests/ |& tee /tmp/pathfinder_test_log.txt
37-
# Fail if no "INFO test_" lines are found; capture line count otherwise
38-
line_count=$(grep '^INFO test_' /tmp/pathfinder_test_log.txt | wc -l)
37+
# Report the number of "INFO test_" lines (including zero)
38+
# to support quick validations based on GHA log archives.
39+
line_count=$(awk '/^INFO test_/ {count++} END {print count+0}' /tmp/pathfinder_test_log.txt)
3940
echo "Number of \"INFO test_\" lines: $line_count"
4041
popd
4142
elif [[ "${test_module}" == "bindings" ]]; then

cuda_bindings/.git_archival.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../.git_archival.txt
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
3+
4+
5+
# This code was automatically generated across versions from 12.9.1 to 13.1.1. Do not modify it directly.
6+
7+
8+
"""
9+
This is a replacement for the stdlib enum.IntEnum.
10+
11+
Notably, it has much better import time performance, since it doesn't generate
12+
and evaluate Python code at startup time.
13+
14+
It supports the most important subset of the IntEnum API. See `test_enum` in
15+
`cuda_bindings/tests/test_basics.py` for details.
16+
"""
17+
18+
from typing import Any, Iterator
19+
20+
21+
class FastEnumMetaclass(type):
    """Metaclass that converts plain class attributes into enum members.

    When a class using this metaclass is created, every non-dunder attribute
    in the class body whose value is either an ``int`` or a ``(value, doc)``
    tuple is replaced by a singleton instance of the class itself.  All other
    attributes (methods, properties, strings, ...) are left untouched.

    Two lookup tables are attached to the class:

    * ``__members__``: member name -> member, in definition order.
    * ``__singletons__``: integer value -> member.  If several names share
      the same value, the name defined last is the one stored here, while
      every name still appears in ``__members__``.
    """

    def __init__(cls, name, bases, namespace):
        super().__init__(name, bases, namespace)

        cls.__singletons__ = {}
        cls.__members__ = {}
        # Only the attributes declared directly on this class are inspected;
        # nothing is written back to the class until the scan has finished.
        for attr_name, attr_value in cls.__dict__.items():
            # Dunder attributes (``__module__``, ``__doc__``, ...) are never
            # enum members.
            if attr_name.startswith("__") and attr_name.endswith("__"):
                continue

            if isinstance(attr_value, tuple):
                # ``NAME = (value, "documentation")`` form.
                attr_value, doc = attr_value
            elif isinstance(attr_value, int):
                # ``NAME = value`` form, no per-member documentation.
                doc = None
            else:
                continue

            # Bypass the class's own ``__new__`` (which only performs lookups)
            # and build the underlying int directly.
            member = int.__new__(cls, attr_value)
            member.__doc__ = doc
            member._name = attr_name
            cls.__singletons__[attr_value] = member
            cls.__members__[attr_name] = member

        # Replace the raw class attributes with the member objects.
        for member_name, member in cls.__members__.items():
            setattr(cls, member_name, member)

    def __repr__(cls) -> str:
        """Return ``<enum 'ClassName'>``."""
        return f"<enum '{cls.__name__}'>"

    def __len__(cls) -> int:
        """Return the number of member names defined on the class."""
        return len(cls.__members__)

    def __iter__(cls) -> Iterator["FastEnum"]:
        """Iterate over the members in definition order."""
        return iter(cls.__members__.values())

    def __contains__(cls, item: Any) -> bool:
        """Return whether ``item`` equals the value of one of the members.

        Unhashable objects (lists, dicts, ...) can never be a member value,
        so they yield ``False`` instead of propagating the ``TypeError``
        raised by the underlying dict lookup.
        """
        try:
            return item in cls.__singletons__
        except TypeError:
            return False
58+
59+
60+
class FastEnum(int, metaclass=FastEnumMetaclass):
    """Lightweight ``int``-based enum base class.

    Subclasses declare members as ``NAME = value`` or
    ``NAME = (value, "doc")``; the metaclass turns each one into a singleton
    instance.  Calling the class with a value returns the existing member for
    that value rather than creating a new object.
    """

    def __new__(cls, value: int) -> "FastEnum":
        """Return the member of ``cls`` whose value is ``value``.

        Raises:
            ValueError: if no member has that value.  Unhashable arguments
                are reported the same way rather than leaking the
                ``TypeError`` from the internal dict lookup.
        """
        try:
            return cls.__singletons__[value]
        except (KeyError, TypeError):
            raise ValueError(f"{value} is not a valid {cls.__name__}") from None

    def __repr__(self) -> str:
        """Return ``<ClassName.MEMBER: value>``."""
        return f"<{self.__class__.__name__}.{self._name}: {int(self)}>"

    @property
    def name(self) -> str:
        """The member's name as declared in the class body."""
        return self._name

    @property
    def value(self) -> int:
        """The member's value as a plain ``int``."""
        return int(self)

cuda_bindings/cuda/bindings/_lib/utils.pxi.in

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
33

44
from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
55
from libc.stdlib cimport calloc, free
66
from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t
77
from libc.stddef cimport wchar_t
88
from libc.string cimport memcpy
9-
from enum import Enum as _Enum
9+
from cuda.bindings._internal._fast_enum import FastEnum as _FastEnum
1010
import ctypes as _ctypes
1111
cimport cuda.bindings.cydriver as cydriver
1212
cimport cuda.bindings._lib.param_packer as param_packer
@@ -73,7 +73,7 @@ cdef class _HelperKernelParams:
7373
self._ckernelParams[idx] = <void*><void_ptr>value.getPtr()
7474
elif isinstance(value, (_ctypes.Structure)):
7575
self._ckernelParams[idx] = <void*><void_ptr>_ctypes.addressof(value)
76-
elif isinstance(value, (_Enum)):
76+
elif isinstance(value, (_FastEnum)):
7777
self._ckernelParams[idx] = &(self._ckernelParamsData[data_idx])
7878
(<int*>self._ckernelParams[idx])[0] = value.value
7979
data_idx += sizeof(int)

cuda_bindings/cuda/bindings/cufile.pyx

Lines changed: 89 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
#
33
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
44
#
@@ -8,7 +8,7 @@ cimport cython # NOQA
88
from libc cimport errno
99
from ._internal.utils cimport (get_buffer_pointer, get_nested_resource_ptr,
1010
nested_resource)
11-
from enum import IntEnum as _IntEnum
11+
from cuda.bindings._internal._fast_enum import FastEnum as _FastEnum
1212

1313
import cython
1414

@@ -2288,6 +2288,8 @@ cdef class StatsLevel2:
22882288
def read_size_kb_hist(self, val):
22892289
if self._readonly:
22902290
raise ValueError("This StatsLevel2 instance is read-only")
2291+
if len(val) != 32:
2292+
raise ValueError(f"Expected length { 32 } for field read_size_kb_hist, got {len(val)}")
22912293
cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c")
22922294
arr[:] = _numpy.asarray(val, dtype=_numpy.uint64)
22932295
memcpy(<void *>(&(self._ptr[0].read_size_kb_hist)), <void *>(arr.data), sizeof(uint64_t) * len(val))
@@ -2303,6 +2305,8 @@ cdef class StatsLevel2:
23032305
def write_size_kb_hist(self, val):
23042306
if self._readonly:
23052307
raise ValueError("This StatsLevel2 instance is read-only")
2308+
if len(val) != 32:
2309+
raise ValueError(f"Expected length { 32 } for field write_size_kb_hist, got {len(val)}")
23062310
cdef view.array arr = view.array(shape=(32,), itemsize=sizeof(uint64_t), format="Q", mode="c")
23072311
arr[:] = _numpy.asarray(val, dtype=_numpy.uint64)
23082312
memcpy(<void *>(&(self._ptr[0].write_size_kb_hist)), <void *>(arr.data), sizeof(uint64_t) * len(val))
@@ -2441,7 +2445,7 @@ cdef class StatsLevel3:
24412445
raise ValueError("This StatsLevel3 instance is read-only")
24422446
cdef PerGpuStats val_ = val
24432447
if len(val) != 16:
2444-
raise ValueError(f"Expected length 16 for field per_gpu_stats, got {len(val)}")
2448+
raise ValueError(f"Expected length { 16 } for field per_gpu_stats, got {len(val)}")
24452449
memcpy(<void *>&(self._ptr[0].per_gpu_stats), <void *>(val_._get_ptr()), sizeof(CUfilePerGpuStats_t) * 16)
24462450

24472451
@property
@@ -2496,8 +2500,10 @@ cdef class StatsLevel3:
24962500
# Enum
24972501
###############################################################################
24982502

2499-
class OpError(_IntEnum):
2500-
"""See `CUfileOpError`."""
2503+
class OpError(_FastEnum):
2504+
"""
2505+
See `CUfileOpError`.
2506+
"""
25012507
SUCCESS = CU_FILE_SUCCESS
25022508
DRIVER_NOT_INITIALIZED = CU_FILE_DRIVER_NOT_INITIALIZED
25032509
DRIVER_INVALID_PROPS = CU_FILE_DRIVER_INVALID_PROPS
@@ -2548,49 +2554,61 @@ class OpError(_IntEnum):
25482554
BATCH_NOCOMPAT_ERROR = CU_FILE_BATCH_NOCOMPAT_ERROR
25492555
IO_MAX_ERROR = CU_FILE_IO_MAX_ERROR
25502556

2551-
class DriverStatusFlags(_IntEnum):
2552-
"""See `CUfileDriverStatusFlags_t`."""
2553-
LUSTRE_SUPPORTED = CU_FILE_LUSTRE_SUPPORTED
2554-
WEKAFS_SUPPORTED = CU_FILE_WEKAFS_SUPPORTED
2555-
NFS_SUPPORTED = CU_FILE_NFS_SUPPORTED
2557+
class DriverStatusFlags(_FastEnum):
2558+
"""
2559+
See `CUfileDriverStatusFlags_t`.
2560+
"""
2561+
LUSTRE_SUPPORTED = (CU_FILE_LUSTRE_SUPPORTED, 'Support for DDN LUSTRE')
2562+
WEKAFS_SUPPORTED = (CU_FILE_WEKAFS_SUPPORTED, 'Support for WEKAFS')
2563+
NFS_SUPPORTED = (CU_FILE_NFS_SUPPORTED, 'Support for NFS')
25562564
GPFS_SUPPORTED = CU_FILE_GPFS_SUPPORTED
2557-
NVME_SUPPORTED = CU_FILE_NVME_SUPPORTED
2558-
NVMEOF_SUPPORTED = CU_FILE_NVMEOF_SUPPORTED
2559-
SCSI_SUPPORTED = CU_FILE_SCSI_SUPPORTED
2560-
SCALEFLUX_CSD_SUPPORTED = CU_FILE_SCALEFLUX_CSD_SUPPORTED
2561-
NVMESH_SUPPORTED = CU_FILE_NVMESH_SUPPORTED
2562-
BEEGFS_SUPPORTED = CU_FILE_BEEGFS_SUPPORTED
2563-
NVME_P2P_SUPPORTED = CU_FILE_NVME_P2P_SUPPORTED
2564-
SCATEFS_SUPPORTED = CU_FILE_SCATEFS_SUPPORTED
2565-
VIRTIOFS_SUPPORTED = CU_FILE_VIRTIOFS_SUPPORTED
2566-
MAX_TARGET_TYPES = CU_FILE_MAX_TARGET_TYPES
2567-
2568-
class DriverControlFlags(_IntEnum):
2569-
"""See `CUfileDriverControlFlags_t`."""
2570-
USE_POLL_MODE = CU_FILE_USE_POLL_MODE
2571-
ALLOW_COMPAT_MODE = CU_FILE_ALLOW_COMPAT_MODE
2572-
2573-
class FeatureFlags(_IntEnum):
2574-
"""See `CUfileFeatureFlags_t`."""
2575-
DYN_ROUTING_SUPPORTED = CU_FILE_DYN_ROUTING_SUPPORTED
2576-
BATCH_IO_SUPPORTED = CU_FILE_BATCH_IO_SUPPORTED
2577-
STREAMS_SUPPORTED = CU_FILE_STREAMS_SUPPORTED
2578-
PARALLEL_IO_SUPPORTED = CU_FILE_PARALLEL_IO_SUPPORTED
2579-
P2P_SUPPORTED = CU_FILE_P2P_SUPPORTED
2580-
2581-
class FileHandleType(_IntEnum):
2582-
"""See `CUfileFileHandleType`."""
2583-
OPAQUE_FD = CU_FILE_HANDLE_TYPE_OPAQUE_FD
2584-
OPAQUE_WIN32 = CU_FILE_HANDLE_TYPE_OPAQUE_WIN32
2565+
NVME_SUPPORTED = (CU_FILE_NVME_SUPPORTED, '< Support for GPFS Support for NVMe')
2566+
NVMEOF_SUPPORTED = (CU_FILE_NVMEOF_SUPPORTED, 'Support for NVMeOF')
2567+
SCSI_SUPPORTED = (CU_FILE_SCSI_SUPPORTED, 'Support for SCSI')
2568+
SCALEFLUX_CSD_SUPPORTED = (CU_FILE_SCALEFLUX_CSD_SUPPORTED, 'Support for Scaleflux CSD')
2569+
NVMESH_SUPPORTED = (CU_FILE_NVMESH_SUPPORTED, 'Support for NVMesh Block Dev')
2570+
BEEGFS_SUPPORTED = (CU_FILE_BEEGFS_SUPPORTED, 'Support for BeeGFS')
2571+
NVME_P2P_SUPPORTED = (CU_FILE_NVME_P2P_SUPPORTED, 'Do not use this macro. This is deprecated now')
2572+
SCATEFS_SUPPORTED = (CU_FILE_SCATEFS_SUPPORTED, 'Support for ScateFS')
2573+
VIRTIOFS_SUPPORTED = (CU_FILE_VIRTIOFS_SUPPORTED, 'Support for VirtioFS')
2574+
MAX_TARGET_TYPES = (CU_FILE_MAX_TARGET_TYPES, 'Maximum FS supported')
2575+
2576+
class DriverControlFlags(_FastEnum):
2577+
"""
2578+
See `CUfileDriverControlFlags_t`.
2579+
"""
2580+
USE_POLL_MODE = (CU_FILE_USE_POLL_MODE, 'use POLL mode. properties.use_poll_mode')
2581+
ALLOW_COMPAT_MODE = (CU_FILE_ALLOW_COMPAT_MODE, 'allow COMPATIBILITY mode. properties.allow_compat_mode')
2582+
2583+
class FeatureFlags(_FastEnum):
2584+
"""
2585+
See `CUfileFeatureFlags_t`.
2586+
"""
2587+
DYN_ROUTING_SUPPORTED = (CU_FILE_DYN_ROUTING_SUPPORTED, 'Support for Dynamic routing to handle devices across the PCIe bridges')
2588+
BATCH_IO_SUPPORTED = (CU_FILE_BATCH_IO_SUPPORTED, 'Supported')
2589+
STREAMS_SUPPORTED = (CU_FILE_STREAMS_SUPPORTED, 'Supported')
2590+
PARALLEL_IO_SUPPORTED = (CU_FILE_PARALLEL_IO_SUPPORTED, 'Supported')
2591+
P2P_SUPPORTED = (CU_FILE_P2P_SUPPORTED, 'Support for PCI P2PDMA')
2592+
2593+
class FileHandleType(_FastEnum):
2594+
"""
2595+
See `CUfileFileHandleType`.
2596+
"""
2597+
OPAQUE_FD = (CU_FILE_HANDLE_TYPE_OPAQUE_FD, 'Linux based fd')
2598+
OPAQUE_WIN32 = (CU_FILE_HANDLE_TYPE_OPAQUE_WIN32, 'Windows based handle (unsupported)')
25852599
USERSPACE_FS = CU_FILE_HANDLE_TYPE_USERSPACE_FS
25862600

2587-
class Opcode(_IntEnum):
2588-
"""See `CUfileOpcode_t`."""
2601+
class Opcode(_FastEnum):
2602+
"""
2603+
See `CUfileOpcode_t`.
2604+
"""
25892605
READ = CUFILE_READ
25902606
WRITE = CUFILE_WRITE
25912607

2592-
class Status(_IntEnum):
2593-
"""See `CUfileStatus_t`."""
2608+
class Status(_FastEnum):
2609+
"""
2610+
See `CUfileStatus_t`.
2611+
"""
25942612
WAITING = CUFILE_WAITING
25952613
PENDING = CUFILE_PENDING
25962614
INVALID = CUFILE_INVALID
@@ -2599,12 +2617,16 @@ class Status(_IntEnum):
25992617
TIMEOUT = CUFILE_TIMEOUT
26002618
FAILED = CUFILE_FAILED
26012619

2602-
class BatchMode(_IntEnum):
2603-
"""See `CUfileBatchMode_t`."""
2620+
class BatchMode(_FastEnum):
2621+
"""
2622+
See `CUfileBatchMode_t`.
2623+
"""
26042624
BATCH = CUFILE_BATCH
26052625

2606-
class SizeTConfigParameter(_IntEnum):
2607-
"""See `CUFileSizeTConfigParameter_t`."""
2626+
class SizeTConfigParameter(_FastEnum):
2627+
"""
2628+
See `CUFileSizeTConfigParameter_t`.
2629+
"""
26082630
PROFILE_STATS = CUFILE_PARAM_PROFILE_STATS
26092631
EXECUTION_MAX_IO_QUEUE_DEPTH = CUFILE_PARAM_EXECUTION_MAX_IO_QUEUE_DEPTH
26102632
EXECUTION_MAX_IO_THREADS = CUFILE_PARAM_EXECUTION_MAX_IO_THREADS
@@ -2618,8 +2640,10 @@ class SizeTConfigParameter(_IntEnum):
26182640
POLLTHRESHOLD_SIZE_KB = CUFILE_PARAM_POLLTHRESHOLD_SIZE_KB
26192641
PROPERTIES_BATCH_IO_TIMEOUT_MS = CUFILE_PARAM_PROPERTIES_BATCH_IO_TIMEOUT_MS
26202642

2621-
class BoolConfigParameter(_IntEnum):
2622-
"""See `CUFileBoolConfigParameter_t`."""
2643+
class BoolConfigParameter(_FastEnum):
2644+
"""
2645+
See `CUFileBoolConfigParameter_t`.
2646+
"""
26232647
PROPERTIES_USE_POLL_MODE = CUFILE_PARAM_PROPERTIES_USE_POLL_MODE
26242648
PROPERTIES_ALLOW_COMPAT_MODE = CUFILE_PARAM_PROPERTIES_ALLOW_COMPAT_MODE
26252649
FORCE_COMPAT_MODE = CUFILE_PARAM_FORCE_COMPAT_MODE
@@ -2633,24 +2657,30 @@ class BoolConfigParameter(_IntEnum):
26332657
SKIP_TOPOLOGY_DETECTION = CUFILE_PARAM_SKIP_TOPOLOGY_DETECTION
26342658
STREAM_MEMOPS_BYPASS = CUFILE_PARAM_STREAM_MEMOPS_BYPASS
26352659

2636-
class StringConfigParameter(_IntEnum):
2637-
"""See `CUFileStringConfigParameter_t`."""
2660+
class StringConfigParameter(_FastEnum):
2661+
"""
2662+
See `CUFileStringConfigParameter_t`.
2663+
"""
26382664
LOGGING_LEVEL = CUFILE_PARAM_LOGGING_LEVEL
26392665
ENV_LOGFILE_PATH = CUFILE_PARAM_ENV_LOGFILE_PATH
26402666
LOG_DIR = CUFILE_PARAM_LOG_DIR
26412667

2642-
class ArrayConfigParameter(_IntEnum):
2643-
"""See `CUFileArrayConfigParameter_t`."""
2668+
class ArrayConfigParameter(_FastEnum):
2669+
"""
2670+
See `CUFileArrayConfigParameter_t`.
2671+
"""
26442672
POSIX_POOL_SLAB_SIZE_KB = CUFILE_PARAM_POSIX_POOL_SLAB_SIZE_KB
26452673
POSIX_POOL_SLAB_COUNT = CUFILE_PARAM_POSIX_POOL_SLAB_COUNT
26462674

2647-
class P2PFlags(_IntEnum):
2648-
"""See `CUfileP2PFlags_t`."""
2649-
P2PDMA = CUFILE_P2PDMA
2650-
NVFS = CUFILE_NVFS
2651-
DMABUF = CUFILE_DMABUF
2652-
C2C = CUFILE_C2C
2653-
NVIDIA_PEERMEM = CUFILE_NVIDIA_PEERMEM
2675+
class P2PFlags(_FastEnum):
2676+
"""
2677+
See `CUfileP2PFlags_t`.
2678+
"""
2679+
P2PDMA = (CUFILE_P2PDMA, 'Support for PCI P2PDMA')
2680+
NVFS = (CUFILE_NVFS, 'Support for nvidia-fs')
2681+
DMABUF = (CUFILE_DMABUF, 'Support for DMA Buffer')
2682+
C2C = (CUFILE_C2C, 'Support for Chip-to-Chip (Grace-based systems)')
2683+
NVIDIA_PEERMEM = (CUFILE_NVIDIA_PEERMEM, 'Only for IBM Spectrum Scale and WekaFS')
26542684

26552685

26562686
###############################################################################

0 commit comments

Comments
 (0)