Skip to content

Commit 701f54b

Browse files
rwgk and cursoragent committed
Serialize NVVM compile/libdevice mutation per Program instance.
Add a per-instance threading lock around the NVVM verify/libdevice-add/compile path so that concurrent compiles on the same Program instance cannot race and add libdevice twice. The lock is acquired while the GIL is held and stays held across the nogil sections; we verified that this ordering does not introduce a lock/GIL deadlock cycle.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 51f6009 commit 701f54b

File tree

2 files changed

+36
-31
lines changed

2 files changed

+36
-31
lines changed

cuda_core/cuda/core/_program.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,6 @@ cdef class Program:
1313
object _linker # Linker
1414
object _options # ProgramOptions
1515
object __weakref__
16+
object _compile_lock # Per-instance lock for compile-time mutation
1617
bint _use_libdevice # Flag for libdevice loading
1718
bint _libdevice_added

cuda_core/cuda/core/_program.pyx

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ This module provides :class:`Program` for compiling source code into
1010
from __future__ import annotations
1111

1212
from dataclasses import dataclass
13+
import threading
1314
from warnings import warn
1415

1516
from cuda.bindings import driver, nvrtc
@@ -533,6 +534,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op
533534

534535
self._options = options = check_or_create_options(ProgramOptions, options, "Program options")
535536
code_type = code_type.lower()
537+
self._compile_lock = threading.Lock()
536538
self._use_libdevice = False
537539
self._libdevice_added = False
538540

@@ -723,41 +725,43 @@ cdef object Program_compile_nvvm(Program self, str target_type, object logs):
723725
for i in range(len(options_list)):
724726
options_vec[i] = <const char*>(<bytes>options_list[i])
725727

726-
with nogil:
727-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmVerifyProgram(prog, <int>options_vec.size(), options_vec.data()))
728-
729-
# Load libdevice if requested - following numba-cuda
730-
if self._use_libdevice and not self._libdevice_added:
731-
libdevice_path = _find_libdevice_path()
732-
with open(libdevice_path, "rb") as f:
733-
libdevice_bytes = f.read()
734-
libdevice_ptr = <const char*>libdevice_bytes
735-
libdevice_len = len(libdevice_bytes)
728+
# Serialize NVVM program mutation/use per Program instance.
729+
with self._compile_lock:
736730
with nogil:
737-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmLazyAddModuleToProgram(
738-
prog, libdevice_ptr, libdevice_len, NULL))
739-
self._libdevice_added = True
740-
741-
with nogil:
742-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmCompileProgram(prog, <int>options_vec.size(), options_vec.data()))
731+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmVerifyProgram(prog, <int>options_vec.size(), options_vec.data()))
732+
733+
# Load libdevice if requested - following numba-cuda.
734+
if self._use_libdevice and not self._libdevice_added:
735+
libdevice_path = _find_libdevice_path()
736+
with open(libdevice_path, "rb") as f:
737+
libdevice_bytes = f.read()
738+
libdevice_ptr = <const char*>libdevice_bytes
739+
libdevice_len = len(libdevice_bytes)
740+
with nogil:
741+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmLazyAddModuleToProgram(
742+
prog, libdevice_ptr, libdevice_len, NULL))
743+
self._libdevice_added = True
743744

744-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetCompiledResultSize(prog, &output_size))
745-
data = bytearray(output_size)
746-
data_ptr = <char*>(<bytearray>data)
747-
with nogil:
748-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetCompiledResult(prog, data_ptr))
745+
with nogil:
746+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmCompileProgram(prog, <int>options_vec.size(), options_vec.data()))
749747

750-
# Get compilation log if requested
751-
if logs is not None:
752-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetProgramLogSize(prog, &logsize))
753-
if logsize > 1:
754-
log = bytearray(logsize)
755-
data_ptr = <char*>(<bytearray>log)
756-
with nogil:
757-
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetProgramLog(prog, data_ptr))
758-
logs.write(log.decode("utf-8", errors="backslashreplace"))
748+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetCompiledResultSize(prog, &output_size))
749+
data = bytearray(output_size)
750+
data_ptr = <char*>(<bytearray>data)
751+
with nogil:
752+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetCompiledResult(prog, data_ptr))
753+
754+
# Get compilation log if requested
755+
if logs is not None:
756+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetProgramLogSize(prog, &logsize))
757+
if logsize > 1:
758+
log = bytearray(logsize)
759+
data_ptr = <char*>(<bytearray>log)
760+
with nogil:
761+
HANDLE_RETURN_NVVM(prog, cynvvm.nvvmGetProgramLog(prog, data_ptr))
762+
logs.write(log.decode("utf-8", errors="backslashreplace"))
759763

760-
return ObjectCode._init(bytes(data), target_type, name=self._options.name)
764+
return ObjectCode._init(bytes(data), target_type, name=self._options.name)
761765

762766
# Supported target types per backend
763767
cdef dict SUPPORTED_TARGETS = {

0 commit comments

Comments
 (0)