From 4dee095b0df4b8146830141cef4ecb5bc198bf30 Mon Sep 17 00:00:00 2001
From: Irina Demeshko <idemeshko@nvidia.com>
Date: Fri, 1 Dec 2023 21:24:43 +0000
Subject: [PATCH 01/12] Add partial Numba typings

---
 pyproject.toml                            |  4 +++-
 typings/numba/__init__.pyi                | 10 +++++++++
 typings/numba/core/__init__.pyi           |  0
 typings/numba/core/ccallback/__init__.pyi |  8 ++++++++
 typings/numba/core/types/__init__.pyi     | 25 +++++++++++++++++++++++
 typings/numba/cuda/__init__.pyi           |  5 +++++
 typings/numba/cuda/compiler.pyi           | 12 +++++++++++
 typings/numba/types/CPointer.pyi          |  5 +++++
 typings/numba/types/__init__.pyi          | 12 +++++++++++
 9 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 typings/numba/__init__.pyi
 create mode 100644 typings/numba/core/__init__.pyi
 create mode 100644 typings/numba/core/ccallback/__init__.pyi
 create mode 100644 typings/numba/core/types/__init__.pyi
 create mode 100644 typings/numba/cuda/__init__.pyi
 create mode 100644 typings/numba/cuda/compiler.pyi
 create mode 100644 typings/numba/types/CPointer.pyi
 create mode 100644 typings/numba/types/__init__.pyi

diff --git a/pyproject.toml b/pyproject.toml
index 04c31fb7ae..c766329ec3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,12 +39,14 @@ exclude = '''
     _build |
     buck-out |
     build |
-    dist
+    dist |
+    typings
 )/
 '''
 
 [tool.mypy]
 python_version = "3.10"
+mypy_path = "typings/"
 
 pretty = true
 show_error_codes = true
diff --git a/typings/numba/__init__.pyi b/typings/numba/__init__.pyi
new file mode 100644
index 0000000000..3aa25ebbd1
--- /dev/null
+++ b/typings/numba/__init__.pyi
@@ -0,0 +1,10 @@
+from typing import Any, Callable
+
+import numba.core.types as types
+import numba.cuda  # import compile_ptx
+from numba.core import types
+from numba.core.ccallback import CFunc
+from numba.core.types import CPointer, uint64
+
+def cfunc(sig: Any) -> Any:
+    def wrapper(func: Callable[[Any], Any]) -> tuple[Any]: ...
diff --git a/typings/numba/core/__init__.pyi b/typings/numba/core/__init__.pyi
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/typings/numba/core/ccallback/__init__.pyi b/typings/numba/core/ccallback/__init__.pyi
new file mode 100644
index 0000000000..81b5030b9c
--- /dev/null
+++ b/typings/numba/core/ccallback/__init__.pyi
@@ -0,0 +1,8 @@
+from typing import Any
+
+class CFunc(object):
+    def __init__(
+        self, pyfunc: Any, sig: Any, locals: Any, options: Any
+    ) -> None: ...
+    @property
+    def address(self) -> int: ...
diff --git a/typings/numba/core/types/__init__.pyi b/typings/numba/core/types/__init__.pyi
new file mode 100644
index 0000000000..8bb1e2b103
--- /dev/null
+++ b/typings/numba/core/types/__init__.pyi
@@ -0,0 +1,25 @@
+class Opaque: ...
+
+class NoneType(Opaque):
+    def __init__(self, name: str) -> None: ...
+
+class Type:
+    def __init__(self, name: str) -> None: ...
+
+class Number(Type): ...
+
+class Integer(Number):
+    def __init__(self, name: str) -> None: ...
+
+class RawPointer:
+    def __init__(self, name: str) -> None: ...
+
+class CPointer(Type):
+    def __init__(self, dtype: Type) -> None: ...
+
+none = NoneType("none")
+
+uint32 = Integer("uint32")
+uint64 = Integer("uint64")
+void = none
+voidptr = Type("void*")
diff --git a/typings/numba/cuda/__init__.pyi b/typings/numba/cuda/__init__.pyi
new file mode 100644
index 0000000000..d66e40c5f4
--- /dev/null
+++ b/typings/numba/cuda/__init__.pyi
@@ -0,0 +1,5 @@
+from typing import Any
+
+from numba.cuda.compiler import compile_ptx as compile_ptx
+
+def get_current_device() -> Any: ...
diff --git a/typings/numba/cuda/compiler.pyi b/typings/numba/cuda/compiler.pyi
new file mode 100644
index 0000000000..56e02dd3e2
--- /dev/null
+++ b/typings/numba/cuda/compiler.pyi
@@ -0,0 +1,12 @@
+from typing import Any, Callable, Optional
+
+def compile_ptx(
+    pyfunc: Callable[[Any], Any],
+    args: Any,
+    debug: bool = False,
+    lineinfo: bool = False,
+    device: bool = False,
+    fastmath: bool = False,
+    cc: Optional[Any] = None,
+    opt: bool = True,
+) -> tuple[Any]: ...
diff --git a/typings/numba/types/CPointer.pyi b/typings/numba/types/CPointer.pyi
new file mode 100644
index 0000000000..249a23f191
--- /dev/null
+++ b/typings/numba/types/CPointer.pyi
@@ -0,0 +1,5 @@
+# import numpy as np
+from numba.core.types.abstract import Type
+
+class CPointer(Type):
+    def __init__(self, dtype: Type) -> None: ...
diff --git a/typings/numba/types/__init__.pyi b/typings/numba/types/__init__.pyi
new file mode 100644
index 0000000000..14c90eca2a
--- /dev/null
+++ b/typings/numba/types/__init__.pyi
@@ -0,0 +1,12 @@
+class Type: ...
+class Number(Type): ...
+
+class Integer(Number):
+    def __init__(self, name: str) -> None: ...
+
+class CPointer(Type):
+    def __init__(self, dtype: Type) -> None: ...
+
+uint32 = Integer("uint32")
+uint64 = Integer("uint64")
+void = None

From 895edf8042e0472e6243137e64d0dae50a6ab6b0 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 6 Dec 2023 17:33:23 +0000
Subject: [PATCH 02/12] Additional Numba and llvmlite typings for SoA
 compilation

---
 typings/llvmlite/__init__.pyi         |  1 +
 typings/llvmlite/ir/__init__.pyi      |  4 ++++
 typings/llvmlite/ir/_utils.pyi        |  3 +++
 typings/llvmlite/ir/builder.pyi       | 18 ++++++++++++++++++
 typings/llvmlite/ir/instructions.pyi  |  6 ++++++
 typings/llvmlite/ir/module.pyi        |  1 +
 typings/llvmlite/ir/types.pyi         | 13 +++++++++++++
 typings/llvmlite/ir/values.pyi        | 23 +++++++++++++++++++++++
 typings/numba/__init__.pyi            |  3 ++-
 typings/numba/core/callconv.pyi       | 25 +++++++++++++++++++++++++
 typings/numba/core/compiler_lock.pyi  |  6 ++++++
 typings/numba/core/sigutils.pyi       |  8 ++++++++
 typings/numba/core/types/__init__.pyi |  9 +++++++++
 typings/numba/cuda/codegen.pyi        | 12 ++++++++++++
 typings/numba/cuda/compiler.pyi       | 16 +++++++++++++++-
 typings/numba/cuda/target.pyi         | 10 ++++++++++
 16 files changed, 156 insertions(+), 2 deletions(-)
 create mode 100644 typings/llvmlite/__init__.pyi
 create mode 100644 typings/llvmlite/ir/__init__.pyi
 create mode 100644 typings/llvmlite/ir/_utils.pyi
 create mode 100644 typings/llvmlite/ir/builder.pyi
 create mode 100644 typings/llvmlite/ir/instructions.pyi
 create mode 100644 typings/llvmlite/ir/module.pyi
 create mode 100644 typings/llvmlite/ir/types.pyi
 create mode 100644 typings/llvmlite/ir/values.pyi
 create mode 100644 typings/numba/core/callconv.pyi
 create mode 100644 typings/numba/core/compiler_lock.pyi
 create mode 100644 typings/numba/core/sigutils.pyi
 create mode 100644 typings/numba/cuda/codegen.pyi
 create mode 100644 typings/numba/cuda/target.pyi

diff --git a/typings/llvmlite/__init__.pyi b/typings/llvmlite/__init__.pyi
new file mode 100644
index 0000000000..ccdc2e65eb
--- /dev/null
+++ b/typings/llvmlite/__init__.pyi
@@ -0,0 +1 @@
+from llvmlite import ir
diff --git a/typings/llvmlite/ir/__init__.pyi b/typings/llvmlite/ir/__init__.pyi
new file mode 100644
index 0000000000..04641b6ad9
--- /dev/null
+++ b/typings/llvmlite/ir/__init__.pyi
@@ -0,0 +1,4 @@
+from llvmlite.ir.builder import *
+from llvmlite.ir.module import *
+from llvmlite.ir.types import *
+from llvmlite.ir.values import *
diff --git a/typings/llvmlite/ir/_utils.pyi b/typings/llvmlite/ir/_utils.pyi
new file mode 100644
index 0000000000..25ff9a7150
--- /dev/null
+++ b/typings/llvmlite/ir/_utils.pyi
@@ -0,0 +1,3 @@
+class _HasMetadata: ...
+class _StrCaching: ...
+class _StringReferenceCaching: ...
diff --git a/typings/llvmlite/ir/builder.pyi b/typings/llvmlite/ir/builder.pyi
new file mode 100644
index 0000000000..0e93a33db9
--- /dev/null
+++ b/typings/llvmlite/ir/builder.pyi
@@ -0,0 +1,18 @@
+from typing import Iterable, Optional, Union
+
+from llvmlite.ir.instructions import Instruction, Ret
+from llvmlite.ir.values import Block, Value
+
+class IRBuilder:
+    def __init__(self, block: Optional[Block]): ...
+    def ret(self, return_value: Value) -> Ret: ...
+    def extract_value(
+        self,
+        agg: Value,
+        idx: Union[Iterable[int], int],
+        name: Optional[str] = "",
+    ) -> Instruction: ...
+    def store(
+        self, value: Value, ptr: Value, align: Optional[int] = None
+    ) -> Instruction: ...
+    def ret_void(self) -> Ret: ...
diff --git a/typings/llvmlite/ir/instructions.pyi b/typings/llvmlite/ir/instructions.pyi
new file mode 100644
index 0000000000..2430503d06
--- /dev/null
+++ b/typings/llvmlite/ir/instructions.pyi
@@ -0,0 +1,6 @@
+from llvmlite.ir._utils import _HasMetadata
+from llvmlite.ir.values import NamedValue
+
+class Instruction(NamedValue, _HasMetadata): ...
+class Terminator(Instruction): ...
+class Ret(Terminator): ...
diff --git a/typings/llvmlite/ir/module.pyi b/typings/llvmlite/ir/module.pyi
new file mode 100644
index 0000000000..584de21b42
--- /dev/null
+++ b/typings/llvmlite/ir/module.pyi
@@ -0,0 +1 @@
+class Module: ...
diff --git a/typings/llvmlite/ir/types.pyi b/typings/llvmlite/ir/types.pyi
new file mode 100644
index 0000000000..2f41e3cd14
--- /dev/null
+++ b/typings/llvmlite/ir/types.pyi
@@ -0,0 +1,13 @@
+from typing import List
+
+from llvmlite.ir._utils import _StrCaching
+
+class Type(_StrCaching): ...
+
+class FunctionType(Type):
+    def __init__(
+        self, return_type: Type, args: List[Type], var_arg: bool = False
+    ): ...
+
+class PointerType(Type): ...
+class VoidType(Type): ...
diff --git a/typings/llvmlite/ir/values.pyi b/typings/llvmlite/ir/values.pyi
new file mode 100644
index 0000000000..4ba7a4789b
--- /dev/null
+++ b/typings/llvmlite/ir/values.pyi
@@ -0,0 +1,23 @@
+from typing import Tuple
+
+from llvmlite.ir._utils import (
+    _HasMetadata,
+    _StrCaching,
+    _StringReferenceCaching,
+)
+from llvmlite.ir.module import Module
+from llvmlite.ir.types import FunctionType
+
+class Value: ...
+class NamedValue(_StrCaching, _StringReferenceCaching, Value): ...
+class Block(NamedValue): ...
+class _BaseArgument(NamedValue): ...
+class Argument(_BaseArgument): ...
+class _ConstOpMixin: ...
+class GlobalValue(NamedValue, _ConstOpMixin, _HasMetadata): ...
+
+class Function(GlobalValue):
+    args: Tuple[Argument]
+
+    def __init__(self, module: Module, ftype: FunctionType, name: str): ...
+    def append_basic_block(self, name: str) -> Block: ...
diff --git a/typings/numba/__init__.pyi b/typings/numba/__init__.pyi
index 3aa25ebbd1..c9cd61c5c3 100644
--- a/typings/numba/__init__.pyi
+++ b/typings/numba/__init__.pyi
@@ -1,6 +1,5 @@
 from typing import Any, Callable
 
-import numba.core.types as types
 import numba.cuda  # import compile_ptx
 from numba.core import types
 from numba.core.ccallback import CFunc
@@ -8,3 +7,5 @@ from numba.core.types import CPointer, uint64
 
 def cfunc(sig: Any) -> Any:
     def wrapper(func: Callable[[Any], Any]) -> tuple[Any]: ...
+
+__all__ = ["types"]
diff --git a/typings/numba/core/callconv.pyi b/typings/numba/core/callconv.pyi
new file mode 100644
index 0000000000..29f775a46a
--- /dev/null
+++ b/typings/numba/core/callconv.pyi
@@ -0,0 +1,25 @@
+from typing import Iterable, Optional, Tuple
+
+from llvmlite.ir.builder import IRBuilder
+from llvmlite.ir.types import FunctionType, PointerType
+from llvmlite.ir.values import Function, Value
+from numba.core.base import BaseContext
+from numba.core.datamodel import ArgPacker
+from numba.core.types import Type
+
+class BaseCallConv:
+    def __init__(self, context: BaseContext): ...
+    def _get_arg_packer(self, argtypes: Iterable[Type]) -> ArgPacker: ...
+    def get_return_type(self, ty: Type) -> PointerType: ...
+    def get_function_type(
+        self, restype: Type, argtypes: Iterable[Type]
+    ) -> FunctionType: ...
+    def call_function(
+        self,
+        builder: IRBuilder,
+        callee: Function,
+        resty: Type,
+        argtys: Iterable[Type],
+        args: Iterable[Value],
+        attrs: Optional[Tuple[str, ...]] = None,
+    ) -> Tuple[Value, Value]: ...
diff --git a/typings/numba/core/compiler_lock.pyi b/typings/numba/core/compiler_lock.pyi
new file mode 100644
index 0000000000..322d4021de
--- /dev/null
+++ b/typings/numba/core/compiler_lock.pyi
@@ -0,0 +1,6 @@
+from typing import Any, Callable
+
+class _CompilerLock:
+    def __call__(self, func: Callable[..., Any]) -> Callable[[Any], Any]: ...
+
+global_compiler_lock = _CompilerLock()
diff --git a/typings/numba/core/sigutils.pyi b/typings/numba/core/sigutils.pyi
new file mode 100644
index 0000000000..7db44dc02d
--- /dev/null
+++ b/typings/numba/core/sigutils.pyi
@@ -0,0 +1,8 @@
+from typing import Tuple, Union
+
+from numba.core.types import Type
+from numba.core.typing.signature import Signature
+
+def normalize_signature(
+    sig: Union[Tuple[Type, ...], str, Signature]
+) -> Tuple[Tuple[Type, ...], Type]: ...
diff --git a/typings/numba/core/types/__init__.pyi b/typings/numba/core/types/__init__.pyi
index 8bb1e2b103..21e2b5e004 100644
--- a/typings/numba/core/types/__init__.pyi
+++ b/typings/numba/core/types/__init__.pyi
@@ -1,3 +1,5 @@
+from typing import Tuple
+
 class Opaque: ...
 
 class NoneType(Opaque):
@@ -17,6 +19,13 @@ class RawPointer:
 class CPointer(Type):
     def __init__(self, dtype: Type) -> None: ...
 
+class Sized(Type): ...
+class ConstSized(Type): ...
+class Hashable(Type): ...
+
+class BaseTuple(ConstSized, Hashable):
+    types: Tuple[Type]
+
 none = NoneType("none")
 
 uint32 = Integer("uint32")
diff --git a/typings/numba/cuda/codegen.pyi b/typings/numba/cuda/codegen.pyi
new file mode 100644
index 0000000000..1a7a448a9c
--- /dev/null
+++ b/typings/numba/cuda/codegen.pyi
@@ -0,0 +1,12 @@
+from typing import Any, Optional, Tuple
+
+from numba.core.codegen import Codegen, CodeLibrary
+
+class CUDACodeLibrary(CodeLibrary):
+    codegen: "JITCUDACodegen"
+    name: str
+
+    def get_asm_str(self, cc: Optional[Tuple[int, int]] = None) -> str: ...
+
+class JITCUDACodegen(Codegen):
+    def create_library(self, name: str, **kwargs: Any) -> CUDACodeLibrary: ...
diff --git a/typings/numba/cuda/compiler.pyi b/typings/numba/cuda/compiler.pyi
index 56e02dd3e2..317f7c5458 100644
--- a/typings/numba/cuda/compiler.pyi
+++ b/typings/numba/cuda/compiler.pyi
@@ -1,4 +1,7 @@
-from typing import Any, Callable, Optional
+from typing import Any, Callable, Dict, Optional, Tuple, Union
+
+from numba.core.compiler import CompileResult
+from numba.core.types import Type
 
 def compile_ptx(
     pyfunc: Callable[[Any], Any],
@@ -10,3 +13,14 @@ def compile_ptx(
     cc: Optional[Any] = None,
     opt: bool = True,
 ) -> tuple[Any]: ...
+def compile_cuda(
+    pyfunc: Callable[[Any], Any],
+    return_type: Type,
+    args: Tuple[Type, ...],
+    debug: bool = False,
+    lineinfo: bool = False,
+    inline: bool = False,
+    fastmath: bool = False,
+    nvvm_options: Optional[Dict[str, Optional[Union[str, int]]]] = None,
+    cc: Optional[Tuple[int, int]] = None,
+) -> CompileResult: ...
diff --git a/typings/numba/cuda/target.pyi b/typings/numba/cuda/target.pyi
new file mode 100644
index 0000000000..f1a346ed71
--- /dev/null
+++ b/typings/numba/cuda/target.pyi
@@ -0,0 +1,10 @@
+from llvmlite import ir
+from numba.core.base import BaseContext
+from numba.core.callconv import BaseCallConv
+
+class CUDACallConv(BaseCallConv): ...
+
+class CUDATargetContext(BaseContext):
+    call_conv: CUDACallConv
+
+    def create_module(self, name: str) -> ir.Module: ...

From 316e227f09c8f5b44a1ee51bf7e9cdf3be67c665 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 6 Dec 2023 17:33:45 +0000
Subject: [PATCH 03/12] Add compilation to SoA ABI for CUDA

---
 cunumeric/numba_utils.py                 | 256 +++++++++++++++++++++++
 tests/unit/cunumeric/test_numba_utils.py | 208 ++++++++++++++++++
 2 files changed, 464 insertions(+)
 create mode 100644 cunumeric/numba_utils.py
 create mode 100644 tests/unit/cunumeric/test_numba_utils.py

diff --git a/cunumeric/numba_utils.py b/cunumeric/numba_utils.py
new file mode 100644
index 0000000000..105a4d8ddc
--- /dev/null
+++ b/cunumeric/numba_utils.py
@@ -0,0 +1,256 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
+from llvmlite import ir
+from llvmlite.ir.builder import IRBuilder
+from llvmlite.ir.instructions import Ret
+from llvmlite.ir.types import FunctionType
+from llvmlite.ir.values import Value
+from numba import types
+from numba.core import sigutils
+from numba.core.base import BaseContext
+from numba.core.callconv import BaseCallConv
+from numba.core.codegen import CodeLibrary
+from numba.core.compiler_lock import global_compiler_lock
+from numba.core.funcdesc import FunctionDescriptor
+from numba.core.typing.signature import Signature
+from numba.cuda.codegen import CUDACodeLibrary
+from numba.cuda.compiler import compile_cuda
+
+
+class SoACallConv(BaseCallConv):
+    """
+    Calling convention where returned values are stored through pointers
+    provided as arguments.
+
+    - If the return type is a scalar, the first argument is a pointer to the
+      return type.
+    - If the return type is a tuple of length N, then the first N arguments are
+      pointers to each of the elements of the tuple.
+
+    In equivalent C, the prototype of a function with this calling convention
+    would take the following form:
+
+        void <func_name>(<Tuple item 1>*, ..., <Tuple item N>*,
+                         <Python arguments... >);
+    """
+
+    def _make_call_helper(self, builder: Any) -> None:
+        # Call helpers are used for the exception implementation. This is not
+        # needed when only wrapping functions.
+        msg = "Python exceptions are unsupported when returning in SoA form"
+        raise NotImplementedError(msg)
+
+    def return_value(self, builder: IRBuilder, retval: Value) -> Ret:
+        return builder.ret(retval)
+
+    def return_user_exc(
+        self,
+        builder: IRBuilder,
+        exc: Any,
+        exc_args: Any = None,
+        loc: Any = None,
+        func_name: Any = None,
+    ) -> None:
+        msg = "Python exceptions are unsupported when returning in SoA form"
+        raise NotImplementedError(msg)
+
+    def return_status_propagate(self, builder: IRBuilder, status: Any) -> None:
+        msg = "Return status is unsupported when returning in SoA form"
+        raise NotImplementedError(msg)
+
+    def get_function_type(
+        self, restype: types.Type, argtypes: Iterable[types.Type]
+    ) -> FunctionType:
+        """
+        Get the void LLVM IR function type: return pointers, then *argtypes*.
+        """
+        arginfo = self._get_arg_packer(argtypes)
+        be_argtypes = list(arginfo.argument_types)
+        if isinstance(restype, types.BaseTuple):
+            return_types = [self.get_return_type(t) for t in restype.types]
+        else:
+            return_types = [self.get_return_type(restype)]
+        fnty = ir.FunctionType(ir.VoidType(), return_types + be_argtypes)
+        return fnty
+
+    def decorate_function(
+        self,
+        fn: Callable[[Any], Any],
+        args: Iterable[str],
+        fe_argtypes: List[types.Type],
+        noalias: bool = False,
+    ) -> None:
+        """
+        Unused for the SoA ABI; always raises NotImplementedError.
+        """
+        raise NotImplementedError("Function decoration not used for SoA ABI")
+
+    def get_arguments(
+        self, func: ir.Function, restype: types.Type
+    ) -> Tuple[ir.Argument, ...]:
+        """
+        Get the arguments of LLVM *func* that follow the return pointers.
+        """
+        if isinstance(restype, types.BaseTuple):
+            n_returns = len(restype.types)
+        else:
+            n_returns = 1
+
+        return func.args[n_returns:]
+
+    def call_function(
+        self,
+        builder: ir.IRBuilder,
+        callee: ir.Function,
+        resty: types.Type,
+        argtys: Iterable[types.Type],
+        args: Iterable[ir.Value],
+        attrs: Optional[Tuple[str, ...]] = None,
+    ) -> Tuple[ir.Value, ir.Value]:
+        """
+        Unsupported for the SoA ABI; always raises NotImplementedError.
+        """
+        raise NotImplementedError("Can't call SoA return function directly")
+
+
+def soa_wrap_function(
+    context: BaseContext,
+    lib: CodeLibrary,
+    fndesc: FunctionDescriptor,
+    nvvm_options: Dict[str, Union[int, str, None]],
+    wrapper_name: str,
+) -> CUDACodeLibrary:
+    """
+    Wrap a Numba ABI function such that it returns tuple values into SoA
+    arguments.
+    """
+    new_library = lib.codegen.create_library(
+        f"{lib.name}_function_",
+        entry_name=wrapper_name,
+        nvvm_options=nvvm_options,
+    )
+    library = cast(CUDACodeLibrary, new_library)
+    library.add_linking_library(lib)
+
+    # Determine the wrapper (SoA ABI) and callee (Numba ABI) function types
+    argtypes = fndesc.argtypes
+    restype = fndesc.restype
+    soa_call_conv = SoACallConv(context)
+    wrapperty = soa_call_conv.get_function_type(restype, argtypes)
+    calleety = context.call_conv.get_function_type(restype, argtypes)
+
+    # Create a new module and declare the callee
+    wrapper_module = context.create_module("cuda.soa.wrapper")
+    callee = ir.Function(wrapper_module, calleety, fndesc.llvm_func_name)
+
+    # Define the wrapper - populate it with a call to the callee and store
+    # its return value(s) through the leading pointer arguments
+
+    wrapper = ir.Function(wrapper_module, wrapperty, wrapper_name)
+    builder = ir.IRBuilder(wrapper.append_basic_block(""))
+
+    arginfo = context.get_arg_packer(argtypes)
+    wrapper_args = soa_call_conv.get_arguments(wrapper, restype)
+    callargs = arginfo.as_arguments(builder, wrapper_args)
+    # We get (status, return_value), but we ignore the status since we
+    # can't propagate it through the SoA ABI anyway
+    _, return_value = context.call_conv.call_function(
+        builder, callee, restype, argtypes, callargs
+    )
+
+    if isinstance(restype, types.BaseTuple):
+        for i in range(len(restype.types)):
+            val = builder.extract_value(return_value, i)
+            builder.store(val, wrapper.args[i])
+    else:
+        builder.store(return_value, wrapper.args[0])
+    builder.ret_void()
+
+    library.add_ir_module(wrapper_module)
+    library.finalize()
+    return library
+
+
+@global_compiler_lock
+def compile_ptx_soa(
+    pyfunc: Callable[..., Any],
+    sig: Union[Tuple[types.Type, ...], str, Signature],
+    debug: bool = False,
+    lineinfo: bool = False,
+    device: bool = False,
+    fastmath: bool = False,
+    cc: Optional[Tuple[int, int]] = None,
+    opt: bool = True,
+    abi_info: Optional[Dict[str, str]] = None,
+) -> Tuple[str, types.Type]:
+    """
+    Compile *pyfunc* for *sig* to PTX whose return values use the SoA ABI.
+
+    A short copy of Numba's compile_ptx. Only ``device=True`` is supported,
+    else NotImplementedError is raised. ``abi_info["abi_name"]`` overrides
+    the PTX function name. Returns the PTX and the Numba return type.
+    """
+    if not device:
+        raise NotImplementedError(
+            "Only device functions can be compiled for the SoA ABI"
+        )
+
+    nvvm_options: Dict[str, Union[int, str, None]] = {
+        "fastmath": fastmath,
+        "opt": 3 if opt else 0,
+    }
+
+    wrapper_name = abi_info["abi_name"] if abi_info else pyfunc.__name__
+
+    args, return_type = sigutils.normalize_signature(sig)
+
+    # Default to Compute Capability 5.0 if not specified
+    cc = cc or (5, 0)
+
+    cres = compile_cuda(
+        pyfunc,
+        return_type,
+        args,
+        debug=debug,
+        lineinfo=lineinfo,
+        fastmath=fastmath,
+        nvvm_options=nvvm_options,
+        cc=cc,
+    )
+
+    lib = soa_wrap_function(
+        cres.target_context,
+        cres.library,
+        cres.fndesc,
+        nvvm_options,
+        wrapper_name,
+    )
+
+    ptx = lib.get_asm_str(cc=cc)
+    resty = cres.signature.return_type
+
+    return ptx, resty
diff --git a/tests/unit/cunumeric/test_numba_utils.py b/tests/unit/cunumeric/test_numba_utils.py
new file mode 100644
index 0000000000..269be629c3
--- /dev/null
+++ b/tests/unit/cunumeric/test_numba_utils.py
@@ -0,0 +1,208 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import re
+import typing
+
+import pytest
+from numba.types import Tuple, UniTuple, float32, float64, int32, int64
+
+from cunumeric.numba_utils import compile_ptx_soa
+
+
+@pytest.fixture
+def addsub() -> typing.Callable:
+    def addsub(x, y):
+        return x + y, x - y
+
+    return addsub
+
+
+@pytest.fixture
+def addsubmul() -> typing.Callable:
+    def addsubmul(a, x, y):
+        return a * (x + y), a * (x - y)
+
+    return addsubmul
+
+
+@pytest.fixture
+def addsubconst() -> typing.Callable:
+    def addsubconst(x, y):
+        return x + y, x - y, 3
+
+    return addsubconst
+
+
+def param_pattern(
+    function_name: str, param_index: int, param_type: str, reg_prefix: str
+) -> str:
+    """
+    A helper function to generate patterns for recognizing parameter references
+    in PTX functions - an example of a parameter reference looks like:
+
+        ld.param.u64 %rd1, [addsubconst_param_0];
+
+    These usually appear at the beginning of a function.
+    """
+    return (
+        rf"ld\.param\.{param_type}"
+        rf"\s+%{reg_prefix}[0-9]+,\s+"
+        rf"\[{function_name}_param_"
+        rf"{param_index}\]"
+    )
+
+
+def test_soa(addsub) -> None:
+    # A basic test of compilation with an SoA interface
+
+    # Compile for two int32 inputs and two int32 outputs
+    signature = UniTuple(int32, 2)(int32, int32)
+    ptx, resty = compile_ptx_soa(addsub, signature, device=True)
+
+    # The function definition should use the name of the Python function
+    fn_def_pattern = r"\.visible\s+\.func\s+addsub"
+    assert re.search(fn_def_pattern, ptx)
+
+    # The return type should match that of the signature's return type
+    assert resty == signature.return_type
+
+    # The function should have 4 parameters (numbered 0 to 3)
+    assert re.search("addsub_param_3", ptx)
+    assert not re.search("addsub_param_4", ptx)
+
+    # The first two parameters should be treated as pointers (u64 values)
+    assert re.search(param_pattern("addsub", 0, "u64", "rd"), ptx)
+    assert re.search(param_pattern("addsub", 1, "u64", "rd"), ptx)
+
+    # The remaining two parameters should be treated as 32 bit integers
+    assert re.search(param_pattern("addsub", 2, "u32", "r"), ptx)
+    assert re.search(param_pattern("addsub", 3, "u32", "r"), ptx)
+
+
+def test_soa_fn_name(addsub) -> None:
+    # Ensure that when a wrapper function name is specified, it is used in the
+    # PTX.
+    signature = UniTuple(int32, 2)(int32, int32)
+    abi_info = {"abi_name": "addsub_soa"}
+    ptx, resty = compile_ptx_soa(
+        addsub, signature, device=True, abi_info=abi_info
+    )
+    fn_def_pattern = r"\.visible\s+\.func\s+addsub_soa"
+    assert re.search(fn_def_pattern, ptx)
+
+
+def test_soa_arg_types(addsub) -> None:
+    # Ensure that specifying a different argument type is reflected
+    # appropriately in the generated PTX
+    signature = UniTuple(int32, 2)(int32, int64)
+    ptx, resty = compile_ptx_soa(addsub, signature, device=True)
+
+    # The final two parameters should now be 32- and 64-bit values
+    # respectively. Note that the load of the last parameter may be an
+    # instruction with a 32-bit destination type that effectively chops off the
+    # upper 32 bits, so we cannot test for a load of a 64-bit value, which
+    # would look like:
+    #
+    #    ld.param.u64 %rd2, [addsub_param_3];
+    #
+    # but instead we'd potentially get
+    #
+    #    ld.param.u32 %r2, [addsub_param_3];
+    #
+    # So we test the bit width of the parameter declarations only:
+    assert re.search(r"\.param\s+\.b32\s+addsub_param_2", ptx)
+    assert re.search(r"\.param\s+\.b64\s+addsub_param_3", ptx)
+
+
+def test_soa_more_args(addsubmul) -> None:
+    # A test with three arguments, but only two return values
+
+    signature = UniTuple(int32, 2)(int32, int32, int32)
+    ptx, resty = compile_ptx_soa(addsubmul, signature, device=True)
+
+    # The function should have 5 parameters (numbered 0 to 4)
+    assert re.search("addsubmul_param_4", ptx)
+    assert not re.search("addsubmul_param_5", ptx)
+
+    # The first two parameters should be treated as pointers (u64 values)
+    assert re.search(param_pattern("addsubmul", 0, "u64", "rd"), ptx)
+    assert re.search(param_pattern("addsubmul", 1, "u64", "rd"), ptx)
+
+    # The remaining three parameters should be treated as 32 bit integers
+    assert re.search(param_pattern("addsubmul", 2, "u32", "r"), ptx)
+    assert re.search(param_pattern("addsubmul", 3, "u32", "r"), ptx)
+    assert re.search(param_pattern("addsubmul", 4, "u32", "r"), ptx)
+
+
+def test_soa_more_returns(addsubconst) -> None:
+    # Test with two arguments and three return values
+
+    signature = UniTuple(int32, 3)(int32, int32)
+    ptx, resty = compile_ptx_soa(addsubconst, signature, device=True)
+
+    # The function should have 5 parameters (numbered 0 to 4)
+    assert re.search("addsubconst_param_4", ptx)
+    assert not re.search("addsubconst_param_5", ptx)
+
+    # The first three parameters should be treated as pointers (u64 values)
+    assert re.search(param_pattern("addsubconst", 0, "u64", "rd"), ptx)
+    assert re.search(param_pattern("addsubconst", 1, "u64", "rd"), ptx)
+    assert re.search(param_pattern("addsubconst", 2, "u64", "rd"), ptx)
+
+    # The remaining two parameters should be treated as 32 bit integers
+    assert re.search(param_pattern("addsubconst", 3, "u32", "r"), ptx)
+    assert re.search(param_pattern("addsubconst", 4, "u32", "r"), ptx)
+
+
+def test_soa_varying_types(addsub) -> None:
+    # Argument types differ from each other and the return type
+
+    signature = UniTuple(float64, 2)(int32, float32)
+    ptx, resty = compile_ptx_soa(addsub, signature, device=True)
+
+    # The first two parameters should be treated as pointers (u64 values)
+    assert re.search(param_pattern("addsub", 0, "u64", "rd"), ptx)
+    assert re.search(param_pattern("addsub", 1, "u64", "rd"), ptx)
+
+    # The remaining two parameters should be a 32-bit integer and a 32-bit
+    # float
+    assert re.search(param_pattern("addsub", 2, "u32", "r"), ptx)
+    assert re.search(param_pattern("addsub", 3, "f32", "f"), ptx)
+
+    # There should be a 64-bit floating point store for the result
+    assert re.search(r"st\.f64", ptx)
+
+
+def test_soa_heterogeneous_return_type(addsubconst) -> None:
+    # Test with return values of different types
+
+    signature = Tuple((float32, float64, int32))(float32, float32)
+    ptx, resty = compile_ptx_soa(addsubconst, signature, device=True)
+
+    # There should be 32- and 64-bit floating point, and 32-bit integer stores
+    # for the result
+    assert re.search(r"st\.f32", ptx)
+    assert re.search(r"st\.f64", ptx)
+    assert re.search(r"st\.u32", ptx)
+
+
+# TODO: add a test with a single (non-tuple) return value
+
+# TODO: add a test that omitting device=True raises NotImplementedError
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(pytest.main(sys.argv))

From 35502dd101c54ac62b467951a901a8a29203c66c Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Mon, 11 Dec 2023 21:58:46 +0000
Subject: [PATCH 04/12] Add Numba as a dependency

---
 conda/conda-build/meta.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml
index 85ea88d741..92e9acfab3 100644
--- a/conda/conda-build/meta.yaml
+++ b/conda/conda-build/meta.yaml
@@ -144,6 +144,7 @@ requirements:
     - libnvjitlink
     - libcusparse
 {% endif %}
+    - numba >=0.57.1
     - opt_einsum >=3.3
     - scipy
     - typing_extensions

From 5bbe4d66c1896135ed749e5417ef8edbf9c9516a Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Mon, 18 Dec 2023 23:36:58 +0000
Subject: [PATCH 05/12] WIP: extend Numba and llvmlite typing stubs

---
 .gitignore                              | 1 +
 cunumeric/numba_utils.py                | 2 +-
 typings/llvmlite/ir/types.pyi           | 4 ++--
 typings/numba/core/base.pyi             | 1 +
 typings/numba/core/callconv.pyi         | 4 ++--
 typings/numba/core/codegen.pyi          | 4 ++++
 typings/numba/core/compiler.pyi         | 1 +
 typings/numba/core/datamodel.pyi        | 6 ++++++
 typings/numba/core/funcdesc.pyi         | 1 +
 typings/numba/core/sigutils.pyi         | 2 +-
 typings/numba/core/typing/templates.pyi | 1 +
 11 files changed, 21 insertions(+), 6 deletions(-)
 create mode 100644 typings/numba/core/base.pyi
 create mode 100644 typings/numba/core/codegen.pyi
 create mode 100644 typings/numba/core/compiler.pyi
 create mode 100644 typings/numba/core/datamodel.pyi
 create mode 100644 typings/numba/core/funcdesc.pyi
 create mode 100644 typings/numba/core/typing/templates.pyi

diff --git a/.gitignore b/.gitignore
index 84244ce827..bbe6c52920 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 *.gcda
 *.gcov
 core
+!core/
 *.fluid
 *.pyc
 *.swp
diff --git a/cunumeric/numba_utils.py b/cunumeric/numba_utils.py
index 105a4d8ddc..12d25c6d73 100644
--- a/cunumeric/numba_utils.py
+++ b/cunumeric/numba_utils.py
@@ -36,7 +36,7 @@
 from numba.core.codegen import CodeLibrary
 from numba.core.compiler_lock import global_compiler_lock
 from numba.core.funcdesc import FunctionDescriptor
-from numba.core.typing.signature import Signature
+from numba.core.typing.templates import Signature
 from numba.cuda.codegen import CUDACodeLibrary
 from numba.cuda.compiler import compile_cuda
 
diff --git a/typings/llvmlite/ir/types.pyi b/typings/llvmlite/ir/types.pyi
index 2f41e3cd14..793175d89b 100644
--- a/typings/llvmlite/ir/types.pyi
+++ b/typings/llvmlite/ir/types.pyi
@@ -1,4 +1,4 @@
-from typing import List
+from typing import Sequence
 
 from llvmlite.ir._utils import _StrCaching
 
@@ -6,7 +6,7 @@ class Type(_StrCaching): ...
 
 class FunctionType(Type):
     def __init__(
-        self, return_type: Type, args: List[Type], var_arg: bool = False
+        self, return_type: Type, args: Sequence[Type], var_arg: bool = False
     ): ...
 
 class PointerType(Type): ...
diff --git a/typings/numba/core/base.pyi b/typings/numba/core/base.pyi
new file mode 100644
index 0000000000..1a5f4cece7
--- /dev/null
+++ b/typings/numba/core/base.pyi
@@ -0,0 +1 @@
+class BaseContext: ...
diff --git a/typings/numba/core/callconv.pyi b/typings/numba/core/callconv.pyi
index 29f775a46a..bc4b071348 100644
--- a/typings/numba/core/callconv.pyi
+++ b/typings/numba/core/callconv.pyi
@@ -1,7 +1,7 @@
 from typing import Iterable, Optional, Tuple
 
 from llvmlite.ir.builder import IRBuilder
-from llvmlite.ir.types import FunctionType, PointerType
+from llvmlite.ir.types import FunctionType, PointerType, Type as LLType
 from llvmlite.ir.values import Function, Value
 from numba.core.base import BaseContext
 from numba.core.datamodel import ArgPacker
@@ -10,7 +10,7 @@ from numba.core.types import Type
 class BaseCallConv:
     def __init__(self, context: BaseContext): ...
     def _get_arg_packer(self, argtypes: Iterable[Type]) -> ArgPacker: ...
-    def get_return_type(self, ty: Type) -> PointerType: ...
+    def get_return_type(self, ty: Type) -> LLType: ...
     def get_function_type(
         self, restype: Type, argtypes: Iterable[Type]
     ) -> FunctionType: ...
diff --git a/typings/numba/core/codegen.pyi b/typings/numba/core/codegen.pyi
new file mode 100644
index 0000000000..a4288cce7b
--- /dev/null
+++ b/typings/numba/core/codegen.pyi
@@ -0,0 +1,4 @@
+class CodeLibrary:
+    codegen: "Codegen"
+
+class Codegen: ...
diff --git a/typings/numba/core/compiler.pyi b/typings/numba/core/compiler.pyi
new file mode 100644
index 0000000000..0097ad9fac
--- /dev/null
+++ b/typings/numba/core/compiler.pyi
@@ -0,0 +1 @@
+class CompileResult: ...
diff --git a/typings/numba/core/datamodel.pyi b/typings/numba/core/datamodel.pyi
new file mode 100644
index 0000000000..3122d80f08
--- /dev/null
+++ b/typings/numba/core/datamodel.pyi
@@ -0,0 +1,6 @@
+from typing import Sequence
+
+from llvmlite.ir.types import Type
+
+class ArgPacker:
+    argument_types: Sequence[Type]
diff --git a/typings/numba/core/funcdesc.pyi b/typings/numba/core/funcdesc.pyi
new file mode 100644
index 0000000000..0e19a1ef32
--- /dev/null
+++ b/typings/numba/core/funcdesc.pyi
@@ -0,0 +1 @@
+class FunctionDescriptor: ...
diff --git a/typings/numba/core/sigutils.pyi b/typings/numba/core/sigutils.pyi
index 7db44dc02d..106c5ca50b 100644
--- a/typings/numba/core/sigutils.pyi
+++ b/typings/numba/core/sigutils.pyi
@@ -1,7 +1,7 @@
 from typing import Tuple, Union
 
 from numba.core.types import Type
-from numba.core.typing.signature import Signature
+from numba.core.typing.templates import Signature
 
 def normalize_signature(
     sig: Union[Tuple[Type, ...], str, Signature]
diff --git a/typings/numba/core/typing/templates.pyi b/typings/numba/core/typing/templates.pyi
new file mode 100644
index 0000000000..ae8c064822
--- /dev/null
+++ b/typings/numba/core/typing/templates.pyi
@@ -0,0 +1 @@
+class Signature: ...

From 5cbbbb44549e33b832b7b520b136b560daf8c941 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 10:47:58 +0100
Subject: [PATCH 06/12] Correct string literal formatting and mypy checking

---
 cunumeric/numba_utils.py                  |  2 +-
 typings/llvmlite/__init__.pyi             | 14 ++++++++++++
 typings/llvmlite/ir/__init__.pyi          | 14 ++++++++++++
 typings/llvmlite/ir/_utils.pyi            | 14 ++++++++++++
 typings/llvmlite/ir/builder.pyi           | 14 ++++++++++++
 typings/llvmlite/ir/instructions.pyi      | 14 ++++++++++++
 typings/llvmlite/ir/module.pyi            | 14 ++++++++++++
 typings/llvmlite/ir/types.pyi             | 14 ++++++++++++
 typings/llvmlite/ir/values.pyi            | 14 ++++++++++++
 typings/numba/__init__.pyi                | 14 ++++++++++++
 typings/numba/core/__init__.pyi           | 13 +++++++++++
 typings/numba/core/base.pyi               | 27 +++++++++++++++++++++-
 typings/numba/core/callconv.pyi           | 14 ++++++++++++
 typings/numba/core/ccallback/__init__.pyi | 14 ++++++++++++
 typings/numba/core/codegen.pyi            | 28 ++++++++++++++++++++++-
 typings/numba/core/compiler.pyi           | 25 +++++++++++++++++++-
 typings/numba/core/compiler_lock.pyi      | 14 ++++++++++++
 typings/numba/core/datamodel.pyi          | 23 ++++++++++++++++++-
 typings/numba/core/funcdesc.pyi           | 23 ++++++++++++++++++-
 typings/numba/core/sigutils.pyi           | 14 ++++++++++++
 typings/numba/core/types/__init__.pyi     | 14 ++++++++++++
 typings/numba/core/typing/templates.pyi   | 19 ++++++++++++++-
 22 files changed, 349 insertions(+), 7 deletions(-)

diff --git a/cunumeric/numba_utils.py b/cunumeric/numba_utils.py
index 12d25c6d73..4aa46cfabd 100644
--- a/cunumeric/numba_utils.py
+++ b/cunumeric/numba_utils.py
@@ -211,7 +211,7 @@ def compile_ptx_soa(
     # values as SoA and some simplifications to keep it short
     if not device:
         raise NotImplementedError(
-            "Only device functions can be compiled for " "the SoA ABI"
+            "Only device functions can be compiled for the SoA ABI"
         )
 
     nvvm_options: Dict[str, Union[int, str, None]] = {
diff --git a/typings/llvmlite/__init__.pyi b/typings/llvmlite/__init__.pyi
index ccdc2e65eb..711280d9e0 100644
--- a/typings/llvmlite/__init__.pyi
+++ b/typings/llvmlite/__init__.pyi
@@ -1 +1,15 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from llvmlite import ir
diff --git a/typings/llvmlite/ir/__init__.pyi b/typings/llvmlite/ir/__init__.pyi
index 04641b6ad9..048ddc711e 100644
--- a/typings/llvmlite/ir/__init__.pyi
+++ b/typings/llvmlite/ir/__init__.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from llvmlite.ir.builder import *
 from llvmlite.ir.module import *
 from llvmlite.ir.types import *
diff --git a/typings/llvmlite/ir/_utils.pyi b/typings/llvmlite/ir/_utils.pyi
index 25ff9a7150..d02b145ad4 100644
--- a/typings/llvmlite/ir/_utils.pyi
+++ b/typings/llvmlite/ir/_utils.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 class _HasMetadata: ...
 class _StrCaching: ...
 class _StringReferenceCaching: ...
diff --git a/typings/llvmlite/ir/builder.pyi b/typings/llvmlite/ir/builder.pyi
index 0e93a33db9..581f1d390c 100644
--- a/typings/llvmlite/ir/builder.pyi
+++ b/typings/llvmlite/ir/builder.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Iterable, Optional, Union
 
 from llvmlite.ir.instructions import Instruction, Ret
diff --git a/typings/llvmlite/ir/instructions.pyi b/typings/llvmlite/ir/instructions.pyi
index 2430503d06..75f1c2872b 100644
--- a/typings/llvmlite/ir/instructions.pyi
+++ b/typings/llvmlite/ir/instructions.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from llvmlite.ir._utils import _HasMetadata
 from llvmlite.ir.values import NamedValue
 
diff --git a/typings/llvmlite/ir/module.pyi b/typings/llvmlite/ir/module.pyi
index 584de21b42..e9d24b059c 100644
--- a/typings/llvmlite/ir/module.pyi
+++ b/typings/llvmlite/ir/module.pyi
@@ -1 +1,15 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 class Module: ...
diff --git a/typings/llvmlite/ir/types.pyi b/typings/llvmlite/ir/types.pyi
index 793175d89b..40d78c0b4d 100644
--- a/typings/llvmlite/ir/types.pyi
+++ b/typings/llvmlite/ir/types.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Sequence
 
 from llvmlite.ir._utils import _StrCaching
diff --git a/typings/llvmlite/ir/values.pyi b/typings/llvmlite/ir/values.pyi
index 4ba7a4789b..6ea7db4772 100644
--- a/typings/llvmlite/ir/values.pyi
+++ b/typings/llvmlite/ir/values.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Tuple
 
 from llvmlite.ir._utils import (
diff --git a/typings/numba/__init__.pyi b/typings/numba/__init__.pyi
index c9cd61c5c3..33afd9e7ac 100644
--- a/typings/numba/__init__.pyi
+++ b/typings/numba/__init__.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any, Callable
 
 import numba.cuda  # import compile_ptx
diff --git a/typings/numba/core/__init__.pyi b/typings/numba/core/__init__.pyi
index e69de29bb2..7cc93af294 100644
--- a/typings/numba/core/__init__.pyi
+++ b/typings/numba/core/__init__.pyi
@@ -0,0 +1,13 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/typings/numba/core/base.pyi b/typings/numba/core/base.pyi
index 1a5f4cece7..fdc9e08746 100644
--- a/typings/numba/core/base.pyi
+++ b/typings/numba/core/base.pyi
@@ -1 +1,26 @@
-class BaseContext: ...
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Sequence
+
+from llvmlite import ir
+from numba.core.callconv import BaseCallConv
+from numba.core.datamodel import ArgPacker
+from numba.core.types import Type
+
+class BaseContext:
+    call_conv: BaseCallConv
+
+    def create_module(self, name: str) -> ir.Module: ...
+    def get_arg_packer(self, fe_args: Sequence[Type]) -> ArgPacker: ...
diff --git a/typings/numba/core/callconv.pyi b/typings/numba/core/callconv.pyi
index bc4b071348..01d410261a 100644
--- a/typings/numba/core/callconv.pyi
+++ b/typings/numba/core/callconv.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Iterable, Optional, Tuple
 
 from llvmlite.ir.builder import IRBuilder
diff --git a/typings/numba/core/ccallback/__init__.pyi b/typings/numba/core/ccallback/__init__.pyi
index 81b5030b9c..f9b9c57e89 100644
--- a/typings/numba/core/ccallback/__init__.pyi
+++ b/typings/numba/core/ccallback/__init__.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any
 
 class CFunc(object):
diff --git a/typings/numba/core/codegen.pyi b/typings/numba/core/codegen.pyi
index a4288cce7b..dad46d2ffe 100644
--- a/typings/numba/core/codegen.pyi
+++ b/typings/numba/core/codegen.pyi
@@ -1,4 +1,30 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any
+
+from llvmlite import ir
+
 class CodeLibrary:
     codegen: "Codegen"
+    name: str
+
+    def add_linking_library(self, library: CodeLibrary) -> None: ...
+    def add_ir_module(self, module: ir.Module) -> None: ...
+    def finalize(self) -> None: ...
+
+class Codegen:
+    name: str
 
-class Codegen: ...
+    def create_library(self, name: str, **kwargs: Any) -> CodeLibrary: ...
diff --git a/typings/numba/core/compiler.pyi b/typings/numba/core/compiler.pyi
index 0097ad9fac..6734887789 100644
--- a/typings/numba/core/compiler.pyi
+++ b/typings/numba/core/compiler.pyi
@@ -1 +1,24 @@
-class CompileResult: ...
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from numba.core.base import BaseContext
+from numba.core.codegen import CodeLibrary
+from numba.core.funcdesc import FunctionDescriptor
+from numba.core.typing.templates import Signature
+
+class CompileResult:
+    target_context: BaseContext
+    library: CodeLibrary
+    fndesc: FunctionDescriptor
+    signature: Signature
diff --git a/typings/numba/core/compiler_lock.pyi b/typings/numba/core/compiler_lock.pyi
index 322d4021de..24616c2e18 100644
--- a/typings/numba/core/compiler_lock.pyi
+++ b/typings/numba/core/compiler_lock.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any, Callable
 
 class _CompilerLock:
diff --git a/typings/numba/core/datamodel.pyi b/typings/numba/core/datamodel.pyi
index 3122d80f08..40f38f97f2 100644
--- a/typings/numba/core/datamodel.pyi
+++ b/typings/numba/core/datamodel.pyi
@@ -1,6 +1,27 @@
-from typing import Sequence
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
+from typing import Sequence, Tuple
+
+from llvmlite.ir import Argument
+from llvmlite.ir.builder import IRBuilder
 from llvmlite.ir.types import Type
+from llvmlite.ir.values import Value
 
 class ArgPacker:
     argument_types: Sequence[Type]
+
+    def as_arguments(
+        self, builder: IRBuilder, values: Tuple[Argument, ...]
+    ) -> Tuple[Value, ...]: ...
diff --git a/typings/numba/core/funcdesc.pyi b/typings/numba/core/funcdesc.pyi
index 0e19a1ef32..fe83b3d268 100644
--- a/typings/numba/core/funcdesc.pyi
+++ b/typings/numba/core/funcdesc.pyi
@@ -1 +1,22 @@
-class FunctionDescriptor: ...
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Tuple
+
+from numba.core.types import Type
+
+class FunctionDescriptor:
+    argtypes: Tuple[Type, ...]
+    restype: Type
+    llvm_func_name: str
diff --git a/typings/numba/core/sigutils.pyi b/typings/numba/core/sigutils.pyi
index 106c5ca50b..2c122b5a78 100644
--- a/typings/numba/core/sigutils.pyi
+++ b/typings/numba/core/sigutils.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Tuple, Union
 
 from numba.core.types import Type
diff --git a/typings/numba/core/types/__init__.pyi b/typings/numba/core/types/__init__.pyi
index 21e2b5e004..9b5cbdfdc0 100644
--- a/typings/numba/core/types/__init__.pyi
+++ b/typings/numba/core/types/__init__.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Tuple
 
 class Opaque: ...
diff --git a/typings/numba/core/typing/templates.pyi b/typings/numba/core/typing/templates.pyi
index ae8c064822..3eac0629a0 100644
--- a/typings/numba/core/typing/templates.pyi
+++ b/typings/numba/core/typing/templates.pyi
@@ -1 +1,18 @@
-class Signature: ...
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from numba.core.types import Type
+
+class Signature:
+    return_type: Type

From 2d9790225b82e907b9b056adcb86a0e156a6f4be Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 11:58:46 +0100
Subject: [PATCH 07/12] Attempt to fix other mypy error

---
 cunumeric/_sphinxext/_cunumeric_directive.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cunumeric/_sphinxext/_cunumeric_directive.py b/cunumeric/_sphinxext/_cunumeric_directive.py
index ef6402f6c4..03fb3be32b 100644
--- a/cunumeric/_sphinxext/_cunumeric_directive.py
+++ b/cunumeric/_sphinxext/_cunumeric_directive.py
@@ -15,14 +15,14 @@
 from __future__ import annotations
 
 from docutils import nodes
-from docutils.statemachine import ViewList
+from docutils.statemachine import StringList
 from sphinx.util.docutils import SphinxDirective
 from sphinx.util.nodes import nested_parse_with_titles
 
 
 class CunumericDirective(SphinxDirective):
     def parse(self, rst_text: str, annotation: str) -> list[nodes.Node]:
-        result = ViewList()
+        result = StringList()
         for line in rst_text.split("\n"):
             result.append(line, annotation)
         node = nodes.paragraph()

From ee4869420623dab724201dcf3934eb1901d41d5b Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 13:15:11 +0100
Subject: [PATCH 08/12] Try adding cuda-nvcc to test env to provide NVVM

---
 continuous_integration/scripts/test-cunumeric | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/continuous_integration/scripts/test-cunumeric b/continuous_integration/scripts/test-cunumeric
index 698179b31d..605583f340 100755
--- a/continuous_integration/scripts/test-cunumeric
+++ b/continuous_integration/scripts/test-cunumeric
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 setup_env() {
-    mamba create -yn legate -c ~/.artifacts/conda-build/legate_core -c ~/.artifacts/conda-build/cunumeric -c conda-forge -c "nvidia/label/cuda-12.0.0" legate-core cunumeric
+    mamba create -yn legate -c ~/.artifacts/conda-build/legate_core -c ~/.artifacts/conda-build/cunumeric -c conda-forge -c "nvidia/label/cuda-12.0.0" legate-core cunumeric cuda-nvcc
 }
 
 setup_test_env() {

From 3dc7343e929dcc3f2f1b178a26cd9a0d9ad270e4 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 14:36:16 +0100
Subject: [PATCH 09/12] Attempt to fix flake8

---
 cunumeric/module.py                    | 2 +-
 install.py                             | 2 +-
 tests/integration/utils/comparisons.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cunumeric/module.py b/cunumeric/module.py
index 424f89df4a..e87670b756 100644
--- a/cunumeric/module.py
+++ b/cunumeric/module.py
@@ -1748,7 +1748,7 @@ def check_list_depth(arr: Any, prefix: NdShape = (0,)) -> int:
                     "List depths are mismatched. First element was at depth "
                     f"{first_depth}, but there is an element at"
                     f" depth {other_depth}, "
-                    f"arrays{convert_to_array_form(prefix+(idx+1,))}"
+                    f"arrays{convert_to_array_form(prefix + (idx + 1,))}"
                 )
 
     return depths[0] + 1
diff --git a/install.py b/install.py
index 92d16ad55f..7dcc6e588d 100755
--- a/install.py
+++ b/install.py
@@ -338,7 +338,7 @@ def validate_path(path):
 
     cmake_flags += f"""\
 -DCMAKE_BUILD_TYPE={(
-    "Debug" if debug else "RelWithDebInfo" if debug_release else "Release"
+"Debug" if debug else "RelWithDebInfo" if debug_release else "Release"
 )}
 -DBUILD_SHARED_LIBS=ON
 -DCMAKE_CUDA_ARCHITECTURES={str(arch)}
diff --git a/tests/integration/utils/comparisons.py b/tests/integration/utils/comparisons.py
index 65571b38c1..9ab5247e9f 100644
--- a/tests/integration/utils/comparisons.py
+++ b/tests/integration/utils/comparisons.py
@@ -50,7 +50,7 @@ def allclose(
         inds = islice(zip(*np.where(~close)), diff_limit)
         diffs = [f"  index {i}: {a[i]} {b[i]}" for i in inds]
         N = len(diffs)
-        print(f"First {N} difference{'s' if N>1 else ''} for allclose:\n")
+        print(f"First {N} difference{'s' if N > 1 else ''} for allclose:\n")
         print("\n".join(diffs))
         print(f"\nWith diff_limit={diff_limit}\n")
 

From 8265c47c3bc3dcf1154f16e9a6b50846a59567e3 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 14:44:50 +0100
Subject: [PATCH 10/12] Another attempt to fix flake8

---
 install.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/install.py b/install.py
index 7dcc6e588d..b35eb36812 100755
--- a/install.py
+++ b/install.py
@@ -336,10 +336,15 @@ def validate_path(path):
     if debug or verbose:
         cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")]
 
+    if debug:
+        build_type = "Debug"
+    elif debug_release:
+        build_type = "RelWithDebInfo"
+    else:
+        build_type = "Release"
+
     cmake_flags += f"""\
--DCMAKE_BUILD_TYPE={(
-"Debug" if debug else "RelWithDebInfo" if debug_release else "Release"
-)}
+-DCMAKE_BUILD_TYPE={build_type}
 -DBUILD_SHARED_LIBS=ON
 -DCMAKE_CUDA_ARCHITECTURES={str(arch)}
 -DLegion_MAX_DIM={str(maxdim)}

From 0af1f57cec63ecf1a3185117c5a5fa8e6fc2babf Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 16:43:13 +0100
Subject: [PATCH 11/12] Fix typing nit

---
 typings/llvmlite/ir/builder.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/typings/llvmlite/ir/builder.pyi b/typings/llvmlite/ir/builder.pyi
index 581f1d390c..29b4a89597 100644
--- a/typings/llvmlite/ir/builder.pyi
+++ b/typings/llvmlite/ir/builder.pyi
@@ -24,7 +24,7 @@ class IRBuilder:
         self,
         agg: Value,
         idx: Union[Iterable[int], int],
-        name: Optional[str] = "",
+        name: str = "",
     ) -> Instruction: ...
     def store(
         self, value: Value, ptr: Value, align: Optional[int] = None

From f8794107fcad7529d83da79d20b220c5b0b9c814 Mon Sep 17 00:00:00 2001
From: Graham Markall <gmarkall@nvidia.com>
Date: Wed, 1 May 2024 16:46:24 +0100
Subject: [PATCH 12/12] Add missing copyright headers

---
 typings/numba/cuda/__init__.pyi | 14 ++++++++++++++
 typings/numba/cuda/codegen.pyi  | 14 ++++++++++++++
 typings/numba/cuda/compiler.pyi | 14 ++++++++++++++
 typings/numba/cuda/target.pyi   | 14 ++++++++++++++
 4 files changed, 56 insertions(+)

diff --git a/typings/numba/cuda/__init__.pyi b/typings/numba/cuda/__init__.pyi
index d66e40c5f4..77610ef8ca 100644
--- a/typings/numba/cuda/__init__.pyi
+++ b/typings/numba/cuda/__init__.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any
 
 from numba.cuda.compiler import compile_ptx as compile_ptx
diff --git a/typings/numba/cuda/codegen.pyi b/typings/numba/cuda/codegen.pyi
index 1a7a448a9c..297a07a31e 100644
--- a/typings/numba/cuda/codegen.pyi
+++ b/typings/numba/cuda/codegen.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any, Optional, Tuple
 
 from numba.core.codegen import Codegen, CodeLibrary
diff --git a/typings/numba/cuda/compiler.pyi b/typings/numba/cuda/compiler.pyi
index 317f7c5458..b7c51bc47e 100644
--- a/typings/numba/cuda/compiler.pyi
+++ b/typings/numba/cuda/compiler.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from typing import Any, Callable, Dict, Optional, Tuple, Union
 
 from numba.core.compiler import CompileResult
diff --git a/typings/numba/cuda/target.pyi b/typings/numba/cuda/target.pyi
index f1a346ed71..cc43b37b59 100644
--- a/typings/numba/cuda/target.pyi
+++ b/typings/numba/cuda/target.pyi
@@ -1,3 +1,17 @@
+# Copyright 2024 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from llvmlite import ir
 from numba.core.base import BaseContext
 from numba.core.callconv import BaseCallConv