IntelPython
diff --git a/‎numba_dpex/core/codegen.py‎
Lines changed: 50 additions & 5 deletions b/‎numba_dpex/core/codegen.py‎
Lines changed: 50 additions & 5 deletions
diff --git a/‎numba_dpex/core/descriptor.py‎
Lines changed: 14 additions & 0 deletions b/‎numba_dpex/core/descriptor.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎numba_dpex/core/targets/kernel_target.py‎
Lines changed: 23 additions & 0 deletions b/‎numba_dpex/core/targets/kernel_target.py‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎numba_dpex/core/types/dpctl_types.py‎
Lines changed: 3 additions & 1 deletion b/‎numba_dpex/core/types/dpctl_types.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎numba_dpex/experimental/decorators.py‎
Lines changed: 52 additions & 1 deletion b/‎numba_dpex/experimental/decorators.py‎
Lines changed: 52 additions & 1 deletion
diff --git a/‎numba_dpex/experimental/kernel_dispatcher.py‎
Lines changed: 48 additions & 17 deletions b/‎numba_dpex/experimental/kernel_dispatcher.py‎
Lines changed: 48 additions & 17 deletions
diff --git a/‎numba_dpex/tests/experimental/codegen/__init__.py‎
Lines changed: 3 additions & 0 deletions b/‎numba_dpex/tests/experimental/codegen/__init__.py‎
Lines changed: 3 additions & 0 deletions
@@ -29,6 +29,25 @@ class SPIRVCodeLibrary(CPUCodeLibrary):
     def _optimize_functions(self, ll_module):
         pass
 
+    @property
+    def inline_threshold(self):
+        """The inlining threshold value to be used to optimize the final library"""
+        if hasattr(self, "_inline_threshold"):
+            return self._inline_threshold
+        else:
+            return 0
+
+    @inline_threshold.setter
+    def inline_threshold(self, value: int):
+        """Sets the inlining threshold level for the library."""
+        if value < 0 or value > 3:
+            logging.warning(
+                "Unsupported inline threshold. Set a value between 0 and 3"
+            )
+            self._inline_threshold = 0
+        else:
+            self._inline_threshold = value
+
     def _optimize_final_module(self):
         # Run some lightweight optimization to simplify the module.
         pmb = ll.PassManagerBuilder()
@@ -43,12 +62,38 @@ def _optimize_final_module(self):
             )
 
         pmb.disable_unit_at_a_time = False
+
         if config.INLINE_THRESHOLD is not None:
-            logging.warning(
-                "Setting INLINE_THRESHOLD leads to very aggressive "
-                + "optimizations that may produce incorrect binary."
-            )
-            pmb.inlining_threshold = config.INLINE_THRESHOLD
+            # Check if a decorator-level inline threshold was set and use that
+            # instead of the global configuration.
+            if (
+                hasattr(self, "_inline_threshold")
+                and self._inline_threshold > 0
+                and self._inline_threshold <= 3
+            ):
+                logging.warning(
+                    "Setting INLINE_THRESHOLD leads to very aggressive "
+                    + "optimizations that may produce incorrect binary."
+                )
+                pmb.inlining_threshold = self._inline_threshold
+            elif not hasattr(self, "_inline_threshold"):
+                logging.warning(
+                    "Setting INLINE_THRESHOLD leads to very aggressive "
+                    + "optimizations that may produce incorrect binary."
+                )
+                pmb.inlining_threshold = config.INLINE_THRESHOLD
+        else:
+            if (
+                hasattr(self, "_inline_threshold")
+                and self._inline_threshold > 0
+                and self._inline_threshold <= 3
+            ):
+                logging.warning(
+                    "Setting INLINE_THRESHOLD leads to very aggressive "
+                    + "optimizations that may produce incorrect binary."
+                )
+                pmb.inlining_threshold = self._inline_threshold
+
         pmb.disable_unroll_loops = True
         pmb.loop_vectorize = False
         pmb.slp_vectorize = False
 
@@ -8,9 +8,12 @@
 from numba.core.cpu import CPUTargetOptions
 from numba.core.descriptors import TargetDescriptor
 
+from numba_dpex import config
+
 from .targets.dpjit_target import DPEX_TARGET_NAME, DpexTargetContext
 from .targets.kernel_target import (
     DPEX_KERNEL_TARGET_NAME,
+    CompilationMode,
     DpexKernelTargetContext,
     DpexKernelTypingContext,
 )
@@ -40,13 +43,24 @@ class DpexTargetOptions(CPUTargetOptions):
     release_gil = _option_mapping("release_gil")
     no_compile = _option_mapping("no_compile")
     use_mlir = _option_mapping("use_mlir")
+    inline_threshold = _option_mapping("inline_threshold")
+    _compilation_mode = _option_mapping("_compilation_mode")
 
     def finalize(self, flags, options):
         super().finalize(flags, options)
         _inherit_if_not_set(flags, options, "experimental", False)
         _inherit_if_not_set(flags, options, "release_gil", False)
         _inherit_if_not_set(flags, options, "no_compile", True)
         _inherit_if_not_set(flags, options, "use_mlir", False)
+        if config.INLINE_THRESHOLD is not None:
+            _inherit_if_not_set(
+                flags, options, "inline_threshold", config.INLINE_THRESHOLD
+            )
+        else:
+            _inherit_if_not_set(flags, options, "inline_threshold", 0)
+        _inherit_if_not_set(
+            flags, options, "_compilation_mode", CompilationMode.KERNEL
+        )
 
 
 class DpexKernelTarget(TargetDescriptor):
 
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
+from enum import IntEnum
 from functools import cached_property
 
 import dpnp
@@ -30,6 +31,28 @@
 LLVM_SPIRV_ARGS = 112
 
 
+class CompilationMode(IntEnum):
+    """Flags used to determine how a function should be compiled by the
+    numba_dpex.experimental.kernel_dispatcher.KernelDispatcher. Note that the
+    functionality will be merged into
+    numba_dpex.core.kernel_interface.dispatcher in the future.
+
+        KERNEL :         Indicates that the function will be compiled into an
+                         LLVM function that has ``spir_kernel`` calling
+                         convention and is compiled down to SPIR-V.
+                         Additionally, the function cannot return any value and
+                         input arguments to the function have to adhere to
+                         "compute follows data" to ensure execution queue
+                         inference.
+        DEVICE_FUNC:     Indicates that the function will be compiled into an
+                         LLVM function that has ``spir_func`` calling convention
+                         and will be compiled only into LLVM bitcode.
+    """
+
+    KERNEL = 1
+    DEVICE_FUNC = 2
+
+
 class DpexKernelTypingContext(typing.BaseContext):
     """Custom typing context to support kernel compilation.
 
 
@@ -28,7 +28,9 @@ def __init__(self, sycl_queue):
             self._unique_id = hash(sycl_queue)
         except Exception:
             self._unique_id = self.rand_digit_str(16)
-        super(DpctlSyclQueue, self).__init__(name="DpctlSyclQueue")
+        super(DpctlSyclQueue, self).__init__(
+            name=f"DpctlSyclQueue on {self._device}"
+        )
 
     def rand_digit_str(self, n):
         return "".join(
 
@@ -6,6 +6,7 @@
 ready to move to numba_dpex.core.
 """
 import inspect
+from warnings import warn
 
 from numba.core import sigutils
 from numba.core.target_extension import (
@@ -14,6 +15,8 @@
     target_registry,
 )
 
+from numba_dpex.core.targets.kernel_target import CompilationMode
+
 from .target import DPEX_KERNEL_EXP_TARGET_NAME
 
 
@@ -30,6 +33,14 @@ def kernel(func_or_sig=None, **options):
     """
 
     dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME)
+    if "_compilation_mode" in options:
+        user_compilation_mode = options["_compilation_mode"]
+        warn(
+            "_compilation_mode is an internal flag that should not be set "
+            "in the decorator. The decorator defined option "
+            f"{user_compilation_mode} is going to be ignored."
+        )
+    options["_compilation_mode"] = CompilationMode.KERNEL
 
     # FIXME: The options need to be evaluated and checked here like it is
     # done in numba.core.decorators.jit
@@ -80,4 +91,44 @@ def _specialized_kernel_dispatcher(pyfunc):
     return _kernel_dispatcher(func)
 
 
-jit_registry[target_registry[DPEX_KERNEL_EXP_TARGET_NAME]] = kernel
+def device_func(func_or_sig=None, **options):
+    """Generates a function with a device-only calling convention, e.g.,
+    spir_func for SPIR-V based devices.
+
+    The decorator is used to compile overloads in the DpexKernelTarget and
+    users should use the decorator to define functions that are only callable
+    from inside another device_func or a kernel.
+
+    A device_func is not compiled down to device binary IR and instead left as
+    LLVM IR. It is done so that the function can be inlined fully into the
+    kernel module from where it is used at the LLVM level, leading to more
+    optimization opportunities.
+
+    Returns:
+        KernelDispatcher: A KernelDispatcher instance with the
+        _compilation_mode option set to DEVICE_FUNC.
+    """
+    dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME)
+
+    if "_compilation_mode" in options:
+        user_compilation_mode = options["_compilation_mode"]
+        warn(
+            "_compilation_mode is an internal flag that should not be set "
+            "in the decorator. The decorator defined option "
+            f"{user_compilation_mode} is going to be ignored."
+        )
+    options["_compilation_mode"] = CompilationMode.DEVICE_FUNC
+
+    def _kernel_dispatcher(pyfunc):
+        return dispatcher(
+            pyfunc=pyfunc,
+            targetoptions=options,
+        )
+
+    if func_or_sig is None:
+        return _kernel_dispatcher
+
+    return _kernel_dispatcher(func_or_sig)
+
+
+jit_registry[target_registry[DPEX_KERNEL_EXP_TARGET_NAME]] = device_func
@@ -11,18 +11,21 @@
 
 import numba.core.event as ev
 from numba.core import errors, sigutils, types
-from numba.core.compiler import CompileResult
+from numba.core.compiler import CompileResult, Flags
 from numba.core.compiler_lock import global_compiler_lock
 from numba.core.dispatcher import Dispatcher, _FunctionCompiler
 from numba.core.target_extension import dispatcher_registry, target_registry
+from numba.core.types import void
 from numba.core.typing.typeof import Purpose, typeof
 
 from numba_dpex import config, spirv_generator
 from numba_dpex.core.exceptions import (
     ExecutionQueueInferenceError,
+    KernelHasReturnValueError,
     UnsupportedKernelArgumentError,
 )
 from numba_dpex.core.pipelines import kernel_compiler
+from numba_dpex.core.targets.kernel_target import CompilationMode
 from numba_dpex.core.types import DpnpNdArray
 
 from .target import DPEX_KERNEL_EXP_TARGET_NAME, dpex_exp_kernel_target
@@ -81,10 +84,19 @@ def _compile_to_spirv(
         kernel_fn = kernel_targetctx.prepare_spir_kernel(
             kernel_func, kernel_fndesc.argtypes
         )
-
-        # makes sure that the spir_func is completely inlined into the
-        # spir_kernel wrapper
-        kernel_library.optimize_final_module()
+        # Get the compiler flags that were passed through the target descriptor
+        flags = Flags()
+        self.targetdescr.options.parse_as_flags(flags, self.targetoptions)
+
+        # If the inline_threshold option was set then set the property in the
+        # kernel_library to force inlining ``overload`` calls into a kernel.
+        inline_threshold = flags.inline_threshold  # pylint: disable=E1101
+        kernel_library.inline_threshold = inline_threshold
+
+        # Call finalize on the LLVM module. Finalization will result in
+        # all linking libraries getting linked together and final optimization
+        # including inlining of functions if an inlining level is specified.
+        kernel_library.finalize()
         # Compiled the LLVM IR to SPIR-V
         kernel_spirv_module = spirv_generator.llvm_to_spirv(
             kernel_targetctx,
@@ -144,9 +156,15 @@ def _compile_cached(
         try:
             cres: CompileResult = self._compile_core(args, return_type)
 
-            kernel_device_ir_module = self._compile_to_spirv(
-                cres.library, cres.fndesc, cres.target_context
-            )
+            if (
+                self.targetoptions["_compilation_mode"]
+                == CompilationMode.KERNEL
+            ):
+                kernel_device_ir_module: _KernelModule = self._compile_to_spirv(
+                    cres.library, cres.fndesc, cres.target_context
+                )
+            else:
+                kernel_device_ir_module = None
 
             kcres_attrs = []
 
@@ -185,9 +203,6 @@ class KernelDispatcher(Dispatcher):
     an executable binary, the dispatcher compiles it to SPIR-V and then caches
     that SPIR-V bitcode.
 
-    FIXME: Fix issues identified by pylint with this class.
-    https://github.com/IntelPython/numba-dpex/issues/1196
-
     """
 
     targetdescr = dpex_exp_kernel_target
@@ -282,12 +297,28 @@ def cb_llvm(dur):
             with self._compiling_counter:
                 args, return_type = sigutils.normalize_signature(sig)
 
-                try:
-                    self._compiler.check_queue_equivalence_of_args(
-                        self._kernel_name, args
-                    )
-                except ExecutionQueueInferenceError as eqie:
-                    raise eqie
+                if (
+                    self.targetoptions["_compilation_mode"]
+                    == CompilationMode.KERNEL
+                ):
+                    # Compute follows data based queue equivalence is only
+                    # evaluated for kernel functions whose arguments are
+                    # supposed to be arrays. For device_func decorated
+                    # functions, the arguments can be scalar and we skip queue
+                    # equivalence check.
+                    try:
+                        self._compiler.check_queue_equivalence_of_args(
+                            self._kernel_name, args
+                        )
+                    except ExecutionQueueInferenceError as eqie:
+                        raise eqie
+
+                    # A function being compiled in the KERNEL compilation mode
+                    # cannot have a non-void return value
+                    if return_type and return_type != void:
+                        raise KernelHasReturnValueError(
+                            kernel_name=None, return_type=return_type, sig=sig
+                        )
 
                 # Don't recompile if signature already exists
                 existing = self.overloads.get(tuple(args))
 
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2023 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# SPDX-FileCopyrightText: 2023 Intel Corporation`
	`2`	`+#`
	`3`	`+# SPDX-License-Identifier: Apache-2.0`