|
11 | 11 |
|
12 | 12 | import numba.core.event as ev |
13 | 13 | from numba.core import errors, sigutils, types |
14 | | -from numba.core.compiler import CompileResult |
| 14 | +from numba.core.compiler import CompileResult, Flags |
15 | 15 | from numba.core.compiler_lock import global_compiler_lock |
16 | 16 | from numba.core.dispatcher import Dispatcher, _FunctionCompiler |
17 | 17 | from numba.core.target_extension import dispatcher_registry, target_registry |
| 18 | +from numba.core.types import void |
18 | 19 | from numba.core.typing.typeof import Purpose, typeof |
19 | 20 |
|
20 | 21 | from numba_dpex import config, spirv_generator |
21 | 22 | from numba_dpex.core.exceptions import ( |
22 | 23 | ExecutionQueueInferenceError, |
| 24 | + KernelHasReturnValueError, |
23 | 25 | UnsupportedKernelArgumentError, |
24 | 26 | ) |
25 | 27 | from numba_dpex.core.pipelines import kernel_compiler |
| 28 | +from numba_dpex.core.targets.kernel_target import CompilationMode |
26 | 29 | from numba_dpex.core.types import DpnpNdArray |
27 | 30 |
|
28 | 31 | from .target import DPEX_KERNEL_EXP_TARGET_NAME, dpex_exp_kernel_target |
@@ -81,10 +84,19 @@ def _compile_to_spirv( |
81 | 84 | kernel_fn = kernel_targetctx.prepare_spir_kernel( |
82 | 85 | kernel_func, kernel_fndesc.argtypes |
83 | 86 | ) |
84 | | - |
85 | | - # makes sure that the spir_func is completely inlined into the |
86 | | - # spir_kernel wrapper |
87 | | - kernel_library.optimize_final_module() |
| 87 | + # Get the compiler flags that were passed through the target descriptor |
| 88 | + flags = Flags() |
| 89 | + self.targetdescr.options.parse_as_flags(flags, self.targetoptions) |
| 90 | + |
| 91 | + # If the inline_threshold option was set then set the property in the |
| 92 | + # kernel_library to force inlining ``overload`` calls into a kernel. |
| 93 | + inline_threshold = flags.inline_threshold # pylint: disable=E1101 |
| 94 | + kernel_library.inline_threshold = inline_threshold |
| 95 | + |
| 96 | + # Call finalize on the LLVM module. Finalization will result in |
| 97 | + # all linking libraries getting linked together and final optimization |
| 98 | + # including inlining of functions if an inlining level is specified. |
| 99 | + kernel_library.finalize() |
88 | 100 | # Compile the LLVM IR to SPIR-V |
89 | 101 | kernel_spirv_module = spirv_generator.llvm_to_spirv( |
90 | 102 | kernel_targetctx, |
@@ -144,9 +156,15 @@ def _compile_cached( |
144 | 156 | try: |
145 | 157 | cres: CompileResult = self._compile_core(args, return_type) |
146 | 158 |
|
147 | | - kernel_device_ir_module = self._compile_to_spirv( |
148 | | - cres.library, cres.fndesc, cres.target_context |
149 | | - ) |
| 159 | + if ( |
| 160 | + self.targetoptions["_compilation_mode"] |
| 161 | + == CompilationMode.KERNEL |
| 162 | + ): |
| 163 | + kernel_device_ir_module: _KernelModule = self._compile_to_spirv( |
| 164 | + cres.library, cres.fndesc, cres.target_context |
| 165 | + ) |
| 166 | + else: |
| 167 | + kernel_device_ir_module = None |
150 | 168 |
|
151 | 169 | kcres_attrs = [] |
152 | 170 |
|
@@ -185,9 +203,6 @@ class KernelDispatcher(Dispatcher): |
185 | 203 | an executable binary, the dispatcher compiles it to SPIR-V and then caches |
186 | 204 | that SPIR-V bitcode. |
187 | 205 |
|
188 | | - FIXME: Fix issues identified by pylint with this class. |
189 | | - https://github.com/IntelPython/numba-dpex/issues/1196 |
190 | | -
|
191 | 206 | """ |
192 | 207 |
|
193 | 208 | targetdescr = dpex_exp_kernel_target |
@@ -282,12 +297,28 @@ def cb_llvm(dur): |
282 | 297 | with self._compiling_counter: |
283 | 298 | args, return_type = sigutils.normalize_signature(sig) |
284 | 299 |
|
285 | | - try: |
286 | | - self._compiler.check_queue_equivalence_of_args( |
287 | | - self._kernel_name, args |
288 | | - ) |
289 | | - except ExecutionQueueInferenceError as eqie: |
290 | | - raise eqie |
| 300 | + if ( |
| 301 | + self.targetoptions["_compilation_mode"] |
| 302 | + == CompilationMode.KERNEL |
| 303 | + ): |
| 304 | + # Compute-follows-data based queue equivalence is only |
| 305 | + # evaluated for kernel functions, whose arguments are |
| 306 | + # supposed to be arrays. For device_func decorated |
| 307 | + # functions, the arguments can be scalars, so we skip the |
| 308 | + # queue equivalence check. |
| 309 | + try: |
| 310 | + self._compiler.check_queue_equivalence_of_args( |
| 311 | + self._kernel_name, args |
| 312 | + ) |
| 313 | + except ExecutionQueueInferenceError as eqie: |
| 314 | + raise eqie |
| 315 | + |
| 316 | + # A function being compiled in the KERNEL compilation mode |
| 317 | + # cannot have a non-void return value |
| 318 | + if return_type and return_type != void: |
| 319 | + raise KernelHasReturnValueError( |
| 320 | + kernel_name=None, return_type=return_type, sig=sig |
| 321 | + ) |
291 | 322 |
|
292 | 323 | # Don't recompile if signature already exists |
293 | 324 | existing = self.overloads.get(tuple(args)) |
|
0 commit comments