diff --git a/docs/_templates/autoapi/index.rst b/docs/_templates/autoapi/index.rst index 6f37ea6912..f111f90ed9 100644 --- a/docs/_templates/autoapi/index.rst +++ b/docs/_templates/autoapi/index.rst @@ -7,8 +7,8 @@ This page contains auto-generated API reference documentation [#f1]_. :maxdepth: 1 numba_dpex/kernel_api/index - numba_dpex/experimental/decorators/index - numba_dpex/experimental/launcher/index + numba_dpex/core/decorators/index + numba_dpex/core/kernel_launcher/index {% for page in pages %} {% if page.top_level_object and page.display %} diff --git a/docs/source/conf.py b/docs/source/conf.py index 6b053075c3..4276307360 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -115,7 +115,7 @@ autoapi_dirs = [ "../../numba_dpex/kernel_api", - "../../numba_dpex/experimental", + "../../numba_dpex/core", ] autoapi_type = "python" diff --git a/numba_dpex/__init__.py b/numba_dpex/__init__.py index f77566dec4..999a0a6dd9 100644 --- a/numba_dpex/__init__.py +++ b/numba_dpex/__init__.py @@ -108,8 +108,9 @@ def parse_sem_version(version_string: str) -> Tuple[int, int, int]: # backward compatibility from numba_dpex.kernel_api import NdRange, Range # noqa E402 +from .core.decorators import device_func, dpjit, kernel # noqa E402 +from .core.kernel_launcher import call_kernel, call_kernel_async # noqa E402 from .core.targets import dpjit_target # noqa E402 -from .decorators import dpjit, func, kernel # noqa E402 from .ocl.stubs import ( # noqa E402 GLOBAL_MEM_FENCE, LOCAL_MEM_FENCE, @@ -137,4 +138,11 @@ def parse_sem_version(version_string: str) -> Tuple[int, int, int]: __version__ = get_versions()["version"] del get_versions -__all__ = types.__all__ + ["Range", "NdRange", "call_kernel"] +__all__ = types.__all__ + [ + "call_kernel", + "device_func", + "dpjit", + "kernel", + "Range", + "NdRange", +] diff --git a/numba_dpex/core/decorators.py b/numba_dpex/core/decorators.py new file mode 100644 index 0000000000..3779526b9f --- /dev/null +++ b/numba_dpex/core/decorators.py @@ -0,0 +1,350 @@ +# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +import inspect +from warnings import warn + +from numba.core import decorators, sigutils, typeinfer +from numba.core.target_extension import ( + jit_registry, + resolve_dispatcher_from_str, + target_registry, +) + +from numba_dpex.core import config +from numba_dpex.core.pipelines.dpjit_compiler import get_compiler +from numba_dpex.core.targets.dpjit_target import DPEX_TARGET_NAME +from numba_dpex.experimental.target import DPEX_KERNEL_EXP_TARGET_NAME +from numba_dpex.kernel_api_impl.spirv.dispatcher import SPIRVKernelDispatcher +from numba_dpex.kernel_api_impl.spirv.target import CompilationMode + + +def _parse_func_or_sig(signature_or_function): + # Handle signature (borrowed from numba). swapped signature and list check + if signature_or_function is None: + # No signature, no function + pyfunc = None + sigs = [] + elif sigutils.is_signature(signature_or_function): + # A single signature is passed + pyfunc = None + sigs = [signature_or_function] + elif isinstance(signature_or_function, list): + # A list of signatures is passed + pyfunc = None + sigs = signature_or_function + else: + # A function is passed + pyfunc = signature_or_function + sigs = [] + + return pyfunc, sigs + + +def kernel(function_or_signature=None, **options): + """A decorator to compile a function written using :py:mod:`numba_dpex.kernel_api`. + + The ``kernel`` decorator triggers the compilation of a function written + using the data-parallel kernel programming API exposed by + :py:mod:`numba_dpex.kernel_api`. Such a function is conceptually + equivalent to a kernel function written in the C++ SYCL eDSL. The + decorator will compile the function based on the types of the arguments + to a SPIR-V binary that can be executed either on OpenCL CPU, GPU + devices or Intel Level Zero GPU devices. + + Any function to be compilable using the kernel decorator should + adhere to the following semantic rules: + + - The first argument to the function should be either an instance of the + :class:`numba_dpex.kernel_api.Item` class or an instance of the + :class:`numba_dpex.kernel_api.NdItem`. + + - The function should not return any value. + + - The function should have at least one array type argument that can + either be an instance of ``dpnp.ndarray`` or an instance of + ``dpctl.tensor.usm_ndarray``. + + + Args: + signature_or_function (optional): An optional signature or list of + signatures for which a function is to be compiled. Passing in a + signature "specializes" the decorated function and no other versions + of the function will be compiled. A function can also be + directly passed instead of a signature and the signature will get + inferred from the function. The actual compilation happens on every + invocation of the :func:`numba_dpex.experimental.call_kernel` + function where the decorated function is passed in as an argument + along with the argument values for the decorated function. + options (optional): + - **debug** (bool): Whether the compilation should happen in debug + mode. *(Default = False)* + - **inline_threshold** (int): Specifies the level of inlining that + the compiler should attempt. *(Default = 2)* + Returns: + An instance of + :class:`numba_dpex.kernel_api_impl.spirv.dispatcher.KernelDispatcher`. + The ``KernelDispatcher`` object compiles the decorated function when + passed in to :func:`numba_dpex.experimental.call_kernel`. + + Examples: + + 1. Decorate a function and pass it to ``call_kernel`` for compilation and + execution. + + .. code-block:: python + + import dpnp + import numba_dpex as dpex + from numba_dpex import kernel_api as kapi + + + # Data parallel kernel implementing vector sum + @dpex.kernel + def vecadd(item: kapi.Item, a, b, c): + i = item.get_id(0) + c[i] = a[i] + b[i] + + + N = 1024 + a = dpnp.ones(N) + b = dpnp.ones_like(a) + c = dpnp.zeros_like(a) + dpex.call_kernel(vecadd, kapi.Range(N), a, b, c) + + 2. Specializes a kernel and then compiles it directly before executing it + via ``call_kernel``. The kernel is specialized to expect a 1-D + ``dpnp.ndarray`` with either ``float32`` type elements or ``int64`` type + elements. + + .. code-block:: python + + import dpnp + import numba_dpex as dpex + from numba_dpex import kernel_api as kapi + from numba_dpex import DpnpNdArray, float32, int64 + from numba_dpex.core.types.kernel_api.index_space_ids import ItemType + + i64arrty = DpnpNdArray(ndim=1, dtype=int64, layout="C") + f32arrty = DpnpNdArray(ndim=1, dtype=float32, layout="C") + item_ty = ItemType(ndim=1) + + specialized_kernel = dpex.kernel( + [ + (item_ty, i64arrty, i64arrty, i64arrty), + (item_ty, f32arrty, f32arrty, f32arrty), + ] + ) + + + def vecadd(item: kapi.Item, a, b, c): + i = item.get_id(0) + c[i] = a[i] + b[i] + + + # Compile all specializations for vecadd + precompiled_kernels = specialized_kernel(vecadd) + N = 1024 + a = dpnp.ones(N, dtype=dpnp.int64) + b = dpnp.ones_like(a) + c = dpnp.zeros_like(a) + # Call a specific pre-compiled version of vecadd + dpex.call_kernel(precompiled_kernels, kapi.Range(N), a, b, c) + + """ + + # dispatcher is a type: + # + dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME) + if "_compilation_mode" in options: + user_compilation_mode = options["_compilation_mode"] + warn( + "_compilation_mode is an internal flag that should not be set " + "in the decorator. The decorator defined option " + f"{user_compilation_mode} is going to be ignored." + ) + options["_compilation_mode"] = CompilationMode.KERNEL + + # TODO: The options need to be evaluated and checked here like it is + # done in numba.core.decorators.jit + + func, sigs = _parse_func_or_sig(function_or_signature) + for sig in sigs: + if isinstance(sig, str): + raise NotImplementedError( + "Specifying signatures as string is not yet supported" + ) + + def _kernel_dispatcher(pyfunc): + disp: SPIRVKernelDispatcher = dispatcher( + pyfunc=pyfunc, + targetoptions=options, + ) + + if len(sigs) > 0: + with typeinfer.register_dispatcher(disp): + for sig in sigs: + disp.compile(sig) + disp.disable_compile() + + return disp + + if func is None: + return _kernel_dispatcher + + if not inspect.isfunction(func): + raise ValueError( + "Argument passed to the kernel decorator is neither a " + "function object, nor a signature. If you are trying to " + "specialize the kernel that takes a single argument, specify " + "the return type as None explicitly." + ) + return _kernel_dispatcher(func) + + +def device_func(function_or_signature=None, **options): + """Compiles a device-callable function that can be only invoked from a kernel. + + The decorator is used to express auxiliary device-only functions that can + be called from a kernel or another device function, but are not callable + from the host. This decorator :func:`numba_dpex.experimental.device_func` + has no direct analogue in SYCL and primarily is provided to help programmers + make their kapi applications modular. + + A ``device_func`` decorated function does not require the first argument to + be a :class:`numba_dpex.kernel_api.Item` object or a + :class:`numba_dpex.kernel_api.NdItem` object, and unlike a ``kernel`` + decorated function is allowed to return any value. + All :py:mod:`numba_dpex.kernel_api` functionality can be used in a + ``device_func`` decorated function. + + The decorator is also used to compile overloads in the ``DpexKernelTarget``. + + A ``device_func`` decorated function is not compiled down to device binary + and instead is compiled down to LLVM IR. Final compilation to binary happens + when the function is invoked from a ``kernel`` decorated function. The + compilation happens this was to allow a ``device_func`` decorated function + to be internally linked into the kernel module at the LLVM level, leading to + more optimization opportunities. + + Args: + signature_or_function (optional): An optional signature or list of + signatures for which a function is to be compiled. Passing in a + signature "specializes" the decorated function and no other versions + of the function will be compiled. A function can also be + directly passed instead of a signature and the signature will get + inferred from the function. The actual compilation happens on every + invocation of the decorated function from another ``device_func`` or + ``kernel`` decorated function. + options (optional): + - **debug** (bool): Whether the compilation should happen in debug + mode. *(Default = False)* + - **inline_threshold** (int): Specifies the level of inlining that + the compiler should attempt. *(Default = 2)* + + Returns: + An instance of + :class:`numba_dpex.kernel_api_impl.spirv.dispatcher.KernelDispatcher`. + The ``KernelDispatcher`` object compiles the decorated function when + it is called from another function. + + + Example: + + .. code-block:: python + + import dpnp + + from numba_dpex import experimental as dpex_exp + from numba_dpex import kernel_api as kapi + + + @dpex_exp.device_func + def increment_value(nd_item: NdItem, a): + i = nd_item.get_global_id(0) + + a[i] += 1 + group_barrier(nd_item.get_group(), MemoryScope.DEVICE) + + if i == 0: + for idx in range(1, a.size): + a[0] += a[idx] + + + @dpex_exp.kernel + def another_kernel(nd_item: NdItem, a): + increment_value(nd_item, a) + + + N = 16 + b = dpnp.ones(N, dtype=dpnp.int32) + + dpex_exp.call_kernel(another_kernel, dpex.NdRange((N,), (N,)), b) + """ + dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME) + + if "_compilation_mode" in options: + user_compilation_mode = options["_compilation_mode"] + warn( + "_compilation_mode is an internal flag that should not be set " + "in the decorator. The decorator defined option " + f"{user_compilation_mode} is going to be ignored." + ) + options["_compilation_mode"] = CompilationMode.DEVICE_FUNC + + func, sigs = _parse_func_or_sig(function_or_signature) + for sig in sigs: + if isinstance(sig, str): + raise NotImplementedError( + "Specifying signatures as string is not yet supported" + ) + + def _kernel_dispatcher(pyfunc): + disp: SPIRVKernelDispatcher = dispatcher( + pyfunc=pyfunc, + targetoptions=options, + ) + + if len(sigs) > 0: + with typeinfer.register_dispatcher(disp): + for sig in sigs: + disp.compile(sig) + disp.disable_compile() + + return disp + + if func is None: + return _kernel_dispatcher + + return _kernel_dispatcher(function_or_signature) + + +# ----------------- Experimental dpjit decorator ------------------------------# + + +def dpjit(*args, **kws): + if "nopython" in kws and kws["nopython"] is not True: + warn("nopython is set for dpjit and is ignored", RuntimeWarning) + if "forceobj" in kws: + warn("forceobj is set for dpjit and is ignored", RuntimeWarning) + del kws["forceobj"] + if "pipeline_class" in kws: + warn("pipeline class is set for dpjit and is ignored", RuntimeWarning) + del kws["pipeline_class"] + + use_mlir = kws.pop("use_mlir", bool(config.USE_MLIR)) + + kws.update({"nopython": True}) + kws.update({"parallel": True}) + kws.update({"pipeline_class": get_compiler(use_mlir)}) + + kws.update({"_target": DPEX_TARGET_NAME}) + + return decorators.jit(*args, **kws) + + +# add it to the decorator registry, this is so e.g. @overload can look up a +# JIT function to do the compilation work. +jit_registry[target_registry[DPEX_TARGET_NAME]] = dpjit +jit_registry[target_registry[DPEX_KERNEL_EXP_TARGET_NAME]] = device_func diff --git a/numba_dpex/experimental/launcher.py b/numba_dpex/core/kernel_launcher.py similarity index 100% rename from numba_dpex/experimental/launcher.py rename to numba_dpex/core/kernel_launcher.py diff --git a/numba_dpex/core/parfors/kernel_builder.py b/numba_dpex/core/parfors/kernel_builder.py index 588a617684..b43b5fa31c 100644 --- a/numba_dpex/core/parfors/kernel_builder.py +++ b/numba_dpex/core/parfors/kernel_builder.py @@ -26,9 +26,10 @@ import numba_dpex as dpex from numba_dpex.core import config +from numba_dpex.core.kernel_interface.spirv_kernel import SpirvKernel from ..descriptor import dpex_kernel_target -from ..types import DpnpNdArray, USMNdArray +from ..types import DpnpNdArray from ..utils.kernel_templates import RangeKernelTemplate @@ -66,9 +67,7 @@ def _compile_kernel_parfor( sycl_queue, kernel_name, func_ir, argtypes, debug=False ): # Create a SPIRVKernel object - kernel = dpex.core.kernel_interface.spirv_kernel.SpirvKernel( - func_ir, kernel_name - ) + kernel = SpirvKernel(func_ir, kernel_name) # compile the kernel kernel.compile( diff --git a/numba_dpex/core/utils/kernel_launcher.py b/numba_dpex/core/utils/kernel_launcher.py index 7c48589d49..8bc181d1ab 100644 --- a/numba_dpex/core/utils/kernel_launcher.py +++ b/numba_dpex/core/utils/kernel_launcher.py @@ -328,8 +328,6 @@ def set_kernel_from_spirv( "unexpected behavior" ) - print(build_kernel_options) - if build_kernel_options != "": spv_compiler_options = self.context.insert_const_string( self.builder.module, build_kernel_options diff --git a/numba_dpex/decorators.py b/numba_dpex/decorators.py deleted file mode 100644 index 97e1ed8028..0000000000 --- a/numba_dpex/decorators.py +++ /dev/null @@ -1,171 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import inspect -import warnings - -from numba.core import decorators, sigutils -from numba.core.target_extension import jit_registry, target_registry - -from numba_dpex.core import config -from numba_dpex.core.kernel_interface.dispatcher import JitKernel -from numba_dpex.core.kernel_interface.func import ( - compile_func, - compile_func_template, -) -from numba_dpex.core.pipelines.dpjit_compiler import get_compiler -from numba_dpex.core.targets.dpjit_target import DPEX_TARGET_NAME - - -def kernel( - func_or_sig=None, - debug=False, - enable_cache=True, -): - """A decorator to define a kernel function. - - A kernel function is conceptually equivalent to a SYCL kernel function, and - gets compiled into either an OpenCL or a LevelZero SPIR-V binary kernel. - A kernel decorated Python function has the following restrictions: - - * The function can not return any value. - * All array arguments passed to a kernel should adhere to compute - follows data programming model. - """ - - def _kernel_dispatcher(pyfunc, sigs=None): - return JitKernel( - pyfunc=pyfunc, - debug_flags=debug, - enable_cache=enable_cache, - specialization_sigs=sigs, - ) - - if func_or_sig is None: - return _kernel_dispatcher - elif isinstance(func_or_sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported by numba-dpex" - ) - elif isinstance(func_or_sig, list) or sigutils.is_signature(func_or_sig): - # String signatures are not supported as passing usm_ndarray type as - # a string is not possible. Numba's sigutils relies on the type being - # available in Numba's `types.__dict__` and dpex types are not registered - # there yet. - if isinstance(func_or_sig, list): - for sig in func_or_sig: - if isinstance(sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported " - "by numba-dpex" - ) - # Specialized signatures can either be a single signature or a list. - # In case only one signature is provided convert it to a list - if not isinstance(func_or_sig, list): - func_or_sig = [func_or_sig] - - def _specialized_kernel_dispatcher(pyfunc): - return JitKernel( - pyfunc=pyfunc, - debug_flags=debug, - enable_cache=enable_cache, - specialization_sigs=func_or_sig, - ) - - return _specialized_kernel_dispatcher - else: - func = func_or_sig - if not inspect.isfunction(func): - raise ValueError( - "Argument passed to the kernel decorator is neither a " - "function object, nor a signature. If you are trying to " - "specialize the kernel that takes a single argument, specify " - "the return type as void explicitly." - ) - return _kernel_dispatcher(func) - - -def func(func_or_sig=None, debug=False, enable_cache=True): - """A decorator to define a kernel device function. - - Device functions are functions that can be only invoked from a kernel - and not from a host function. This provides a special decorator - `numba_dpex.func` specifically to implement a device function. - - A device function can be invoked from another device function and - unlike a kernel function, a device function can return a value like - normal functions. - """ - - def _func_autojit(pyfunc): - return compile_func_template( - pyfunc, debug=debug, enable_cache=enable_cache - ) - - if func_or_sig is None: - return _func_autojit - elif isinstance(func_or_sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported by numba-dpex" - ) - elif isinstance(func_or_sig, list) or sigutils.is_signature(func_or_sig): - # String signatures are not supported as passing usm_ndarray type as - # a string is not possible. Numba's sigutils relies on the type being - # available in Numba's types.__dict__ and dpex types are not registered - # there yet. - if isinstance(func_or_sig, list): - for sig in func_or_sig: - if isinstance(sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported " - "by numba-dpex" - ) - # Specialized signatures can either be a single signature or a list. - # In case only one signature is provided convert it to a list - if not isinstance(func_or_sig, list): - func_or_sig = [func_or_sig] - - def _wrapped(pyfunc): - return compile_func(pyfunc, func_or_sig, debug=debug) - - return _wrapped - else: - # no signature - func = func_or_sig - return _func_autojit(func) - - -# ----------------- Experimental dpjit decorator ------------------------------# - - -def dpjit(*args, **kws): - if "nopython" in kws and kws["nopython"] is not True: - warnings.warn( - "nopython is set for dpjit and is ignored", RuntimeWarning - ) - if "forceobj" in kws: - warnings.warn( - "forceobj is set for dpjit and is ignored", RuntimeWarning - ) - del kws["forceobj"] - if "pipeline_class" in kws: - warnings.warn( - "pipeline class is set for dpjit and is ignored", RuntimeWarning - ) - del kws["pipeline_class"] - - use_mlir = kws.pop("use_mlir", bool(config.USE_MLIR)) - - kws.update({"nopython": True}) - kws.update({"parallel": True}) - kws.update({"pipeline_class": get_compiler(use_mlir)}) - - kws.update({"_target": DPEX_TARGET_NAME}) - - return decorators.jit(*args, **kws) - - -# add it to the decorator registry, this is so e.g. @overload can look up a -# JIT function to do the compilation work. -jit_registry[target_registry[DPEX_TARGET_NAME]] = dpjit diff --git a/numba_dpex/dpnp_iface/_intrinsic.py b/numba_dpex/dpnp_iface/_intrinsic.py index d37402079b..0e402331f0 100644 --- a/numba_dpex/dpnp_iface/_intrinsic.py +++ b/numba_dpex/dpnp_iface/_intrinsic.py @@ -281,7 +281,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes, queue_ref): types.uint64, types.voidptr, ) - from numba_dpex.decorators import dpjit + from numba_dpex.core.decorators import dpjit op = dpjit(_call_usm_allocator) fnop = context.typing_context.resolve_value_type(op) diff --git a/numba_dpex/examples/debug/dpex_func.py b/numba_dpex/examples/debug/dpex_func.py index 321eaeb004..f7573f3c51 100644 --- a/numba_dpex/examples/debug/dpex_func.py +++ b/numba_dpex/examples/debug/dpex_func.py @@ -2,21 +2,20 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpctl import dpnp as np import numba_dpex as ndpx -@ndpx.func(debug=True) +@ndpx.device_func(debug=True) def func_sum(a_in_func, b_in_func): result = a_in_func + b_in_func return result @ndpx.kernel(debug=True) -def kernel_sum(a_in_kernel, b_in_kernel, c_in_kernel): - i = ndpx.get_global_id(0) +def kernel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel): + i = item.get_id(0) c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) @@ -24,7 +23,7 @@ def driver(a, b, c, global_size): print("a = ", a) print("b = ", b) print("c = ", c) - kernel_sum[ndpx.Range(global_size)](a, b, c) + ndpx.call_kernel(kernel_sum, ndpx.Range(global_size), a, b, c) print("a + b = ", c) diff --git a/numba_dpex/examples/debug/side-by-side-2.py b/numba_dpex/examples/debug/side-by-side-2.py index 15301b7a3e..3295ca1d3a 100644 --- a/numba_dpex/examples/debug/side-by-side-2.py +++ b/numba_dpex/examples/debug/side-by-side-2.py @@ -4,7 +4,6 @@ import argparse -import dpctl import dpnp import numba import numpy as np @@ -57,17 +56,17 @@ def numba_func_driver(a, b, c): def ndpx_func_driver(a, b, c): - kernel[ndpx.Range(len(c))](a, b, c) + ndpx.call_kernel(_kernel, ndpx.Range(len(c)), a, b, c) @ndpx.kernel(debug=True) -def kernel(a_in_kernel, b_in_kernel, c_in_kernel): - i = ndpx.get_global_id(0) +def _kernel(item, a_in_kernel, b_in_kernel, c_in_kernel): + i = item.get_id(0) c_in_kernel[i] = ndpx_loop_body(i, a_in_kernel, b_in_kernel) numba_loop_body = numba.njit(debug=True)(common_loop_body) -ndpx_loop_body = ndpx.func(debug=True)(common_loop_body) +ndpx_loop_body = ndpx.device_func(debug=True)(common_loop_body) def main(): diff --git a/numba_dpex/examples/debug/side-by-side.py b/numba_dpex/examples/debug/side-by-side.py index 0c26d5be71..972b05844e 100644 --- a/numba_dpex/examples/debug/side-by-side.py +++ b/numba_dpex/examples/debug/side-by-side.py @@ -4,7 +4,6 @@ import argparse -import dpctl import dpnp import numba import numpy as np @@ -55,17 +54,17 @@ def numba_func_driver(a, b, c): def ndpx_func_driver(a, b, c): - kernel[ndpx.Range(len(c))](a, b, c) + ndpx.call_kernel(_kernel, ndpx.Range(len(c)), a, b, c) @ndpx.kernel(debug=True) -def kernel(a_in_kernel, b_in_kernel, c_in_kernel): - i = ndpx.get_global_id(0) +def _kernel(item, a_in_kernel, b_in_kernel, c_in_kernel): + i = item.get_id(0) c_in_kernel[i] = ndpx_loop_body(a_in_kernel[i], b_in_kernel[i]) numba_loop_body = numba.njit(debug=True)(common_loop_body) -ndpx_loop_body = ndpx.func(debug=True)(common_loop_body) +ndpx_loop_body = ndpx.device_func(debug=True)(common_loop_body) def main(): diff --git a/numba_dpex/examples/debug/simple_dpex_func.py b/numba_dpex/examples/debug/simple_dpex_func.py index a7e2c241cb..e16cc1224b 100644 --- a/numba_dpex/examples/debug/simple_dpex_func.py +++ b/numba_dpex/examples/debug/simple_dpex_func.py @@ -7,15 +7,15 @@ import numba_dpex as ndpx -@ndpx.func(debug=True) +@ndpx.device_func(debug=True) def func_sum(a_in_func, b_in_func): result = a_in_func + b_in_func # breakpoint location return result @ndpx.kernel(debug=True) -def kernel_sum(a_in_kernel, b_in_kernel, c_in_kernel): - i = ndpx.get_global_id(0) +def kernel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel): + i = item.get_id(0) c_in_kernel[i] = func_sum(a_in_kernel[i], b_in_kernel[i]) @@ -24,6 +24,6 @@ def kernel_sum(a_in_kernel, b_in_kernel, c_in_kernel): b = np.arange(global_size, dtype=np.float32) c = np.empty_like(a) -kernel_sum[ndpx.Range(global_size)](a, b, c) +ndpx.call_kernel(kernel_sum, ndpx.Range(global_size), a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/simple_sum.py b/numba_dpex/examples/debug/simple_sum.py index 0bfa88396f..b676b927fe 100644 --- a/numba_dpex/examples/debug/simple_sum.py +++ b/numba_dpex/examples/debug/simple_sum.py @@ -8,8 +8,8 @@ @ndpx.kernel(debug=True) -def data_parallel_sum(a, b, c): - i = ndpx.get_global_id(0) +def data_parallel_sum(item, a, b, c): + i = item.get_id(0) c[i] = a[i] + b[i] # Condition breakpoint location @@ -20,6 +20,6 @@ def data_parallel_sum(a, b, c): b = np.array(np.random.random(N), dtype=np.float32) c = np.ones_like(a) -data_parallel_sum[ndpx.Range(global_size)](a, b, c) +ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/sum.py b/numba_dpex/examples/debug/sum.py index 9974a65a96..9bed988eab 100644 --- a/numba_dpex/examples/debug/sum.py +++ b/numba_dpex/examples/debug/sum.py @@ -8,8 +8,8 @@ @ndpx.kernel(debug=True) -def data_parallel_sum(a_in_kernel, b_in_kernel, c_in_kernel): - i = ndpx.get_global_id(0) # numba-kernel-breakpoint +def data_parallel_sum(item, a_in_kernel, b_in_kernel, c_in_kernel): + i = item.get_id(0) # numba-kernel-breakpoint l1 = a_in_kernel[i] # second-line l2 = b_in_kernel[i] # third-line c_in_kernel[i] = l1 + l2 # fourth-line @@ -19,7 +19,7 @@ def driver(a, b, c, global_size): print("before : ", a) print("before : ", b) print("before : ", c) - data_parallel_sum[ndpx.Range(global_size)](a, b, c) + ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) print("after : ", c) diff --git a/numba_dpex/examples/debug/sum_local_vars.py b/numba_dpex/examples/debug/sum_local_vars.py index 537aab40ce..0440e6857e 100644 --- a/numba_dpex/examples/debug/sum_local_vars.py +++ b/numba_dpex/examples/debug/sum_local_vars.py @@ -2,14 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpctl import dpnp as np import numba_dpex as ndpx @ndpx.kernel(debug=True) -def data_parallel_sum(a, b, c): +def data_parallel_sum(item, a, b, c): i = ndpx.get_global_id(0) l1 = a[i] + 2.5 l2 = b[i] * 0.3 @@ -23,6 +22,6 @@ def data_parallel_sum(a, b, c): b = np.array(np.random.random(N), dtype=np.float32) c = np.ones_like(a) -data_parallel_sum[ndpx.Range(global_size)](a, b, c) +ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) print("Done...") diff --git a/numba_dpex/examples/debug/sum_local_vars_revive.py b/numba_dpex/examples/debug/sum_local_vars_revive.py index 11b53d99f3..47e4ad3a8e 100644 --- a/numba_dpex/examples/debug/sum_local_vars_revive.py +++ b/numba_dpex/examples/debug/sum_local_vars_revive.py @@ -7,14 +7,14 @@ import numba_dpex as ndpx -@ndpx.func +@ndpx.device_func def revive(x): return x @ndpx.kernel(debug=True) -def data_parallel_sum(a, b, c): - i = ndpx.get_global_id(0) +def data_parallel_sum(item, a, b, c): + i = item.get_id(0) l1 = a[i] + 2.5 l2 = b[i] * 0.3 c[i] = l1 + l2 @@ -28,6 +28,6 @@ def data_parallel_sum(a, b, c): b = np.array(np.random.random(N), dtype=np.float32) c = np.ones_like(a) -data_parallel_sum[ndpx.Range(global_size)](a, b, c) +ndpx.call_kernel(data_parallel_sum, ndpx.Range(global_size), a, b, c) print("Done...") diff --git a/numba_dpex/examples/blacksholes_njit.py b/numba_dpex/examples/dpjit/blacksholes_njit.py similarity index 100% rename from numba_dpex/examples/blacksholes_njit.py rename to numba_dpex/examples/dpjit/blacksholes_njit.py diff --git a/numba_dpex/examples/kernel/atomic_op.py b/numba_dpex/examples/kernel/atomic_op.py index f0e22aa6aa..59ffe18a7e 100644 --- a/numba_dpex/examples/kernel/atomic_op.py +++ b/numba_dpex/examples/kernel/atomic_op.py @@ -2,36 +2,45 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpnp as np +"""The example demonstrates the use of :class:`numba_dpex.kernel_api.AtomicRef`. -import numba_dpex as ndpx +The kernel shows the implementation of a reduction operation in numba-dpex +where every work-item is updating a global accumulator atomically. +""" +import dpnp +import numba_dpex as dpex +from numba_dpex import kernel_api as kapi -@ndpx.kernel -def atomic_reduction(a, res): - """Summarize all the items in a and writes it into res using atomic.add. - :param a: array of values to get sum - :param res: result where to add all the items from a array. It must be preset to 0. +@dpex.kernel +def atomic_reduction(item: kapi.Item, a, res): + """Array reduction using :func:`AtomicRef.fetch_add`. + + Args: + item (kapi.Item): Index space id for each work item. + a (dpnp.ndarray): An 1-d array to be reduced. + res (dpnp.ndarray): A single element array into which the result is + accumulated. """ - idx = ndpx.get_global_id(0) - ndpx.atomic.add(res, 0, a[idx]) + idx = item.get_id(0) + acc = kapi.AtomicRef(res, 0) + acc.fetch_add(a[idx]) def main(): - N = 10 + N = 1024 - # We are storing sum to the first element - a = np.arange(0, N) - res = np.zeros(1, dtype=a.dtype) + a = dpnp.arange(0, N) + res = dpnp.zeros(1, dtype=a.dtype) - print("Using device ...") - print(a.device) + print("Executing on device:") + a.device.print_device_info() - atomic_reduction[ndpx.Range(N)](a, res) - print("Reduction sum =", res[0]) + dpex.call_kernel(atomic_reduction, dpex.Range(N), a, res) + print(f"Summation of {N} integers = {res[0]}") - print("Done...") + assert res[0] == N * (N - 1) / 2 if __name__ == "__main__": diff --git a/numba_dpex/examples/kernel/black_scholes.py b/numba_dpex/examples/kernel/black_scholes.py index b139371d87..37bcda9d06 100644 --- a/numba_dpex/examples/kernel/black_scholes.py +++ b/numba_dpex/examples/kernel/black_scholes.py @@ -1,13 +1,22 @@ # SPDX-FileCopyrightText: 2022 - 2024 Intel Corporation # # SPDX-License-Identifier: Apache 2.0 -# SPDX-License-Identifier: Apache-2.0 + +"""The example shows the implementation of the Black-Scholes formula as a range kernel. + +The Black-Scholes model is a mathematical model for derivatives trading with +various underlying assumptions. The example shown here is a simplified +representation of the actual model. + +Refer: https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_model +""" from math import erf, exp, log, sqrt import dpnp as np -import numba_dpex as ndpx +import numba_dpex as dpex +from numba_dpex import kernel_api as kapi # Stock price range S0L = 10.0 @@ -47,14 +56,17 @@ def initialize(): return price, strike, t, rate, volatility, call, put -@ndpx.kernel -def kernel_black_scholes(price, strike, t, rate, volatility, call, put): +@dpex.kernel +def kernel_black_scholes( + item: kapi.Item, price, strike, t, rate, volatility, call, put +): + """A range kernel implementing a simplified Black-Scholes model.""" # Scalars mr = -rate sig_sig_two = volatility * volatility * 2.0 # Current index - i = ndpx.get_global_id(0) + i = item.get_id(0) # Get inputs into private memory p = price[i] @@ -86,11 +98,19 @@ def kernel_black_scholes(price, strike, t, rate, volatility, call, put): def main(): price, strike, t, rate, volatility, call, put = initialize() - print("Using device ...") - print(price.device) - - kernel_black_scholes[ndpx.Range(NOPT)]( - price, strike, t, rate, volatility, call, put + print("Executing on device:") + price.device.print_device_info() + + dpex.call_kernel( + kernel_black_scholes, + dpex.Range(NOPT), + price, + strike, + t, + rate, + volatility, + call, + put, ) print("Call:", call) diff --git a/numba_dpex/examples/kernel/device_func.py b/numba_dpex/examples/kernel/device_func.py index c178999157..576a1ce9a2 100644 --- a/numba_dpex/examples/kernel/device_func.py +++ b/numba_dpex/examples/kernel/device_func.py @@ -2,148 +2,64 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpnp as np +"""Demonstrates the usage of the :func:`numba_dpex.device_func` decorator. -import numba_dpex as ndpx -from numba_dpex import float32, int32, int64 +Refer the API documentation and the Kenrel programming guide for further +details. +""" -# Array size -N = 10 +import dpnp - -# A device callable function that can be invoked from -# ``kernel`` and other device functions -@ndpx.func -def a_device_function(a): - return a + 1 +import numba_dpex as dpex +from numba_dpex import kernel_api as kapi -# A device callable function with signature that can be invoked -# from ``kernel`` and other device functions -@ndpx.func(int32(int32)) -def a_device_function_int32(a): +@dpex.device_func +def increment_by_1(a): + """A device callable function that can be invoked from a kernel or + another device function. + """ return a + 1 -# A device callable function with list signature that can be invoked -# from ``kernel`` and other device functions -@ndpx.func([int32(int32), float32(float32)]) -def a_device_function_int32_float32(a): - return a + 1 - - -# A device callable function can call another device function -@ndpx.func -def another_device_function(a): - return a_device_function(a * 2) - - -# A kernel function that calls the device function -@ndpx.kernel -def a_kernel_function(a, b): - i = ndpx.get_global_id(0) - b[i] = another_device_function(a[i]) - - -# A kernel function that calls the device function -@ndpx.kernel -def a_kernel_function_int32(a, b): - i = ndpx.get_global_id(0) - b[i] = a_device_function_int32(a[i]) - - -# A kernel function that calls the device function -@ndpx.kernel -def a_kernel_function_int32_float32(a, b): - i = ndpx.get_global_id(0) - b[i] = a_device_function_int32_float32(a[i]) - - -# test function 1: tests basic -def test1(): - a = np.ones(N) - b = np.ones(N) - - print("Using device ...") - print(a.device) - - print("A=", a) - try: - a_kernel_function[ndpx.Range(N)](a, b) - except Exception as err: - print(err) - print("B=", b) - - print("Done...") - - -# test function 2: test device func with signature -def test2(): - a = np.ones(N, dtype=np.int32) - b = np.ones(N, dtype=np.int32) - - print("Using device ...") - print(a.device) - - print("A=", a) - try: - a_kernel_function_int32[ndpx.Range(N)](a, b) - except Exception as err: - print(err) - print("B=", b) - - print("Done...") - - -# test function 3: test device function with list signature -def test3(): - a = np.ones(N, dtype=np.int32) - b = np.ones(N, dtype=np.int32) - - print("Using device ...") - print(a.device) - - print("A=", a) - try: - a_kernel_function_int32_float32[ndpx.Range(N)](a, b) - except Exception as err: - print(err) - print("B=", b) - - # with a different dtype - a = np.ones(N, dtype=np.float32) - b = np.ones(N, dtype=np.float32) +@dpex.device_func +def increment_and_sum_up(nd_item: kapi.NdItem, a): + """Demonstrates the usage of group_barrier and NdItem usage in a + device_func. + """ + i = nd_item.get_global_id(0) - print("Using device ...") - print(a.device) + a[i] += 1 + kapi.group_barrier(nd_item.get_group(), kapi.MemoryScope.DEVICE) - print("A=", a) - try: - a_kernel_function_int32_float32[ndpx.Range(N)](a, b) - except Exception as err: - print(err) - print("B=", b) + if i == 0: + for idx in range(1, a.size): + a[0] += a[idx] - # this will fail, since int64 is not in - # the signature list: [int32(int32), float32(float32)] - a = np.ones(N, dtype=np.int64) - b = np.ones(N, dtype=np.int64) - print("Using device ...") - print(a.device) +@dpex.kernel +def kernel1(item: kapi.Item, a, b): + """Demonstrates calling a device function from a kernel.""" + i = item.get_id(0) + b[i] = increment_by_1(a[i]) - print("A=", a) - try: - a_kernel_function_int32_float32[ndpx.Range(N)](a, b) - except Exception as err: - print(err) - print("B=", b) - print("Done...") +@dpex.kernel +def kernel2(nd_item: kapi.NdItem, a): + """The kernel delegates everything to a device_func and calls it.""" + increment_and_sum_up(nd_item, a) -# main function if __name__ == "__main__": - test1() - test2() - test3() + # Array size + N = 100 + a = dpnp.ones(N, dtype=dpnp.int32) + b = dpnp.zeros(N, dtype=dpnp.int32) + + dpex.call_kernel(kernel1, dpex.Range(N), a, b) + # b should be [2, 2, ...., 2] + print(b) + + dpex.call_kernel(kernel2, dpex.NdRange((N,), (N,)), b) + # b[0] should be 300 + print(b[0]) diff --git a/numba_dpex/examples/kernel/interpolation.py b/numba_dpex/examples/kernel/interpolation.py index cd023c69b6..ab3f5f261e 100644 --- a/numba_dpex/examples/kernel/interpolation.py +++ b/numba_dpex/examples/kernel/interpolation.py @@ -2,11 +2,17 @@ # # SPDX-License-Identifier: Apache-2.0 +"""Demonstrates natural cubic spline implemented as an nd-range kernel. + +Refer: https://en.wikipedia.org/wiki/Spline_interpolation +""" + import dpnp as np from numba import float32 from numpy.testing import assert_almost_equal -import numba_dpex as ndpx +import numba_dpex as dpex +from numba_dpex import kernel_api as kapi # Interpolation domain XLO = 10.0 @@ -81,15 +87,15 @@ ) -@ndpx.kernel() -def kernel_polynomial(x, y, coefficients): - c = ndpx.private.array( +@dpex.kernel() +def kernel_polynomial(nditem: kapi.NdItem, x, y, coefficients): + c = kapi.PrivateArray( 4, dtype=float32 ) # Coefficients of a polynomial of a given segment - z = ndpx.private.array(1, dtype=float32) # Keep x[i] in private memory + z = kapi.PrivateArray(1, dtype=float32) # Keep x[i] in private memory - gid = ndpx.get_global_id(0) - gr_id = ndpx.get_group_id(0) + gid = nditem.get_global_id(0) + gr_id = nditem.get_group().get_group_id(0) # Polynomial coefficients are fixed within a workgroup c[0] = coefficients[gr_id][0] @@ -112,16 +118,20 @@ def main(): xp = np.arange(XLO, XHI, (XHI - XLO) / N_POINTS) yp = np.empty(xp.shape) - print("Using device ...") - print(xp.device) - global_range = ndpx.Range( + print("Executing on device:") + xp.device.print_device_info() + global_range = kapi.Range( N_POINTS // N_POINTS_PER_WORK_ITEM, ) - local_range = ndpx.Range( + local_range = kapi.Range( LOCAL_SIZE, ) - kernel_polynomial[ndpx.NdRange(global_range, local_range)]( - xp, yp, COEFFICIENTS + dpex.call_kernel( + kernel_polynomial, + dpex.NdRange(global_range, local_range), + xp, + yp, + COEFFICIENTS, ) # Copy results back to the host diff --git a/numba_dpex/examples/kernel/kernel_private_memory.py b/numba_dpex/examples/kernel/kernel_private_memory.py index 249274c002..c257cb3c7e 100644 --- a/numba_dpex/examples/kernel/kernel_private_memory.py +++ b/numba_dpex/examples/kernel/kernel_private_memory.py @@ -1,47 +1,55 @@ # SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 +""" This example demonstrates the usage of :class:`numba_dpex.kernel_api.PrivateArray`. + +A ``PrivateArray`` is an array allocated on a device's private address space. As +the amount of available private memory is usually limited, programmers should be +careful with the size of a ``PrivateArray``. Allocating an array that is too big +will cause the data to be spilled to global memory, causing adverse performance. +""" import dpctl import dpctl.tensor as dpt import numpy as np -from numba import float32 import numba_dpex as ndpx +from numba_dpex import float32 +from numba_dpex import kernel_api as kapi def private_memory(): - """ - This example demonstrates the usage of numba_dpex's `private.array` - intrinsic function. The function is used to create a static array - allocated on the devices private address space. - """ + """Demonstrates usage of :class:`numba_dpex.kernel_api.PrivateArray`.""" @ndpx.kernel - def private_memory_kernel(A): - memory = ndpx.private.array(shape=1, dtype=np.float32) - i = ndpx.get_global_id(0) + def private_memory_kernel(nditem: kapi.NdItem, A): + memory = kapi.PrivateArray(shape=1, dtype=np.float32) + i = nditem.get_global_id(0) # preload memory[0] = i - ndpx.barrier(ndpx.LOCAL_MEM_FENCE) # local mem fence + gr = nditem.get_group() + # local mem fence + kapi.group_barrier(gr) # memory will not hold correct deterministic result if it is not # private to each thread. A[i] = memory[0] * 2 - N = 4 + N = 100 device = dpctl.select_default_device() arr = dpt.zeros(N, dtype=dpt.float32, device=device) orig = np.arange(N).astype(np.float32) - print("Using device ...") + print("Executing on device:") device.print_device_info() global_range = ndpx.Range(N) local_range = ndpx.Range(N) - private_memory_kernel[ndpx.NdRange(global_range, local_range)](arr) + ndpx.call_kernel( + private_memory_kernel, ndpx.NdRange(global_range, local_range), arr + ) arr_out = dpt.asnumpy(arr) np.testing.assert_allclose(orig * 2, arr_out) diff --git a/numba_dpex/examples/kernel/kernel_specialization.py b/numba_dpex/examples/kernel/kernel_specialization.py index 7367ec380f..1fd9ce70fa 100644 --- a/numba_dpex/examples/kernel/kernel_specialization.py +++ b/numba_dpex/examples/kernel/kernel_specialization.py @@ -2,6 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 +"""Demonstrates signature specialization feature to pre-compile a kernel. + +As opposed to JIT compilation at first call, a ``kernel`` or ``device_func`` +decorated function with signature specialization gets compiled on module +load and is cached in memory. The following examples demonstrate the feature for +the numba_dpex.kernel decorator and presents usage scenarios and current +limitations. +""" import dpctl.tensor as dpt import numpy as np @@ -11,34 +19,39 @@ InvalidKernelSpecializationError, MissingSpecializationError, ) - -# Similar to Numba, numba-ndpx supports eager compilation of functions. The -# following examples demonstrate the feature for numba_ndpx.kernel and presents -# usage scenarios and current limitations. +from numba_dpex.core.types.kernel_api.index_space_ids import ItemType # ------------ Example 1. ------------ # -# Define type specializations using the numba_ndpx usm_ndarray data type. +# Define type specializations using the numba_dpex usm_ndarray data type. i64arrty = usm_ndarray(1, "C", int64) f32arrty = usm_ndarray(1, "C", float32) +# Type specialization for the index space id type +itemty = ItemType(ndim=1) # specialize a kernel for the i64arrty -@ndpx.kernel((i64arrty, i64arrty, i64arrty)) -def data_parallel_sum(a, b, c): +specialized_kernel = ndpx.kernel((itemty, i64arrty, i64arrty, i64arrty)) + + +def data_parallel_sum(item, a, b, c): """ Vector addition using the ``kernel`` decorator. """ - i = ndpx.get_global_id(0) + i = item.get_id(0) c[i] = a[i] + b[i] +# pre-compiled kernel +pre_compiled_kernel = specialized_kernel(data_parallel_sum) + # run the specialized kernel a = dpt.ones(1024, dtype=dpt.int64) b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum[ndpx.Range(1024)](a, b, c) +# Call the pre-compiled kernel +ndpx.call_kernel(pre_compiled_kernel, ndpx.Range(1024), a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -52,21 +65,32 @@ def data_parallel_sum(a, b, c): # specialize a kernel for the i64arrty -@ndpx.kernel([(i64arrty, i64arrty, i64arrty), (f32arrty, f32arrty, f32arrty)]) -def data_parallel_sum2(a, b, c): +specialized_kernels_list = ndpx.kernel( + [ + (itemty, i64arrty, i64arrty, i64arrty), + (itemty, f32arrty, f32arrty, f32arrty), + ] +) + + +def data_parallel_sum2(item, a, b, c): """ Vector addition using the ``kernel`` decorator. """ - i = ndpx.get_global_id(0) + i = item.get_id(0) c[i] = a[i] + b[i] +# Pre-compile both variants of the kernel +pre_compiled_kernels = specialized_kernels_list(data_parallel_sum2) + # run the i64 specialized kernel a = dpt.ones(1024, dtype=dpt.int64) b = dpt.ones(1024, dtype=dpt.int64) c = dpt.zeros(1024, dtype=dpt.int64) -data_parallel_sum2[ndpx.Range(1024)](a, b, c) +# Compiler will type match the right variant and call it. +ndpx.call_kernel(pre_compiled_kernels, ndpx.Range(1024), a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.int64) @@ -77,43 +101,13 @@ def data_parallel_sum2(a, b, c): b = dpt.ones(1024, dtype=dpt.float32) c = dpt.zeros(1024, dtype=dpt.float32) -data_parallel_sum2[ndpx.Range(1024)](a, b, c) +ndpx.call_kernel(pre_compiled_kernels, ndpx.Range(1024), a, b, c) npc = dpt.asnumpy(c) npc_expected = np.full(1024, 2, dtype=np.float32) assert np.array_equal(npc, npc_expected) -# ------------ Example 3. ------------ # - -# A specialized kernel cannot be jit compiled. Calling a specialized kernel -# with arguments having type different from the specialization will result in -# an MissingSpecializationError. - -a = dpt.ones(1024, dtype=dpt.int32) -b = dpt.ones(1024, dtype=dpt.int32) -c = dpt.zeros(1024, dtype=dpt.int32) - -try: - data_parallel_sum[ndpx.Range(1024)](a, b, c) -except MissingSpecializationError as mse: - print(mse) - - -# ------------ Example 4. ------------ # - -# Numba_ndpx does not support NumPy arrays as kernel arguments and all -# array arguments should be inferable as a numba_ndpx.types.usm_ndarray. Trying -# to eager compile with a NumPy array-based signature will lead to an -# InvalidKernelSpecializationError - -try: - ndpx.kernel((int64[::1], int64[::1], int64[::1])) -except InvalidKernelSpecializationError as e: - print("Dpex kernels cannot be specialized using NumPy arrays.") - print(e) - - # ------------ Limitations ------------ # diff --git a/numba_dpex/examples/kernel/matmul.py b/numba_dpex/examples/kernel/matmul.py index 804dd3b16e..335d319bda 100644 --- a/numba_dpex/examples/kernel/matmul.py +++ b/numba_dpex/examples/kernel/matmul.py @@ -4,69 +4,97 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpctl +"""The example demonstrates a sliding window matrix-matrix multiplication kernel. +""" + import dpctl.tensor as dpt import numpy as np -import numba_dpex as ndpx - - -@ndpx.kernel -def gemm(a, b, c): - """ - A basic DGEMM implemented as a ``kernel`` function. - """ - i = ndpx.get_global_id(0) - j = ndpx.get_global_id(1) - if i >= c.shape[0] or j >= c.shape[1]: - return - c[i, j] = 0 - for k in range(c.shape[0]): - c[i, j] += a[i, k] * b[k, j] - - -# Array dimensions -X = 1024 -Y = 16 -global_size = X, X - -griddim = ndpx.Range(X, X) -blockdim = ndpx.Range(Y, Y) - - -def driver(a, b, c): - # Invoke the kernel - gemm[ndpx.NdRange(griddim, blockdim)](a, b, c) - - -def main(): - a = np.arange(X * X, dtype=np.float32).reshape(X, X) - b = np.array(np.random.random(X * X), dtype=np.float32).reshape(X, X) - - device = dpctl.select_default_device() - a_dpt = dpt.arange(X * X, dtype=dpt.float32, device=device) - a_dpt = dpt.reshape(a_dpt, (X, X)) - b_dpt = dpt.asarray(b, dtype=dpt.float32, device=device) - b_dpt = dpt.reshape(b_dpt, (X, X)) - c_dpt = dpt.ones_like(a_dpt) - c_dpt = dpt.reshape(c_dpt, (X, X)) - - print("Using device ...") - device.print_device_info() - - driver(a_dpt, b_dpt, c_dpt) - c_out = dpt.asnumpy(c_dpt) - - # Host compute using standard NumPy - Amat = np.matrix(a) - Bmat = np.matrix(b) - Cans = Amat * Bmat - - # Check result - assert np.allclose(c_out, Cans) - - print("Done...") - - -if __name__ == "__main__": - main() +import numba_dpex as dpex +from numba_dpex import kernel_api as kapi + +square_block_side = 2 +work_group_size = (square_block_side, square_block_side) +dtype = np.float32 + + +@dpex.kernel +def matmul( + nditem: kapi.NdItem, + X, # IN READ-ONLY (X_n_rows, n_cols) + y, # IN READ-ONLY (n_cols, y_n_rows), + X_slm, # SLM to store a sliding window over X + Y_slm, # SLM to store a sliding window over Y + result, # OUT (X_n_rows, y_n_rows) +): + X_n_rows = X.shape[0] + Y_n_cols = y.shape[1] + n_cols = X.shape[1] + + result_row_idx = nditem.get_global_id(0) + result_col_idx = nditem.get_global_id(1) + + local_row_idx = nditem.get_local_id(0) + local_col_idx = nditem.get_local_id(1) + + n_blocks_for_cols = n_cols // square_block_side + if (n_cols % square_block_side) > 0: + n_blocks_for_cols += 1 + + output = dtype(0) + + gr = nditem.get_group() + + for block_idx in range(n_blocks_for_cols): + X_slm[local_row_idx, local_col_idx] = dtype(0) + Y_slm[local_row_idx, local_col_idx] = dtype(0) + if (result_row_idx < X_n_rows) and ( + (local_col_idx + (square_block_side * block_idx)) < n_cols + ): + X_slm[local_row_idx, local_col_idx] = X[ + result_row_idx, local_col_idx + (square_block_side * block_idx) + ] + + if (result_col_idx < Y_n_cols) and ( + (local_row_idx + (square_block_side * block_idx)) < n_cols + ): + Y_slm[local_row_idx, local_col_idx] = y[ + local_row_idx + (square_block_side * block_idx), result_col_idx + ] + + kapi.group_barrier(gr) + + for idx in range(square_block_side): + output += X_slm[local_row_idx, idx] * Y_slm[idx, local_col_idx] + + kapi.group_barrier(gr) + + if (result_row_idx < X_n_rows) and (result_col_idx < Y_n_cols): + result[result_row_idx, result_col_idx] = output + + +def _arange_reshaped(shape, dtype): + n_items = shape[0] * shape[1] + return np.arange(n_items, dtype=dtype).reshape(shape) + + +X = _arange_reshaped((5, 5), dtype) +Y = _arange_reshaped((5, 5), dtype) +X = dpt.asarray(X) +Y = dpt.asarray(Y) +device = X.device.sycl_device +result = dpt.zeros((5, 5), dtype, device=device) +X_slm = kapi.LocalAccessor(shape=work_group_size, dtype=dtype) +Y_slm = kapi.LocalAccessor(shape=work_group_size, dtype=dtype) + +dpex.call_kernel( + matmul, kapi.NdRange((6, 6), (2, 2)), X, Y, X_slm, Y_slm, result +) + +# Expected: +# [[ 150. 160. 170. 180. 190.] +# [ 400. 435. 470. 505. 540.] +# [ 650. 710. 770. 830. 890.] +# [ 900. 985. 1070. 1155. 1240.] +# [1150. 1260. 1370. 1480. 1590.]] +print(result) diff --git a/numba_dpex/examples/kernel/pairwise_distance.py b/numba_dpex/examples/kernel/pairwise_distance.py index 0bfb6205d7..0737ee52c7 100644 --- a/numba_dpex/examples/kernel/pairwise_distance.py +++ b/numba_dpex/examples/kernel/pairwise_distance.py @@ -2,13 +2,14 @@ # # SPDX-License-Identifier: Apache-2.0 +"""The example demonstrates a N^2 pairwise distance matrix computation for an +array of N elements using a numba_dpex range kernel. +""" import argparse from math import sqrt from string import Template from time import time -import dpctl -import dpctl.memory as dpctl_mem import dpnp as np import numba_dpex as ndpx @@ -26,7 +27,6 @@ # Global work size is equal to the number of points global_size = ndpx.Range(args.n) -# Local Work size is optional local_size = ndpx.Range(args.l) X = np.random.random((args.n, args.d)).astype(np.single) @@ -34,12 +34,12 @@ @ndpx.kernel -def pairwise_distance(X, D, xshape0, xshape1): +def pairwise_distance(nditem, X, D): """ An Euclidean pairwise distance computation implemented as a ``kernel`` function. """ - idx = ndpx.get_global_id(0) + idx = nditem.get_global_id(0) d0 = X[idx, 0] - X[idx, 0] # for i in range(xshape0): @@ -54,10 +54,10 @@ def pairwise_distance(X, D, xshape0, xshape1): def driver(): # measure running time times = list() - for repeat in range(args.r): + for _ in range(args.r): start = time() - pairwise_distance[ndpx.NdRange(global_size, local_size)]( - X, D, X.shape[0], X.shape[1] + ndpx.call_kernel( + pairwise_distance, ndpx.NdRange(global_size, local_size), X, D ) end = time() diff --git a/numba_dpex/examples/kernel/pipelining.py b/numba_dpex/examples/kernel/pipelining.py index 4272ed5b26..4c9bf29fb5 100644 --- a/numba_dpex/examples/kernel/pipelining.py +++ b/numba_dpex/examples/kernel/pipelining.py @@ -20,10 +20,9 @@ import numpy as np import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp -@dpex_exp.kernel +@dpex.kernel def async_kernel(x): idx = dpex.get_global_id(0) @@ -48,7 +47,7 @@ def run_serial(host_arr, n_itr): q.memcpy(_a_data, usm_host_data, usm_host_data.nbytes) - dpex_exp.call_kernel( + dpex.call_kernel( async_kernel, dpex.Range(len(_a)), _a, @@ -85,7 +84,7 @@ def run_pipeline(host_arr, n_itr): e_a.wait() - _, e_a = dpex_exp.call_kernel_async( + _, e_a = dpex.call_kernel_async( async_kernel, dpex.Range(len(_a)), (e_a,), diff --git a/numba_dpex/examples/kernel/scan.py b/numba_dpex/examples/kernel/scan.py index bf2d194414..36f3b41cd2 100644 --- a/numba_dpex/examples/kernel/scan.py +++ b/numba_dpex/examples/kernel/scan.py @@ -2,52 +2,55 @@ # # SPDX-License-Identifier: Apache-2.0 -# scan.py is not working due to issue: https://github.com/IntelPython/numba-dpex/issues/829 +"""An implementation of the Hillis-Steele algorithm to compute prefix sums. + +The algorithm is implemented to work with a single work group of N work items, +where N is the number of elements. +""" import dpnp as np import numba_dpex as ndpx +from numba_dpex import kernel_api as kapi # 1D array size N = 64 -# Implements Hillis-Steele prefix sum algorithm @ndpx.kernel -def kernel_hillis_steele_scan(a): +def kernel_hillis_steele_scan(nditem: kapi.NdItem, a, slm_b, slm_c): # Get local and global id and workgroup size - gid = ndpx.get_global_id(0) - lid = ndpx.get_local_id(0) - ls = ndpx.get_local_size(0) - - # Create temporals in local memory - b = ndpx.local.array(ls, dtype=a.dtype) - c = ndpx.local.array(ls, dtype=a.dtype) + gid = nditem.get_global_id(0) + lid = nditem.get_local_id(0) + ls = nditem.get_local_range(0) + gr = nditem.get_group() # Initialize locals - c[lid] = b[lid] = a[gid] - ndpx.barrier(ndpx.LOCAL_MEM_FENCE) + slm_c[lid] = slm_b[lid] = a[gid] + + kapi.group_barrier(gr) # Calculate prefix sum d = 1 while d < ls: if lid > d: - c[lid] = b[lid] + b[lid - d] + slm_c[lid] = slm_b[lid] + slm_b[lid - d] else: - c[lid] = b[lid] + slm_c[lid] = slm_b[lid] - ndpx.barrier(ndpx.LOCAL_MEM_FENCE) + kapi.group_barrier(gr) # Swap c and b - e = c[lid] - c[lid] = b[lid] - b[lid] = e + e = slm_c[lid] + slm_c[lid] = slm_b[lid] + slm_b[lid] = e # Double the stride d *= 2 - ndpx.barrier() # The same as ndpx.barrier(ndpx.GLOBAL_MEM_FENCE) - a[gid] = b[lid] + kapi.group_barrier(gr, kapi.MemoryScope.DEVICE) + + a[gid] = slm_b[lid] def main(): @@ -56,7 +59,14 @@ def main(): print("Using device ...") print(arr.device) - kernel_hillis_steele_scan[ndpx.Range(N)](arr) + + # Create temporals in local memory + slm_b = kapi.LocalAccessor(N, dtype=arr.dtype) + slm_c = kapi.LocalAccessor(N, dtype=arr.dtype) + + ndpx.call_kernel( + kernel_hillis_steele_scan, ndpx.NdRange((N,), (N,)), arr, slm_b, slm_c + ) # the output should be [0, 1, 3, 6, ...] arr_np = np.asnumpy(arr) diff --git a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py b/numba_dpex/examples/kernel/sum_reduction_recursive.py similarity index 78% rename from numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py rename to numba_dpex/examples/kernel/sum_reduction_recursive.py index 8cd4463464..a6f55a65d9 100644 --- a/numba_dpex/examples/kernel/sum_reduction_recursive_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_recursive.py @@ -13,21 +13,20 @@ from numba import int32 import numba_dpex as ndpx +from numba_dpex import kernel_api as kapi @ndpx.kernel -def sum_reduction_kernel(A, input_size, partial_sums): +def sum_reduction_kernel(nditem: kapi.NdItem, A, input_size, partial_sums, slm): local_id = ndpx.get_local_id(0) global_id = ndpx.get_global_id(0) group_size = ndpx.get_local_size(0) group_id = ndpx.get_group_id(0) - local_sums = ndpx.local.array(64, int32) - - local_sums[local_id] = 0 + slm[local_id] = 0 if global_id < input_size: - local_sums[local_id] = A[global_id] + slm[local_id] = A[global_id] # Loop for computing local_sums : divide workgroup into 2 parts stride = group_size // 2 @@ -37,12 +36,12 @@ def sum_reduction_kernel(A, input_size, partial_sums): # Add elements 2 by 2 between local_id and local_id + stride if local_id < stride: - local_sums[local_id] += local_sums[local_id + stride] + slm[local_id] += slm[local_id + stride] stride >>= 1 if local_id == 0: - partial_sums[group_id] = local_sums[0] + partial_sums[group_id] = slm[0] def sum_recursive_reduction(size, group_size, Dinp, Dpartial_sums): @@ -60,12 +59,24 @@ def sum_recursive_reduction(size, group_size, Dinp, Dpartial_sums): gr = ndpx.Range(passed_size) lr = ndpx.Range(group_size) - - sum_reduction_kernel[ndpx.NdRange(gr, lr)](Dinp, size, Dpartial_sums) + slm = kapi.LocalAccessor(64, Dinp.dtype) + ndpx.call_kernel( + sum_reduction_kernel, + ndpx.NdRange(gr, lr), + Dinp, + size, + Dpartial_sums, + slm, + ) if nb_work_groups <= group_size: - sum_reduction_kernel[ndpx.NdRange(lr, lr)]( - Dpartial_sums, nb_work_groups, Dinp + ndpx.call_kernel( + sum_reduction_kernel, + ndpx.NdRange(lr, lr), + Dpartial_sums, + nb_work_groups, + Dinp, + slm, ) result = int(Dinp[0]) else: diff --git a/numba_dpex/examples/kernel/sum_reduction_ocl.py b/numba_dpex/examples/kernel/sum_reduction_tree.py similarity index 61% rename from numba_dpex/examples/kernel/sum_reduction_ocl.py rename to numba_dpex/examples/kernel/sum_reduction_tree.py index 889fca0082..d2375ca5a3 100644 --- a/numba_dpex/examples/kernel/sum_reduction_ocl.py +++ b/numba_dpex/examples/kernel/sum_reduction_tree.py @@ -2,43 +2,49 @@ # # SPDX-License-Identifier: Apache-2.0 +""" Demonstration of a simple tree reduction algorithm to reduce an array of scalars. + +The algorithm works in two steps: First an nd-range kernel is launched to +calculate a partially reduced array. The size of the partially reduced array is +equal to the number of work groups over which the initial reduction is done. +The partial results are then summed up on the host device. +""" import dpctl import dpctl.tensor as dpt -from numba import int32 import numba_dpex as ndpx +from numba_dpex import kernel_api as kapi @ndpx.kernel -def sum_reduction_kernel(A, partial_sums): +def sum_reduction_kernel(nditem: kapi.NdItem, A, partial_sums, slm): """ The example demonstrates a reduction kernel implemented as a ``kernel`` function. """ - local_id = ndpx.get_local_id(0) - global_id = ndpx.get_global_id(0) - group_size = ndpx.get_local_size(0) - group_id = ndpx.get_group_id(0) - - local_sums = ndpx.local.array(64, int32) + local_id = nditem.get_local_id(0) + global_id = nditem.get_global_id(0) + group_size = nditem.get_local_range(0) + gr = nditem.get_group() + group_id = gr.get_group_id(0) # Copy from global to local memory - local_sums[local_id] = A[global_id] + slm[local_id] = A[global_id] # Loop for computing local_sums : divide workgroup into 2 parts stride = group_size // 2 while stride > 0: # Waiting for each 2x2 addition into given workgroup - ndpx.barrier(ndpx.LOCAL_MEM_FENCE) + kapi.group_barrier(gr) # Add elements 2 by 2 between local_id and local_id + stride if local_id < stride: - local_sums[local_id] += local_sums[local_id + stride] + slm[local_id] += slm[local_id + stride] stride >>= 1 if local_id == 0: - partial_sums[group_id] = local_sums[0] + partial_sums[group_id] = slm[0] def sum_reduce(A): @@ -51,7 +57,10 @@ def sum_reduce(A): gs = ndpx.Range(global_size) ls = ndpx.Range(work_group_size) - sum_reduction_kernel[ndpx.NdRange(gs, ls)](A, partial_sums) + slm = kapi.LocalAccessor(64, A.dtype) + ndpx.call_kernel( + sum_reduction_kernel, ndpx.NdRange(gs, ls), A, partial_sums, slm + ) final_sum = 0 # calculate the final sum in HOST diff --git a/numba_dpex/examples/kernel/vector_sum.py b/numba_dpex/examples/kernel/vector_sum.py index 6b89cbe112..dc0bf7643d 100644 --- a/numba_dpex/examples/kernel/vector_sum.py +++ b/numba_dpex/examples/kernel/vector_sum.py @@ -2,6 +2,9 @@ # # SPDX-License-Identifier: Apache-2.0 +"""The example demonstrates a 1D vector addition kernel. +""" + import dpnp import numpy.testing as testing @@ -10,14 +13,14 @@ # Data parallel kernel implementing vector sum @ndpx.kernel -def kernel_vector_sum(a, b, c): - i = ndpx.get_global_id(0) +def kernel_vector_sum(item, a, b, c): + i = item.get_id(0) c[i] = a[i] + b[i] # Utility function for printing and testing def driver(a, b, c, global_size): - kernel_vector_sum[ndpx.Range(global_size)](a, b, c) + ndpx.call_kernel(kernel_vector_sum, ndpx.Range(global_size), a, b, c) a_np = dpnp.asnumpy(a) # Copy dpnp array a to NumPy array a_np b_np = dpnp.asnumpy(b) # Copy dpnp array b to NumPy array b_np c_np = dpnp.asnumpy(c) # Copy dpnp array c to NumPy array c_np @@ -35,8 +38,8 @@ def main(): b = dpnp.random.random(N) c = dpnp.ones_like(a) - print("Using device ...") - print(a.device) + print("Executing on device:") + a.device.print_device_info() driver(a, b, c, global_size) print("Done...") diff --git a/numba_dpex/examples/kernel/vector_sum2D.py b/numba_dpex/examples/kernel/vector_sum2D.py index 0dd60d1165..79fb857711 100644 --- a/numba_dpex/examples/kernel/vector_sum2D.py +++ b/numba_dpex/examples/kernel/vector_sum2D.py @@ -4,6 +4,9 @@ # # SPDX-License-Identifier: Apache-2.0 +"""The example demonstrates a 2-D vector addition kernel. +""" + import dpctl import dpctl.tensor as dpt import numpy as np @@ -12,17 +15,17 @@ @ndpx.kernel -def data_parallel_sum(a, b, c): +def data_parallel_sum(item, a, b, c): """ A two-dimensional vector addition example using the ``kernel`` decorator. """ - i = ndpx.get_global_id(0) - j = ndpx.get_global_id(1) + i = item.get_id(0) + j = item.get_id(1) c[i, j] = a[i, j] + b[i, j] def driver(a, b, c, global_size): - data_parallel_sum[global_size](a, b, c) + ndpx.call_kernel(data_parallel_sum, global_size, a, b, c) def main(): @@ -45,7 +48,7 @@ def main(): c_dpt = dpt.empty_like(a_dpt) c_dpt = dpt.reshape(c_dpt, (X, Y)) - print("Using device ...") + print("Executing on device:") device.print_device_info() print("Running kernel ...") diff --git a/numba_dpex/examples/sum_reduction.py b/numba_dpex/examples/sum_reduction.py deleted file mode 100644 index 2c7e66423b..0000000000 --- a/numba_dpex/examples/sum_reduction.py +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import math - -import dpnp as np - -import numba_dpex as ndpx - - -@ndpx.kernel -def sum_reduction_kernel(A, R, stride): - i = ndpx.get_global_id(0) - # sum two element - R[i] = A[i] + A[i + stride] - # store the sum to be used in nex iteration - A[i] = R[i] - - -def sum_reduce(A): - """Size of A should be power of two.""" - total = len(A) - # max size will require half the size of A to store sum - R = np.array(np.random.random(math.floor(total / 2)), dtype=A.dtype) - - while total > 1: - global_size = math.floor(total // 2) - total = total - global_size - sum_reduction_kernel[ndpx.Range(global_size)](A, R, total) - - return R[0] - - -def test_sum_reduce(): - N = 2048 - - A = np.arange(N, dtype=np.float32) - A_copy = np.arange(N, dtype=np.float32) - - actual = sum_reduce(A) - expected = A_copy.sum() - - print("Actual: ", actual) - print("Expected:", expected) - - assert expected - actual < 1e-2 - - print("Done...") - - -if __name__ == "__main__": - test_sum_reduce() diff --git a/numba_dpex/experimental/__init__.py b/numba_dpex/experimental/__init__.py index 97134ee042..c5682cb2ad 100644 --- a/numba_dpex/experimental/__init__.py +++ b/numba_dpex/experimental/__init__.py @@ -20,8 +20,6 @@ _index_space_id_overloads, _private_array_overloads, ) -from .decorators import device_func, kernel -from .launcher import call_kernel, call_kernel_async from .models import * from .types import KernelDispatcherType @@ -38,9 +36,5 @@ def dpex_dispatcher_const(context): __all__ = [ - "device_func", - "kernel", - "call_kernel", - "call_kernel_async", "SPIRVKernelDispatcher", ] diff --git a/numba_dpex/experimental/decorators.py b/numba_dpex/experimental/decorators.py deleted file mode 100644 index 89ff2d7ff4..0000000000 --- a/numba_dpex/experimental/decorators.py +++ /dev/null @@ -1,162 +0,0 @@ -# SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -""" The set of experimental decorators provided by numba_dpex that are not yet -ready to move to numba_dpex.core. -""" -import inspect -from warnings import warn - -from numba.core import sigutils, typeinfer -from numba.core.target_extension import ( - jit_registry, - resolve_dispatcher_from_str, - target_registry, -) - -from numba_dpex.kernel_api_impl.spirv.dispatcher import SPIRVKernelDispatcher -from numba_dpex.kernel_api_impl.spirv.target import CompilationMode - -from .target import DPEX_KERNEL_EXP_TARGET_NAME - - -def _parse_func_or_sig(signature_or_function): - # Handle signature (borrowed from numba). swapped signature and list check - if signature_or_function is None: - # No signature, no function - pyfunc = None - sigs = [] - elif sigutils.is_signature(signature_or_function): - # A single signature is passed - pyfunc = None - sigs = [signature_or_function] - elif isinstance(signature_or_function, list): - # A list of signatures is passed - pyfunc = None - sigs = signature_or_function - else: - # A function is passed - pyfunc = signature_or_function - sigs = [] - - return pyfunc, sigs - - -def kernel(func_or_sig=None, **options): - """A decorator to define a kernel function. - - A kernel function is conceptually equivalent to a SYCL kernel function, and - gets compiled into either an OpenCL or a LevelZero SPIR-V binary kernel. - A kernel decorated Python function has the following restrictions: - - * The function can not return any value. - * All array arguments passed to a kernel should adhere to compute - follows data programming model. - """ - - # dispatcher is a type: - # - dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME) - if "_compilation_mode" in options: - user_compilation_mode = options["_compilation_mode"] - warn( - "_compilation_mode is an internal flag that should not be set " - "in the decorator. The decorator defined option " - f"{user_compilation_mode} is going to be ignored." - ) - options["_compilation_mode"] = CompilationMode.KERNEL - - # FIXME: The options need to be evaluated and checked here like it is - # done in numba.core.decorators.jit - - func, sigs = _parse_func_or_sig(func_or_sig) - for sig in sigs: - if isinstance(sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported" - ) - - def _kernel_dispatcher(pyfunc): - disp: SPIRVKernelDispatcher = dispatcher( - pyfunc=pyfunc, - targetoptions=options, - ) - - if len(sigs) > 0: - with typeinfer.register_dispatcher(disp): - for sig in sigs: - disp.compile(sig) - disp.disable_compile() - - return disp - - if func is None: - return _kernel_dispatcher - - if not inspect.isfunction(func): - raise ValueError( - "Argument passed to the kernel decorator is neither a " - "function object, nor a signature. If you are trying to " - "specialize the kernel that takes a single argument, specify " - "the return type as None explicitly." - ) - return _kernel_dispatcher(func) - - -def device_func(func_or_sig=None, **options): - """Generates a function with a device-only calling convention, e.g., - spir_func for SPIR-V based devices. - - The decorator is used to compile overloads in the DpexKernelTarget and - users should use the decorator to define functions that are only callable - from inside another device_func or a kernel. - - A device_func is not compiled down to device binary IR and instead left as - LLVM IR. It is done so that the function can be inlined fully into the - kernel module from where it is used at the LLVM level, leading to more - optimization opportunities. - - Returns: - KernelDispatcher: A KernelDispatcher instance with the - _compilation_mode option set to DEVICE_FUNC. - """ - dispatcher = resolve_dispatcher_from_str(DPEX_KERNEL_EXP_TARGET_NAME) - - if "_compilation_mode" in options: - user_compilation_mode = options["_compilation_mode"] - warn( - "_compilation_mode is an internal flag that should not be set " - "in the decorator. The decorator defined option " - f"{user_compilation_mode} is going to be ignored." - ) - options["_compilation_mode"] = CompilationMode.DEVICE_FUNC - - func, sigs = _parse_func_or_sig(func_or_sig) - for sig in sigs: - if isinstance(sig, str): - raise NotImplementedError( - "Specifying signatures as string is not yet supported" - ) - - def _kernel_dispatcher(pyfunc): - disp: SPIRVKernelDispatcher = dispatcher( - pyfunc=pyfunc, - targetoptions=options, - ) - - if len(sigs) > 0: - with typeinfer.register_dispatcher(disp): - for sig in sigs: - disp.compile(sig) - disp.disable_compile() - - return disp - - if func is None: - return _kernel_dispatcher - - return _kernel_dispatcher(func_or_sig) - - -jit_registry[target_registry[DPEX_KERNEL_EXP_TARGET_NAME]] = device_func diff --git a/numba_dpex/tests/experimental/__init__.py b/numba_dpex/tests/codegen/__init__.py similarity index 100% rename from numba_dpex/tests/experimental/__init__.py rename to numba_dpex/tests/codegen/__init__.py diff --git a/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py b/numba_dpex/tests/codegen/test_inline_threshold_codegen.py similarity index 91% rename from numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py rename to numba_dpex/tests/codegen/test_inline_threshold_codegen.py index e3953adfb3..c5af4e70c6 100644 --- a/numba_dpex/tests/experimental/codegen/test_inline_threshold_codegen.py +++ b/numba_dpex/tests/codegen/test_inline_threshold_codegen.py @@ -5,9 +5,8 @@ import dpctl from numba.core import types -from numba_dpex import DpctlSyclQueue, DpnpNdArray -from numba_dpex import experimental as dpex_exp -from numba_dpex import int64 +import numba_dpex as dpex +from numba_dpex import DpctlSyclQueue, DpnpNdArray, int64 from numba_dpex.core.types.kernel_api.index_space_ids import ItemType from numba_dpex.kernel_api import Item @@ -39,7 +38,7 @@ def test_codegen_with_max_inline_threshold(): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(ItemType(1), i64arr_ty, i64arr_ty, i64arr_ty) - disp = dpex_exp.kernel(inline_threshold=1)(kernel_func) + disp = dpex.kernel(inline_threshold=1)(kernel_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module @@ -60,7 +59,7 @@ def test_codegen_without_max_inline_threshold(): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(ItemType(1), i64arr_ty, i64arr_ty, i64arr_ty) - disp = dpex_exp.kernel(inline_threshold=0)(kernel_func) + disp = dpex.kernel(inline_threshold=0)(kernel_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module diff --git a/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py b/numba_dpex/tests/codegen/test_intenum_literal_codegen.py similarity index 91% rename from numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py rename to numba_dpex/tests/codegen/test_intenum_literal_codegen.py index 33ca4063e4..5d875f37af 100644 --- a/numba_dpex/tests/experimental/codegen/test_intenum_literal_codegen.py +++ b/numba_dpex/tests/codegen/test_intenum_literal_codegen.py @@ -7,7 +7,7 @@ import dpctl from numba.core import types -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex import DpctlSyclQueue, DpnpNdArray, int64 from numba_dpex.kernel_api.flag_enum import FlagEnum @@ -27,7 +27,7 @@ class PseudoFlags(FlagEnum): FLAG1 = 1 FLAG2 = 2 - @exp_dpex.device_func + @dpex.device_func def bitwise_or_flags(flag1, flag2): return flag1 | flag2 @@ -40,7 +40,7 @@ def pass_flags_to_func(a): i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) kernel_sig = types.void(i64arr_ty) - disp = exp_dpex.kernel(inline_threshold=0)(pass_flags_to_func) + disp = dpex.kernel(inline_threshold=0)(pass_flags_to_func) disp.compile(kernel_sig) kcres = disp.overloads[kernel_sig.args] llvm_ir_mod = kcres.library._final_module.__str__() diff --git a/numba_dpex/tests/experimental/codegen/test_local_accessor_kernel_arg.py b/numba_dpex/tests/codegen/test_local_accessor_kernel_arg.py similarity index 90% rename from numba_dpex/tests/experimental/codegen/test_local_accessor_kernel_arg.py rename to numba_dpex/tests/codegen/test_local_accessor_kernel_arg.py index 26905bf19b..b1d52b6cc3 100644 --- a/numba_dpex/tests/experimental/codegen/test_local_accessor_kernel_arg.py +++ b/numba_dpex/tests/codegen/test_local_accessor_kernel_arg.py @@ -4,11 +4,9 @@ import dpctl from llvmlite import ir as llvmir -from numba.core import types -from numba_dpex import DpctlSyclQueue, DpnpNdArray -from numba_dpex import experimental as dpex_exp -from numba_dpex import int64 +import numba_dpex as dpex +from numba_dpex import DpctlSyclQueue, DpnpNdArray, int64 from numba_dpex.core.types.kernel_api.index_space_ids import NdItemType from numba_dpex.core.types.kernel_api.local_accessor import LocalAccessorType from numba_dpex.kernel_api import ( @@ -37,7 +35,7 @@ def test_codegen_local_accessor_kernel_arg(): queue_ty = DpctlSyclQueue(dpctl.SyclQueue()) i64arr_ty = DpnpNdArray(ndim=1, dtype=int64, layout="C", queue=queue_ty) slm_ty = LocalAccessorType(ndim=1, dtype=int64) - disp = dpex_exp.kernel(inline_threshold=3)(kernel_func) + disp = dpex.kernel(inline_threshold=3)(kernel_func) dmm = disp.targetctx.data_model_manager i64arr_ty_flattened_arg_count = dmm.lookup(i64arr_ty).flattened_field_count diff --git a/numba_dpex/tests/core/types/IntEnumLiteral/test_compilation.py b/numba_dpex/tests/core/types/IntEnumLiteral/test_compilation.py index 9362ba20ff..8fb3509a31 100644 --- a/numba_dpex/tests/core/types/IntEnumLiteral/test_compilation.py +++ b/numba_dpex/tests/core/types/IntEnumLiteral/test_compilation.py @@ -4,7 +4,7 @@ import dpnp -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex import Range from numba_dpex.kernel_api.flag_enum import FlagEnum @@ -14,7 +14,7 @@ class MockFlags(FlagEnum): FLAG2 = 200 -@exp_dpex.kernel( +@dpex.kernel( release_gil=False, no_compile=True, no_cpython_wrapper=True, @@ -28,7 +28,7 @@ def update_with_flag(a): def test_compilation_of_flag_enum(): """Tests if a FlagEnum subclass can be used inside a kernel function.""" a = dpnp.ones(10, dtype=dpnp.int64) - exp_dpex.call_kernel(update_with_flag, Range(10), a) + dpex.call_kernel(update_with_flag, Range(10), a) assert a[0] == MockFlags.FLAG1 assert a[1] == MockFlags.FLAG2 diff --git a/numba_dpex/tests/debugging/test_breakpoints.py b/numba_dpex/tests/debugging/test_breakpoints.py index bcb87fe179..ef178e5415 100644 --- a/numba_dpex/tests/debugging/test_breakpoints.py +++ b/numba_dpex/tests/debugging/test_breakpoints.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize( "breakpoint", [ - "side-by-side.py:16", + "side-by-side.py:15", "common_loop_body", "side-by-side.py:common_loop_body", ], @@ -56,14 +56,14 @@ def test_device_func_breakpoint( SAT-4449 """ - if api == "numba-ndpx-kernel" and breakpoint != "side-by-side.py:16": + if api == "numba-ndpx-kernel" and breakpoint != "side-by-side.py:15": pytest.skip( "Breakpoint by function name not working for numba-dpex." ) # TODO: https://github.com/IntelPython/numba-dpex/issues/1242 app.breakpoint(breakpoint, condition=condition) app.run(f"side-by-side.py --api={api}") - app.expect_hit_breakpoint("side-by-side.py:16") + app.expect_hit_breakpoint("side-by-side.py:15") if exp_var is not None: app.print(exp_var, expected=exp_val) diff --git a/numba_dpex/tests/debugging/test_info.py b/numba_dpex/tests/debugging/test_info.py index e09d37858a..421aba8e16 100644 --- a/numba_dpex/tests/debugging/test_info.py +++ b/numba_dpex/tests/debugging/test_info.py @@ -24,7 +24,7 @@ ( "simple_dpex_func.py:18", "simple_dpex_func.py", - r"18\s+i = ndpx\.get_global_id\(0\)", + r"18\s+i = item\.get_id\(0\)", [ ( "a_in_kernel", @@ -47,18 +47,18 @@ ], ), ( - "side-by-side.py:16", + "side-by-side.py:15", "side-by-side.py --api=numba", - r"16\s+param_c = param_a \+ numba\.float32\(10\)", + r"15\s+param_c = param_a \+ numba\.float32\(10\)", [ ("param_a", r"[0-9]+", r"type = float32", r"type = float32"), ("param_b", r"[0-9]+", r"type = float32", r"type = float32"), ], ), ( - "side-by-side.py:16", + "side-by-side.py:15", "side-by-side.py --api=numba-ndpx-kernel", - r"16\s+param_c = param_a \+ numba\.float32\(10\)", + r"15\s+param_c = param_a \+ numba\.float32\(10\)", [ ("param_a", r"[0-9]+", r"type = float32", r"type = float32"), ("param_b", r"[0-9]+", r"type = float32", r"type = float32"), @@ -100,7 +100,7 @@ def test_info_functions(app): app.breakpoint("simple_sum.py:12") app.run("simple_sum.py") app.expect_hit_breakpoint("simple_sum.py:12") - app.expect(r"12\s+i = ndpx.get_global_id\(0\)", with_eol=True) + app.expect(r"12\s+i = item.get_id\(0\)", with_eol=True) app.info_functions("data_parallel_sum") @@ -117,7 +117,7 @@ def test_info_functions(app): def test_print_array_element(app, api): """Test access to array elements""" - app.breakpoint("side-by-side-2.py:17 if param_a == 5") + app.breakpoint("side-by-side-2.py:17", condition="param_a == 5") app.run(f"side-by-side-2.py --api={api}") app.expect_hit_breakpoint("side-by-side-2.py:17") @@ -140,9 +140,9 @@ def test_print_array_element(app, api): ], ) def test_assignment_to_variable(app, api, assign): - app.breakpoint("side-by-side-2.py:18", condition="param_a == 5") + app.breakpoint("side-by-side-2.py:17", condition="param_a == 5") app.run(f"side-by-side-2.py --api={api}") - app.expect_hit_breakpoint("side-by-side-2.py:18") + app.expect_hit_breakpoint("side-by-side-2.py:17") app.print("param_a", expected=5) if assign == "print": diff --git a/numba_dpex/tests/debugging/test_stepping.py b/numba_dpex/tests/debugging/test_stepping.py index 44f7e89246..58464350b4 100644 --- a/numba_dpex/tests/debugging/test_stepping.py +++ b/numba_dpex/tests/debugging/test_stepping.py @@ -22,7 +22,7 @@ def test_next(app: gdb): app.breakpoint("simple_dpex_func.py:18") app.run("simple_dpex_func.py") app.expect_hit_breakpoint("simple_dpex_func.py:18") - app.expect(r"18\s+i = ndpx.get_global_id\(0\)", with_eol=True) + app.expect(r"18\s+i = item.get_id\(0\)", with_eol=True) app.set_scheduler_lock() app.next() app.expect( @@ -35,19 +35,22 @@ def test_next(app: gdb): def test_step(app: gdb): - app.breakpoint("simple_dpex_func.py:18") + app.breakpoint("simple_dpex_func.py:19") app.run("simple_dpex_func.py") - app.expect_hit_breakpoint("simple_dpex_func.py:18") - app.expect(r"18\s+i = ndpx.get_global_id\(0\)", with_eol=True) - app.set_scheduler_lock() - app.step() + app.expect_hit_breakpoint("simple_dpex_func.py:19") app.expect( r"19\s+c_in_kernel\[i\] = func_sum\(a_in_kernel\[i\], b_in_kernel\[i\]\)", with_eol=True, ) + app.set_scheduler_lock() app.step() app.expect(r"__main__::func_sum.* at simple_dpex_func.py:12", with_eol=True) app.expect(r"12\s+result = a_in_func \+ b_in_func", with_eol=True) + app.step() + app.expect( + r"13\s+return result", + with_eol=True, + ) @pytest.mark.parametrize("func", ["stepi", "nexti"]) diff --git a/numba_dpex/tests/experimental/codegen/__init__.py b/numba_dpex/tests/experimental/codegen/__init__.py deleted file mode 100644 index 1d329c9611..0000000000 --- a/numba_dpex/tests/experimental/codegen/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-FileCopyrightText: 2023 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 diff --git a/numba_dpex/tests/kernel_tests/test_async_kernel.py b/numba_dpex/tests/kernel_tests/test_async_kernel.py index 58ec040e43..f3801ec537 100644 --- a/numba_dpex/tests/kernel_tests/test_async_kernel.py +++ b/numba_dpex/tests/kernel_tests/test_async_kernel.py @@ -7,12 +7,12 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex.experimental import testing from numba_dpex.kernel_api import Item, Range -@exp_dpex.kernel( +@dpex.kernel( release_gil=False, no_compile=True, no_cpython_wrapper=True, @@ -31,7 +31,7 @@ def test_async_add(): r = Range(size) - host_ref, event_ref = exp_dpex.call_kernel_async( + host_ref, event_ref = dpex.call_kernel_async( add, r, (), @@ -60,7 +60,7 @@ def test_async_dependent_add_list_exception(): # TODO: should capture ValueError, but numba captures it and generates # TypingError. ValueError is still readable there. with pytest.raises(TypingError): - exp_dpex.call_kernel_async( + dpex.call_kernel_async( add, Range(size), [dpctl.SyclEvent()], @@ -78,7 +78,7 @@ def test_async_dependent_add(): r = Range(size) - host_ref, event_ref = exp_dpex.call_kernel_async( + host_ref, event_ref = dpex.call_kernel_async( add, r, (), @@ -87,7 +87,7 @@ def test_async_dependent_add(): c, ) - host2_ref, event2_ref = exp_dpex.call_kernel_async( + host2_ref, event2_ref = dpex.call_kernel_async( add, r, (event_ref,), diff --git a/numba_dpex/tests/kernel_tests/test_atomic_fence.py b/numba_dpex/tests/kernel_tests/test_atomic_fence.py index dd9196028f..cea112379f 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_fence.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_fence.py @@ -5,7 +5,6 @@ import dpnp import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp from numba_dpex.kernel_api import ( AtomicRef, Item, @@ -18,7 +17,7 @@ def test_atomic_fence(): """A test for atomic_fence function.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(item: Item, a, b): i = item.get_id(0) @@ -39,6 +38,6 @@ def _kernel(item: Item, a, b): a = dpnp.ones(N, dtype=dpnp.int64) b = dpnp.zeros(1, dtype=dpnp.int64) - dpex_exp.call_kernel(_kernel, dpex.Range(N), a, b) + dpex.call_kernel(_kernel, dpex.Range(N), a, b) assert a[0] == N + 1 diff --git a/numba_dpex/tests/kernel_tests/test_atomic_fetch_phi.py b/numba_dpex/tests/kernel_tests/test_atomic_fetch_phi.py index c239c8f91a..00dbc498b3 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_fetch_phi.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_fetch_phi.py @@ -6,7 +6,7 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex from numba_dpex.kernel_api import AtomicRef, Item, Range from numba_dpex.tests._helper import get_all_dtypes @@ -43,7 +43,7 @@ def input_arrays(request): def test_fetch_phi_fn(input_arrays, ref_index, fetch_phi_fn): """A test for all fetch_phi atomic functions.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(item: Item, a, b, ref_index): i = item.get_id(0) v = AtomicRef(b, index=ref_index) @@ -59,9 +59,9 @@ def _kernel(item: Item, a, b, ref_index): # fetch_and, fetch_or, fetch_xor accept only int arguments. # test for TypingError when float arguments are passed. with pytest.raises(TypingError): - dpex_exp.call_kernel(_kernel, Range(10), a, b, ref_index) + dpex.call_kernel(_kernel, Range(10), a, b, ref_index) else: - dpex_exp.call_kernel(_kernel, Range(10), a, b, ref_index) + dpex.call_kernel(_kernel, Range(10), a, b, ref_index) # Verify that `a` accumulated at b[ref_index] by kernel # matches the `a` accumulated at b[ref_index+1] using Python for i in range(a.size): @@ -74,7 +74,7 @@ def _kernel(item: Item, a, b, ref_index): def test_fetch_phi_retval(fetch_phi_fn): """A test for all fetch_phi atomic functions.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(item: Item, a, b, c): i = item.get_id(0) v = AtomicRef(b, index=i) @@ -88,7 +88,7 @@ def _kernel(item: Item, a, b, c): b_copy = dpnp.copy(b) c_copy = dpnp.copy(c) - dpex_exp.call_kernel(_kernel, Range(10), a, b, c) + dpex.call_kernel(_kernel, Range(10), a, b, c) # Verify if the value returned by fetch_phi kernel # stored into `c` is same as the value returned @@ -106,7 +106,7 @@ def test_fetch_phi_diff_types(fetch_phi_fn): AtomicRef type and value to be added are of different types. """ - @dpex_exp.kernel + @dpex.kernel def _kernel(item: Item, a, b): i = item.get_id(0) v = AtomicRef(b, index=0) @@ -117,17 +117,17 @@ def _kernel(item: Item, a, b): b = dpnp.zeros(N, dtype=dpnp.int32) with pytest.raises(TypingError): - dpex_exp.call_kernel(_kernel, Range(10), a, b) + dpex.call_kernel(_kernel, Range(10), a, b) -@dpex_exp.kernel +@dpex.kernel def atomic_ref_0(item: Item, a): i = item.get_id(0) v = AtomicRef(a, index=0) v.fetch_add(a[i + 2]) -@dpex_exp.kernel +@dpex.kernel def atomic_ref_1(item: Item, a): i = item.get_id(0) v = AtomicRef(a, index=1) @@ -143,14 +143,14 @@ def test_spirv_compiler_flags_add(): N = 10 a = dpnp.ones(N, dtype=dpnp.float32) - dpex_exp.call_kernel(atomic_ref_0, Range(N - 2), a) - dpex_exp.call_kernel(atomic_ref_1, Range(N - 2), a) + dpex.call_kernel(atomic_ref_0, Range(N - 2), a) + dpex.call_kernel(atomic_ref_1, Range(N - 2), a) assert a[0] == N - 1 assert a[1] == N - 1 -@dpex_exp.kernel +@dpex.kernel def atomic_max_0(item: Item, a): i = item.get_id(0) v = AtomicRef(a, index=0) @@ -158,7 +158,7 @@ def atomic_max_0(item: Item, a): v.fetch_max(a[i]) -@dpex_exp.kernel +@dpex.kernel def atomic_max_1(item: Item, a): i = item.get_id(0) v = AtomicRef(a, index=0) @@ -176,8 +176,8 @@ def test_spirv_compiler_flags_max(): a = dpnp.arange(N, dtype=dpnp.float32) b = dpnp.arange(N, dtype=dpnp.float32) - dpex_exp.call_kernel(atomic_max_0, Range(N), a) - dpex_exp.call_kernel(atomic_max_1, Range(N), b) + dpex.call_kernel(atomic_max_0, Range(N), a) + dpex.call_kernel(atomic_max_1, Range(N), b) assert a[0] == N - 1 assert b[0] == N - 1 diff --git a/numba_dpex/tests/kernel_tests/test_atomic_load_store_cmp_exchg.py b/numba_dpex/tests/kernel_tests/test_atomic_load_store_cmp_exchg.py index 9d34d99319..77618e3965 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_load_store_cmp_exchg.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_load_store_cmp_exchg.py @@ -8,7 +8,6 @@ from numba.core.errors import TypingError import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp from numba_dpex.kernel_api import AtomicRef from numba_dpex.tests._helper import get_all_dtypes @@ -26,7 +25,7 @@ def store_exchange_fn(request): def test_load_store_fn(supported_dtype): """A test for load/store atomic functions.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(item, a, b): i = item.get_id(0) a_ref = AtomicRef(a, index=i) @@ -38,7 +37,7 @@ def _kernel(item, a, b): a = dpnp.zeros(2 * N, dtype=supported_dtype) b = dpnp.arange(N, dtype=supported_dtype) - dpex_exp.call_kernel(_kernel, dpex.Range(b.size), a, b) + dpex.call_kernel(_kernel, dpex.Range(b.size), a, b) # Verify that `b[i]` loaded and stored into a[i] by kernel # matches the `b[i]` loaded stored into a[i] using Python for i in range(b.size): @@ -54,7 +53,7 @@ def _kernel(item, a, b): def test_exchange_fn(supported_dtype): """A test for exchange atomic function.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(item, a, b): i = item.get_id(0) v = AtomicRef(a, index=i) @@ -67,7 +66,7 @@ def _kernel(item, a, b): a_copy = dpnp.copy(a_orig) b_copy = dpnp.copy(b_orig) - dpex_exp.call_kernel(_kernel, dpex.Range(b_orig.size), a_copy, b_copy) + dpex.call_kernel(_kernel, dpex.Range(b_orig.size), a_copy, b_copy) # Values in `b` have been exchanged # with values in `a`. @@ -82,7 +81,7 @@ def _kernel(item, a, b): def test_compare_exchange_fns(supported_dtype): """A test for compare exchange atomic functions.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(b): b_ref = AtomicRef(b, index=1) b[0] = b_ref.compare_exchange( @@ -91,13 +90,13 @@ def _kernel(b): b = dpnp.arange(4, dtype=supported_dtype) - dpex_exp.call_kernel(_kernel, dpex.Range(1), b) + dpex.call_kernel(_kernel, dpex.Range(1), b) # check for failure assert b[0] == 0 assert b[2] == b[1] - dpex_exp.call_kernel(_kernel, dpex.Range(1), b) + dpex.call_kernel(_kernel, dpex.Range(1), b) # check for success assert b[0] == 1 @@ -109,7 +108,7 @@ def test_store_exchange_diff_types(store_exchange_fn): AtomicRef type and value are of different types. """ - @dpex_exp.kernel + @dpex.kernel def _kernel(item, a, b): i = item.get_id(0) v = AtomicRef(b, index=0) @@ -120,4 +119,4 @@ def _kernel(item, a, b): b = dpnp.zeros(N, dtype=dpnp.int32) with pytest.raises(TypingError): - dpex_exp.call_kernel(_kernel, dpex.Range(10), a, b) + dpex.call_kernel(_kernel, dpex.Range(10), a, b) diff --git a/numba_dpex/tests/kernel_tests/test_atomic_ref.py b/numba_dpex/tests/kernel_tests/test_atomic_ref.py index dc93bcf620..7632cc6411 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_ref.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_ref.py @@ -7,7 +7,7 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex import numba_dpex.kernel_api as kapi @@ -17,7 +17,7 @@ def ref_type_options(request): def test_atomic_ref_compilation(): - @dpex_exp.kernel + @dpex.kernel def atomic_ref_kernel(item: kapi.Item, a, b): i = item.get_id(0) v = kapi.AtomicRef(b, index=0) @@ -26,13 +26,13 @@ def atomic_ref_kernel(item: kapi.Item, a, b): a = dpnp.ones(10) b = dpnp.zeros(10) try: - dpex_exp.call_kernel(atomic_ref_kernel, kapi.Range(10), a, b) + dpex.call_kernel(atomic_ref_kernel, kapi.Range(10), a, b) except Exception: pytest.fail("Unexpected execution failure") def test_atomic_ref_3_dim_compilation(): - @dpex_exp.kernel + @dpex.kernel def atomic_ref_kernel(item: kapi.Item, a, b): i = item.get_id(0) v = kapi.AtomicRef(b, index=(1, 1, 1)) @@ -45,7 +45,7 @@ def atomic_ref_kernel(item: kapi.Item, a, b): want[1, 1, 1] = a.size try: - dpex_exp.call_kernel(atomic_ref_kernel, kapi.Range(a.size), a, b) + dpex.call_kernel(atomic_ref_kernel, kapi.Range(a.size), a, b) except Exception: pytest.fail("Unexpected execution failure") @@ -58,7 +58,7 @@ def test_atomic_ref_compilation_failure(): ref. """ - @dpex_exp.kernel + @dpex.kernel def atomic_ref_kernel(item: kapi.Item, a, b): i = item.get_id(0) v = kapi.AtomicRef(b, index=0, address_space=kapi.AddressSpace.LOCAL) @@ -68,13 +68,13 @@ def atomic_ref_kernel(item: kapi.Item, a, b): b = dpnp.zeros(10) with pytest.raises(TypingError): - dpex_exp.call_kernel(atomic_ref_kernel, kapi.Range(10), a, b) + dpex.call_kernel(atomic_ref_kernel, kapi.Range(10), a, b) def test_atomic_ref_compilation_local_accessor(): """Tests if an AtomicRef object can be constructed from a LocalAccessor""" - @dpex_exp.kernel + @dpex.kernel def atomic_ref_slm_kernel(nditem: kapi.Item, a, slm): gi = nditem.get_global_id(0) v = kapi.AtomicRef(slm, 0) @@ -85,9 +85,7 @@ def atomic_ref_slm_kernel(nditem: kapi.Item, a, slm): a = dpnp.zeros(32) slm = kapi.LocalAccessor(1, a.dtype) - dpex_exp.call_kernel( - atomic_ref_slm_kernel, kapi.NdRange((32,), (32,)), a, slm - ) + dpex.call_kernel(atomic_ref_slm_kernel, kapi.NdRange((32,), (32,)), a, slm) want = dpnp.full_like(a, 32 * a.dtype.type(5)) assert np.allclose(a.asnumpy(), want.asnumpy()) diff --git a/numba_dpex/tests/kernel_tests/test_barriers.py b/numba_dpex/tests/kernel_tests/test_barriers.py index f6c93fb692..7d81b2eb3e 100644 --- a/numba_dpex/tests/kernel_tests/test_barriers.py +++ b/numba_dpex/tests/kernel_tests/test_barriers.py @@ -5,14 +5,13 @@ import dpnp import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp from numba_dpex.kernel_api import MemoryScope, NdItem, group_barrier def test_group_barrier(): """A test for group_barrier function.""" - @dpex_exp.kernel + @dpex.kernel def _kernel(nd_item: NdItem, a): i = nd_item.get_global_id(0) @@ -26,7 +25,7 @@ def _kernel(nd_item: NdItem, a): N = 16 a = dpnp.ones(N, dtype=dpnp.int32) - dpex_exp.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) + dpex.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) assert a[0] == N * 2 @@ -34,7 +33,7 @@ def _kernel(nd_item: NdItem, a): def test_group_barrier_device_func(): """A test for group_barrier function.""" - @dpex_exp.device_func + @dpex.device_func def _increment_value(nd_item: NdItem, a): i = nd_item.get_global_id(0) @@ -45,13 +44,13 @@ def _increment_value(nd_item: NdItem, a): for idx in range(1, a.size): a[0] += a[idx] - @dpex_exp.kernel + @dpex.kernel def _kernel(nd_item: NdItem, a): _increment_value(nd_item, a) N = 16 a = dpnp.ones(N, dtype=dpnp.int32) - dpex_exp.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) + dpex.call_kernel(_kernel, dpex.NdRange((N,), (N,)), a) assert a[0] == N * 2 diff --git a/numba_dpex/tests/kernel_tests/test_compiler_warnings.py b/numba_dpex/tests/kernel_tests/test_compiler_warnings.py index 67f26dd8d2..c051c39dea 100644 --- a/numba_dpex/tests/kernel_tests/test_compiler_warnings.py +++ b/numba_dpex/tests/kernel_tests/test_compiler_warnings.py @@ -4,7 +4,7 @@ import pytest -from numba_dpex import experimental as dpex_exp +from numba_dpex import kernel from numba_dpex.kernel_api import Item @@ -15,4 +15,4 @@ def _kernel(item: Item, a, b, c): def test_compilation_mode_option_user_definition(): with pytest.warns(UserWarning): - dpex_exp.kernel(_compilation_mode="kernel")(_kernel) + kernel(_compilation_mode="kernel")(_kernel) diff --git a/numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py b/numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py index 0b110595fc..283b738005 100644 --- a/numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py +++ b/numba_dpex/tests/kernel_tests/test_complex_array_kernel_arg.py @@ -6,7 +6,7 @@ import numpy import pytest -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex.tests._helper import get_all_dtypes N = 1024 diff --git a/numba_dpex/tests/kernel_tests/test_dpnp_ndarray_args.py b/numba_dpex/tests/kernel_tests/test_dpnp_ndarray_args.py index 0077f4dff3..2eac38c249 100644 --- a/numba_dpex/tests/kernel_tests/test_dpnp_ndarray_args.py +++ b/numba_dpex/tests/kernel_tests/test_dpnp_ndarray_args.py @@ -4,7 +4,7 @@ import dpnp -import numba_dpex.experimental as ndpx +import numba_dpex as ndpx from numba_dpex import float32 from numba_dpex import kernel_api as kapi diff --git a/numba_dpex/tests/kernel_tests/test_dump_kernel_llvm.py b/numba_dpex/tests/kernel_tests/test_dump_kernel_llvm.py index 666852eefc..01428187b6 100644 --- a/numba_dpex/tests/kernel_tests/test_dump_kernel_llvm.py +++ b/numba_dpex/tests/kernel_tests/test_dump_kernel_llvm.py @@ -9,7 +9,7 @@ from numba.core import types -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex import float32, usm_ndarray from numba_dpex.core import config from numba_dpex.core.descriptor import dpex_kernel_target diff --git a/numba_dpex/tests/kernel_tests/test_exec_queue_inference.py b/numba_dpex/tests/kernel_tests/test_exec_queue_inference.py index b5df39c427..9b6334efd6 100644 --- a/numba_dpex/tests/kernel_tests/test_exec_queue_inference.py +++ b/numba_dpex/tests/kernel_tests/test_exec_queue_inference.py @@ -7,12 +7,12 @@ import pytest from numba.core import config -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex import Range from numba_dpex.core.exceptions import ExecutionQueueInferenceError -@exp_dpex.kernel( +@dpex.kernel( release_gil=False, no_compile=True, no_cpython_wrapper=True, @@ -38,7 +38,7 @@ def test_successful_execution_queue_inference(): config.CAPTURED_ERRORS = "new_style" try: - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) except: pytest.fail("Unexpected error when calling kernel") @@ -65,7 +65,7 @@ def test_execution_queue_inference_error(): config.CAPTURED_ERRORS = "new_style" with pytest.raises(ExecutionQueueInferenceError): - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) config.CAPTURED_ERRORS = current_captured_error_style @@ -86,6 +86,6 @@ def test_error_when_no_array_args(): config.CAPTURED_ERRORS = "new_style" with pytest.raises(ExecutionQueueInferenceError): - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) config.CAPTURED_ERRORS = current_captured_error_style diff --git a/numba_dpex/tests/kernel_tests/test_func.py b/numba_dpex/tests/kernel_tests/test_func.py index b6c17ba327..f984fd08dc 100644 --- a/numba_dpex/tests/kernel_tests/test_func.py +++ b/numba_dpex/tests/kernel_tests/test_func.py @@ -5,7 +5,7 @@ import dpnp import numpy -import numba_dpex.experimental as dpex +import numba_dpex as dpex @dpex.device_func diff --git a/numba_dpex/tests/kernel_tests/test_func_qualname_disambiguation.py b/numba_dpex/tests/kernel_tests/test_func_qualname_disambiguation.py index 5b18ab367a..8bfc1a2ab4 100644 --- a/numba_dpex/tests/kernel_tests/test_func_qualname_disambiguation.py +++ b/numba_dpex/tests/kernel_tests/test_func_qualname_disambiguation.py @@ -5,7 +5,7 @@ import dpnp import numpy as np -import numba_dpex.experimental as ndpx +import numba_dpex as ndpx def make_write_values_kernel(n_rows): diff --git a/numba_dpex/tests/kernel_tests/test_func_specialization.py b/numba_dpex/tests/kernel_tests/test_func_specialization.py index 223c9c0521..47b7ca5f3f 100644 --- a/numba_dpex/tests/kernel_tests/test_func_specialization.py +++ b/numba_dpex/tests/kernel_tests/test_func_specialization.py @@ -6,7 +6,7 @@ import numpy as np from numba import int32, int64 -import numba_dpex.experimental as dpex +import numba_dpex as dpex i32_signature = dpex.device_func(int32(int32)) i32i64_signature = dpex.device_func([int32(int32), int64(int64)]) diff --git a/numba_dpex/tests/kernel_tests/test_index_space_ids.py b/numba_dpex/tests/kernel_tests/test_index_space_ids.py index eae0133406..88999a98bc 100644 --- a/numba_dpex/tests/kernel_tests/test_index_space_ids.py +++ b/numba_dpex/tests/kernel_tests/test_index_space_ids.py @@ -9,7 +9,6 @@ import pytest import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp from numba_dpex.kernel_api import Item, NdItem, NdRange from numba_dpex.kernel_api import call_kernel as kapi_call_kernel from numba_dpex.tests._helper import has_cpu @@ -18,7 +17,7 @@ _GROUP_SIZE = 20 -_kernel_decorators = [(dpex_exp.call_kernel, dpex_exp.kernel)] +_kernel_decorators = [(dpex.call_kernel, dpex.kernel)] # run simulator tests only with arrays allocated on cpu to avoid performance # issues if has_cpu(): diff --git a/numba_dpex/tests/kernel_tests/test_inline_threshold_config.py b/numba_dpex/tests/kernel_tests/test_inline_threshold_config.py index 75c0430171..d4695a1b61 100644 --- a/numba_dpex/tests/kernel_tests/test_inline_threshold_config.py +++ b/numba_dpex/tests/kernel_tests/test_inline_threshold_config.py @@ -5,7 +5,6 @@ from numba.core import compiler import numba_dpex as dpex -from numba_dpex import experimental as dpex_exp from numba_dpex.kernel_api import Item @@ -18,7 +17,7 @@ def test_inline_threshold_set_using_config(): oldConfig = dpex.config.INLINE_THRESHOLD dpex.config.INLINE_THRESHOLD = None - disp = dpex_exp.kernel(kernel_func) + disp = dpex.kernel(kernel_func) flags = compiler.Flags() disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) @@ -39,7 +38,7 @@ def test_inline_threshold_set_using_decorator_option(): Test setting the inline_threshold value using the kernel decorator flag """ - disp = dpex_exp.kernel(inline_threshold=2)(kernel_func) + disp = dpex.kernel(inline_threshold=2)(kernel_func) flags = compiler.Flags() disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) @@ -50,7 +49,7 @@ def test_inline_threshold_set_using_decorator_supersedes_config_option(): oldConfig = dpex.config.INLINE_THRESHOLD dpex.config.INLINE_THRESHOLD = None - disp = dpex_exp.kernel(inline_threshold=3)(kernel_func) + disp = dpex.kernel(inline_threshold=3)(kernel_func) flags = compiler.Flags() disp.targetdescr.options.parse_as_flags(flags, disp.targetoptions) diff --git a/numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py b/numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py index a8143f6f88..45f5adeb42 100644 --- a/numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py +++ b/numba_dpex/tests/kernel_tests/test_invalid_kernel_args.py @@ -5,7 +5,7 @@ import numpy import pytest -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex import kernel_api as kapi N = 1024 diff --git a/numba_dpex/tests/kernel_tests/test_kernel_dispatcher.py b/numba_dpex/tests/kernel_tests/test_kernel_dispatcher.py index 9c41bf9960..c0d86daf64 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_dispatcher.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_dispatcher.py @@ -5,11 +5,11 @@ import dpctl import dpnp -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex import NdRange, Range, dpjit -@exp_dpex.kernel( +@dpex.kernel( release_gil=False, no_compile=True, no_cpython_wrapper=True, @@ -19,7 +19,7 @@ def add(a, b, c): c[0] = b[0] + a[0] -@exp_dpex.kernel( +@dpex.kernel( release_gil=False, no_compile=True, no_cpython_wrapper=True, @@ -42,11 +42,11 @@ def test_call_kernel_from_cpython(): r = Range(100) ndr = NdRange(global_size=(100,), local_size=(1,)) - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) assert c[0] == b[0] + a[0] - exp_dpex.call_kernel(add, ndr, a, b, c) + dpex.call_kernel(add, ndr, a, b, c) assert c[0] == b[0] + a[0] @@ -60,7 +60,7 @@ def test_call_kernel_from_dpjit(): @dpjit def range_kernel_caller(q, a, b, c): r = Range(100) - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) return c @dpjit @@ -68,7 +68,7 @@ def ndrange_kernel_caller(q, a, b, c): gr = Range(100) lr = Range(1) ndr = NdRange(gr, lr) - exp_dpex.call_kernel(add, ndr, a, b, c) + dpex.call_kernel(add, ndr, a, b, c) return c q = dpctl.SyclQueue() @@ -96,10 +96,10 @@ def test_call_multiple_kernels(): c = dpnp.zeros_like(a, sycl_queue=q) r = Range(100) - exp_dpex.call_kernel(add, r, a, b, c) + dpex.call_kernel(add, r, a, b, c) assert c[0] == b[0] + a[0] - exp_dpex.call_kernel(sq, r, a, c) + dpex.call_kernel(sq, r, a, c) assert a[0] == c[0] * c[0] diff --git a/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py b/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py index 739c82ec4a..36d5c3987b 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_has_return_value_error.py @@ -6,7 +6,7 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex import int32, usm_ndarray from numba_dpex.core.exceptions import KernelHasReturnValueError from numba_dpex.core.types.kernel_api.index_space_ids import ItemType diff --git a/numba_dpex/tests/kernel_tests/test_kernel_specialization.py b/numba_dpex/tests/kernel_tests/test_kernel_specialization.py index b52062eeae..2e68134ca8 100644 --- a/numba_dpex/tests/kernel_tests/test_kernel_specialization.py +++ b/numba_dpex/tests/kernel_tests/test_kernel_specialization.py @@ -7,7 +7,7 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex from numba_dpex import DpnpNdArray, float32, int64 from numba_dpex.core.exceptions import InvalidKernelSpecializationError from numba_dpex.core.types.kernel_api.index_space_ids import ItemType @@ -17,8 +17,8 @@ f32arrty = DpnpNdArray(ndim=1, dtype=float32, layout="C") item_ty = ItemType(ndim=1) -specialized_kernel1 = dpex_exp.kernel((item_ty, i64arrty, i64arrty, i64arrty)) -specialized_kernel2 = dpex_exp.kernel( +specialized_kernel1 = dpex.kernel((item_ty, i64arrty, i64arrty, i64arrty)) +specialized_kernel2 = dpex.kernel( [ (item_ty, i64arrty, i64arrty, i64arrty), (item_ty, f32arrty, f32arrty, f32arrty), @@ -50,7 +50,7 @@ def test_invalid_specialization_error(): """Test if an InvalidKernelSpecializationError is raised when attempting to specialize with NumPy arrays. """ - specialized_kernel3 = dpex_exp.kernel( + specialized_kernel3 = dpex.kernel( (item_ty, int64[::1], int64[::1], int64[::1]) ) with pytest.raises(InvalidKernelSpecializationError): @@ -68,9 +68,7 @@ def test_missing_specialization_error(): with pytest.raises(TypingError): data_parallel_sum_specialized = specialized_kernel1(data_parallel_sum) - dpex_exp.call_kernel( - data_parallel_sum_specialized, Range(SIZE), a, b, c - ) + dpex.call_kernel(data_parallel_sum_specialized, Range(SIZE), a, b, c) def test_execution_of_specialized_kernel(): @@ -83,7 +81,7 @@ def test_execution_of_specialized_kernel(): data_parallel_sum_specialized = specialized_kernel1(data_parallel_sum) - dpex_exp.call_kernel(data_parallel_sum_specialized, Range(SIZE), a, b, c) + dpex.call_kernel(data_parallel_sum_specialized, Range(SIZE), a, b, c) npc = dpnp.asnumpy(c) import numpy as np @@ -96,10 +94,10 @@ def test_string_specialization(): """Test if NotImplementedError is raised when signature is a string""" with pytest.raises(NotImplementedError): - dpex_exp.kernel("(item_ty, i64arrty, i64arrty, i64arrty)") + dpex.kernel("(item_ty, i64arrty, i64arrty, i64arrty)") with pytest.raises(NotImplementedError): - dpex_exp.kernel( + dpex.kernel( [ "(item_ty, i64arrty, i64arrty, i64arrty)", "(item_ty, f32arrty, f32arrty, f32arrty)", @@ -107,4 +105,4 @@ def test_string_specialization(): ) with pytest.raises(ValueError): - dpex_exp.kernel((i64arrty)) + dpex.kernel((i64arrty)) diff --git a/numba_dpex/tests/kernel_tests/test_local_accessors.py b/numba_dpex/tests/kernel_tests/test_local_accessors.py index d8ae378908..05fe1eedfa 100644 --- a/numba_dpex/tests/kernel_tests/test_local_accessors.py +++ b/numba_dpex/tests/kernel_tests/test_local_accessors.py @@ -8,7 +8,6 @@ from numba.core.errors import TypingError import numba_dpex as dpex -import numba_dpex.experimental as dpex_exp from numba_dpex.kernel_api import LocalAccessor, NdItem from numba_dpex.kernel_api import call_kernel as kapi_call_kernel from numba_dpex.tests._helper import get_all_dtypes @@ -65,9 +64,9 @@ def _kernel3(nd_item: NdItem, a, slm): def device_func_kernel(func): - _df = dpex_exp.device_func(func) + _df = dpex.device_func(func) - @dpex_exp.kernel + @dpex.kernel def _kernel(item, a, slm): _df(item, a, slm) @@ -86,8 +85,8 @@ def _kernel(item, a, slm): @pytest.mark.parametrize( "call_kernel, kernel", [ - (dpex_exp.call_kernel, dpex_exp.kernel), - (dpex_exp.call_kernel, device_func_kernel), + (dpex.call_kernel, dpex.kernel), + (dpex.call_kernel, device_func_kernel), (kapi_call_kernel, lambda f: f), ], ) @@ -123,4 +122,4 @@ def test_local_accessor_argument_to_range_kernel(): # A TypeError is raised if NUMBA_CAPTURED_ERROR=new_style and a # numba.TypingError is raised if NUMBA_CAPTURED_ERROR=old_style with pytest.raises((TypeError, TypingError)): - dpex_exp.call_kernel(_kernel1, dpex.Range(N), a, slm) + dpex.call_kernel(_kernel1, dpex.Range(N), a, slm) diff --git a/numba_dpex/tests/kernel_tests/test_math_functions.py b/numba_dpex/tests/kernel_tests/test_math_functions.py index e090dc7989..1c37c99581 100644 --- a/numba_dpex/tests/kernel_tests/test_math_functions.py +++ b/numba_dpex/tests/kernel_tests/test_math_functions.py @@ -8,7 +8,7 @@ import numpy import pytest -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex.tests._helper import get_all_dtypes list_of_unary_ops = ["fabs", "exp", "log", "sqrt", "sin", "cos", "tan"] diff --git a/numba_dpex/tests/kernel_tests/test_memory_enum_compilation.py b/numba_dpex/tests/kernel_tests/test_memory_enum_compilation.py index 991e256784..a0c1e01214 100644 --- a/numba_dpex/tests/kernel_tests/test_memory_enum_compilation.py +++ b/numba_dpex/tests/kernel_tests/test_memory_enum_compilation.py @@ -4,7 +4,7 @@ import dpnp -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex import Range from numba_dpex.kernel_api import AddressSpace, MemoryOrder, MemoryScope @@ -12,7 +12,7 @@ def test_compilation_of_memory_order(): """Tests if a MemoryOrder flags can be used inside a kernel function.""" - @exp_dpex.kernel + @dpex.kernel def store_memory_order_flag(a): a[0] = MemoryOrder.RELAXED a[1] = MemoryOrder.CONSUME_UNSUPPORTED @@ -22,7 +22,7 @@ def store_memory_order_flag(a): a[5] = MemoryOrder.SEQ_CST a = dpnp.ones(10, dtype=dpnp.int64) - exp_dpex.call_kernel(store_memory_order_flag, Range(10), a) + dpex.call_kernel(store_memory_order_flag, Range(10), a) assert a[0] == MemoryOrder.RELAXED assert a[1] == MemoryOrder.CONSUME_UNSUPPORTED @@ -35,7 +35,7 @@ def store_memory_order_flag(a): def test_compilation_of_memory_scope(): """Tests if a MemoryScope flags can be used inside a kernel function.""" - @exp_dpex.kernel + @dpex.kernel def store_memory_scope_flag(a): a[0] = MemoryScope.DEVICE a[1] = MemoryScope.SUB_GROUP @@ -44,7 +44,7 @@ def store_memory_scope_flag(a): a[4] = MemoryScope.WORK_ITEM a = dpnp.ones(10, dtype=dpnp.int64) - exp_dpex.call_kernel(store_memory_scope_flag, Range(10), a) + dpex.call_kernel(store_memory_scope_flag, Range(10), a) assert a[0] == MemoryScope.DEVICE assert a[1] == MemoryScope.SUB_GROUP @@ -56,7 +56,7 @@ def store_memory_scope_flag(a): def test_compilation_of_address_space(): """Tests if a AddressSpace flags can be used inside a kernel function.""" - @exp_dpex.kernel + @dpex.kernel def store_address_space_flag(a): a[0] = AddressSpace.CONSTANT a[1] = AddressSpace.GENERIC @@ -65,7 +65,7 @@ def store_address_space_flag(a): a[4] = AddressSpace.PRIVATE a = dpnp.ones(10, dtype=dpnp.int64) - exp_dpex.call_kernel(store_address_space_flag, Range(10), a) + dpex.call_kernel(store_address_space_flag, Range(10), a) assert a[0] == AddressSpace.CONSTANT assert a[1] == AddressSpace.GENERIC diff --git a/numba_dpex/tests/kernel_tests/test_print.py b/numba_dpex/tests/kernel_tests/test_print.py index 5e951d091a..88455e29d2 100644 --- a/numba_dpex/tests/kernel_tests/test_print.py +++ b/numba_dpex/tests/kernel_tests/test_print.py @@ -7,7 +7,7 @@ import pytest from numba.core.errors import TypingError -import numba_dpex.experimental as dpex +import numba_dpex as dpex list_of_dtypes = [ dpnp.int32, diff --git a/numba_dpex/tests/kernel_tests/test_private_array.py b/numba_dpex/tests/kernel_tests/test_private_array.py index 44370f30da..487c4da3d0 100644 --- a/numba_dpex/tests/kernel_tests/test_private_array.py +++ b/numba_dpex/tests/kernel_tests/test_private_array.py @@ -6,7 +6,7 @@ import numpy as np import pytest -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex from numba_dpex.kernel_api import Item, PrivateArray, Range from numba_dpex.kernel_api import call_kernel as kapi_call_kernel @@ -70,7 +70,7 @@ def private_2d_array_kernel(item: Item, a): ) @pytest.mark.parametrize( "call_kernel, decorator", - [(dpex_exp.call_kernel, dpex_exp.kernel), (kapi_call_kernel, lambda a: a)], + [(dpex.call_kernel, dpex.kernel), (kapi_call_kernel, lambda a: a)], ) def test_private_array(call_kernel, decorator, kernel): kernel = decorator(kernel) @@ -95,14 +95,14 @@ def test_private_array(call_kernel, decorator, kernel): ) def test_private_array_in_device_func(func): - _df = dpex_exp.device_func(func) + _df = dpex.device_func(func) - @dpex_exp.kernel + @dpex.kernel def _kernel(item: Item, a): _df(item, a) a = dpnp.empty(10, dtype=dpnp.float32) - dpex_exp.call_kernel(_kernel, Range(a.size), a) + dpex.call_kernel(_kernel, Range(a.size), a) # sum of squares from 1 to n: n*(n+1)*(2*n+1)/6 want = np.full(a.size, (9) * (9 + 1) * (2 * 9 + 1) / 6, dtype=np.float32) diff --git a/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py b/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py index ba4965bfd4..d593c7a0ac 100644 --- a/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py +++ b/numba_dpex/tests/kernel_tests/test_scalar_arg_types.py @@ -6,7 +6,7 @@ import numpy import pytest -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex.tests._helper import get_all_dtypes N = 1024 diff --git a/numba_dpex/tests/kernel_tests/test_strided_dpnp_array_in_kernel.py b/numba_dpex/tests/kernel_tests/test_strided_dpnp_array_in_kernel.py index 90e48356ba..09a9bf5429 100644 --- a/numba_dpex/tests/kernel_tests/test_strided_dpnp_array_in_kernel.py +++ b/numba_dpex/tests/kernel_tests/test_strided_dpnp_array_in_kernel.py @@ -8,7 +8,7 @@ import numpy as np import pytest -import numba_dpex.experimental as exp_dpex +import numba_dpex as dpex from numba_dpex.kernel_api import Item, Range @@ -34,7 +34,7 @@ def get_order(a): raise Exception("Unknown order/layout") -@exp_dpex.kernel +@dpex.kernel def change_values_1d(item: Item, x, v): """Assign values in a 1d dpnp.ndarray @@ -57,7 +57,7 @@ def change_values_1d_func(a, p): a[i] = p -@exp_dpex.kernel +@dpex.kernel def change_values_2d(item: Item, x, v): """Assign values in a 2d dpnp.ndarray @@ -82,7 +82,7 @@ def change_values_2d_func(a, p): a[i, j] = p -@exp_dpex.kernel +@dpex.kernel def change_values_3d(item: Item, x, v): """Assign values in a 3d dpnp.ndarray @@ -122,7 +122,7 @@ def test_1d_strided_dpnp_array_in_kernel(s): u = dpnp.asarray(t) v = u[::s] - exp_dpex.call_kernel(change_values_1d, Range(v.shape[0]), v, k) + dpex.call_kernel(change_values_1d, Range(v.shape[0]), v, k) x = t[::s] change_values_1d_func(x, k) @@ -148,7 +148,7 @@ def test_multievel_1d_strided_dpnp_array_in_kernel(s): v, x = u, t while v.shape[0] > 1: v = v[::s] - exp_dpex.call_kernel(change_values_1d, Range(v.shape[0]), v, k) + dpex.call_kernel(change_values_1d, Range(v.shape[0]), v, k) x = x[::s] change_values_1d_func(x, k) @@ -177,7 +177,7 @@ def test_2d_strided_dpnp_array_in_kernel(s1, s2, order): assert get_order(u) == order v = u[::s1, ::s2] - exp_dpex.call_kernel(change_values_2d, Range(*v.shape), v, k) + dpex.call_kernel(change_values_2d, Range(*v.shape), v, k) x = t[::s1, ::s2] change_values_2d_func(x, k) @@ -208,7 +208,7 @@ def test_multilevel_2d_strided_dpnp_array_in_kernel(s1, s2, order): v, x = u, t while v.shape[0] > 1 and v.shape[1] > 1: v = v[::s1, ::s2] - exp_dpex.call_kernel(change_values_2d, Range(*v.shape), v, k) + dpex.call_kernel(change_values_2d, Range(*v.shape), v, k) x = x[::s1, ::s2] change_values_2d_func(x, k) @@ -238,7 +238,7 @@ def test_3d_strided_dpnp_array_in_kernel(s1, s2, s3, order): assert get_order(u) == order v = u[::s1, ::s2, ::s3] - exp_dpex.call_kernel(change_values_3d, Range(*v.shape), v, k) + dpex.call_kernel(change_values_3d, Range(*v.shape), v, k) x = t[::s1, ::s2, ::s3] change_values_3d_func(x, k) @@ -270,7 +270,7 @@ def test_multilevel_3d_strided_dpnp_array_in_kernel(s1, s2, s3, order): v, x = u, t while v.shape[0] > 1 and v.shape[1] > 1 and v.shape[2] > 1: v = v[::s1, ::s2, ::s3] - exp_dpex.call_kernel(change_values_3d, Range(*v.shape), v, k) + dpex.call_kernel(change_values_3d, Range(*v.shape), v, k) x = x[::s1, ::s2, ::s3] change_values_3d_func(x, k) diff --git a/numba_dpex/tests/kernel_tests/test_supported_array_types_as_kernel_args.py b/numba_dpex/tests/kernel_tests/test_supported_array_types_as_kernel_args.py index 3c4c1be6d2..647ee9da67 100644 --- a/numba_dpex/tests/kernel_tests/test_supported_array_types_as_kernel_args.py +++ b/numba_dpex/tests/kernel_tests/test_supported_array_types_as_kernel_args.py @@ -8,7 +8,7 @@ import dpnp import pytest -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex from numba_dpex.kernel_api import Item, Range from numba_dpex.tests._helper import get_all_dtypes @@ -27,13 +27,13 @@ def input_array(request): return zeros(_SIZE, dtype=dtype) -@dpex_exp.kernel +@dpex.kernel def set_ones(item: Item, a): i = item.get_id(0) a[i] = 1 def test_fetch_add(input_array): - dpex_exp.call_kernel(set_ones, Range(_SIZE), input_array) + dpex.call_kernel(set_ones, Range(_SIZE), input_array) assert input_array[0] == 1 diff --git a/numba_dpex/tests/kernel_tests/test_sycl_usm_array_iface_interop.py b/numba_dpex/tests/kernel_tests/test_sycl_usm_array_iface_interop.py deleted file mode 100644 index 908350858d..0000000000 --- a/numba_dpex/tests/kernel_tests/test_sycl_usm_array_iface_interop.py +++ /dev/null @@ -1,115 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import numpy as np -import pytest -from dpctl import tensor as dpt - -import numba_dpex as dpex -from numba_dpex.tests._helper import get_all_dtypes - - -class DuckUSMArray: - """A Python class that defines a __sycl_usm_array_interface__ attribute.""" - - def __init__(self, shape, dtype="d", host_buffer=None): - _tensor = dpt.empty(shape, dtype=dtype, usm_type="shared") - shmem = _tensor.usm_data - if isinstance(host_buffer, np.ndarray): - shmem.copy_from_host(host_buffer.view(dtype="|u1")) - self.arr = np.ndarray(shape, dtype=dtype, buffer=shmem) - - def __getitem__(self, indx): - return self.arr[indx] - - def __setitem__(self, indx, val): - self.arr.__setitem__(indx, val) - - @property - def __sycl_usm_array_interface__(self): - iface = self.arr.__array_interface__ - b = self.arr.base - iface["syclobj"] = b.__sycl_usm_array_interface__["syclobj"] - iface["version"] = 1 - return iface - - -class PseudoDuckUSMArray: - """A Python class that defines an attributed called - __sycl_usm_array_interface__, but is not actually backed by USM memory. - - """ - - def __init__(self): - pass - - @property - def __sycl_usm_array_interface__(self): - iface = {} - iface["syclobj"] = None - iface["version"] = 0 - return iface - - -@dpex.kernel -def vecadd(a, b, c): - i = dpex.get_global_id(0) - c[i] = a[i] + b[i] - - -dtypes = get_all_dtypes( - no_bool=True, no_float16=True, no_none=True, no_complex=True -) - - -@pytest.fixture(params=dtypes) -def dtype(request): - return request.param - - -def test_kernel_valid_usm_obj(dtype): - """Test if a ``numba_dpex.kernel`` function accepts a DuckUSMArray argument. - - The ``DuckUSMArray`` uses ``dpctl.memory`` to allocate a Python object that - defines a ``__sycl_usm_array_interface__`` attribute. We test if - ``numba_dpex`` recognizes the ``DuckUSMArray`` as a valid USM-backed Python - object and accepts it as a kernel argument. - - """ - N = 1024 - - buffA = np.arange(0, N, dtype=dtype) - buffB = np.arange(0, N, dtype=dtype) - buffC = np.zeros(N, dtype=dtype) - - A = DuckUSMArray(shape=buffA.shape, dtype=dtype, host_buffer=buffA) - B = DuckUSMArray(shape=buffB.shape, dtype=dtype, host_buffer=buffB) - C = DuckUSMArray(shape=buffC.shape, dtype=dtype, host_buffer=buffC) - - try: - dpex.call_kernel(vecadd, dpex.Range(N), A, B, C) - except Exception: - pytest.fail( - "Could not pass Python object with sycl_usm_array_interface" - + " to a kernel." - ) - - -def test_kernel_invalid_usm_obj(dtype): - """Test if a ``numba_dpex.kernel`` function rejects a PseudoDuckUSMArray - argument. - - The ``PseudoDuckUSMArray`` defines a fake attribute called - __sycl_usm_array__interface__. We test if - ``numba_dpex`` correctly recognizes and rejects the ``PseudoDuckUSMArray``. - - """ - N = 1024 - - A = PseudoDuckUSMArray() - B = PseudoDuckUSMArray() - C = PseudoDuckUSMArray() - - with pytest.raises(Exception): - dpex.call_kernel(vecadd, dpex.Range(N), A, B, C) diff --git a/numba_dpex/tests/kernel_tests/test_target_specific_overload.py b/numba_dpex/tests/kernel_tests/test_target_specific_overload.py index 3dddeadc4b..be34a122b4 100644 --- a/numba_dpex/tests/kernel_tests/test_target_specific_overload.py +++ b/numba_dpex/tests/kernel_tests/test_target_specific_overload.py @@ -5,7 +5,7 @@ import dpnp from numba.core.extending import overload -import numba_dpex.experimental as dpex_exp +import numba_dpex as dpex from numba_dpex.core.descriptor import dpex_kernel_target from numba_dpex.experimental.target import ( DPEX_KERNEL_EXP_TARGET_NAME, @@ -26,7 +26,7 @@ def ol_scalar_add_impl(a, b): return ol_scalar_add_impl -@dpex_exp.kernel +@dpex.kernel def kernel_calling_overload(item: Item, a, b, c): i = item.get_id(0) c[i] = scalar_add(a[i], b[i]) @@ -36,7 +36,7 @@ def kernel_calling_overload(item: Item, a, b, c): b = dpnp.ones(10, dtype=dpnp.int64) c = dpnp.zeros(10, dtype=dpnp.int64) -dpex_exp.call_kernel(kernel_calling_overload, Range(10), a, b, c) +dpex.call_kernel(kernel_calling_overload, Range(10), a, b, c) def test_end_to_end_overload_execution(): diff --git a/numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py b/numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py index fe1edb3f06..dfeed5955b 100644 --- a/numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py +++ b/numba_dpex/tests/kernel_tests/test_usm_ndarray_args.py @@ -6,7 +6,7 @@ import numpy import pytest -import numba_dpex.experimental as dpex +import numba_dpex as dpex from numba_dpex.tests._helper import get_all_dtypes list_of_dtype = get_all_dtypes( diff --git a/numba_dpex/tests/kernel_tests/test_usm_ndarray_interop.py b/numba_dpex/tests/kernel_tests/test_usm_ndarray_interop.py deleted file mode 100644 index e6863e47e5..0000000000 --- a/numba_dpex/tests/kernel_tests/test_usm_ndarray_interop.py +++ /dev/null @@ -1,61 +0,0 @@ -# SPDX-FileCopyrightText: 2020 - 2024 Intel Corporation -# -# SPDX-License-Identifier: Apache-2.0 - -import dpctl.tensor as dpt -import numpy -import pytest - -import numba_dpex as dpex -from numba_dpex.tests._helper import get_all_dtypes - -list_of_dtype = get_all_dtypes( - no_bool=True, no_float16=True, no_none=True, no_complex=True -) - - -@pytest.fixture(params=list_of_dtype) -def dtype(request): - return request.param - - -list_of_usm_type = [ - "shared", - "device", - "host", -] - - -@pytest.fixture(params=list_of_usm_type) -def usm_type(request): - return request.param - - -def test_consuming_usm_ndarray(dtype, usm_type): - @dpex.kernel - def data_parallel_sum(a, b, c): - """ - Vector addition using the ``kernel`` decorator. - """ - i = dpex.get_global_id(0) - j = dpex.get_global_id(1) - c[i, j] = a[i, j] + b[i, j] - - N = 1000 - global_size = N * N - - a = dpt.arange(global_size, dtype=dtype, usm_type=usm_type) - a = dpt.reshape(a, shape=(N, N)) - - b = dpt.arange(global_size, dtype=dtype, usm_type=usm_type) - b = dpt.reshape(b, shape=(N, N)) - - c = dpt.empty_like(a) - - dpex.call_kernel(data_parallel_sum, dpex.Range(N, N), a, b, c) - - na = dpt.asnumpy(a) - nb = dpt.asnumpy(b) - nc = dpt.asnumpy(c) - - assert numpy.array_equal(nc, na + nb) diff --git a/numba_dpex/tests/misc/test_warnings.py b/numba_dpex/tests/misc/test_warnings.py index 247964eaca..b09d2fecc5 100644 --- a/numba_dpex/tests/misc/test_warnings.py +++ b/numba_dpex/tests/misc/test_warnings.py @@ -11,9 +11,9 @@ from numba_dpex.core import config -@dpex.kernel(enable_cache=False) -def foo(a): - a[dpex.get_global_id(0)] = 0 +@dpex.kernel +def foo(item, a): + a[item.get_id(0)] = 0 def test_inline_threshold_negative_val_warning_(): @@ -26,16 +26,6 @@ def test_inline_threshold_negative_val_warning_(): config.INLINE_THRESHOLD = bkp -def test_inline_threshold_gt_3_warning(): - bkp = config.INLINE_THRESHOLD - config.INLINE_THRESHOLD = 4 - - with pytest.warns(UserWarning): - dpex.call_kernel(foo, dpex.Range(10), dpnp.arange(10)) - - config.INLINE_THRESHOLD = bkp - - def test_no_warning(): with warnings.catch_warnings(): warnings.simplefilter("error") diff --git a/numba_dpex/tests/test_debuginfo.py b/numba_dpex/tests/test_debuginfo.py index 140b46a37a..e95e9e6d1f 100644 --- a/numba_dpex/tests/test_debuginfo.py +++ b/numba_dpex/tests/test_debuginfo.py @@ -7,15 +7,18 @@ import re import pytest +from numba.core import types import numba_dpex as dpex from numba_dpex import float32, int32, usm_ndarray from numba_dpex.core.descriptor import dpex_kernel_target +from numba_dpex.core.types.kernel_api.index_space_ids import ItemType from numba_dpex.tests._helper import override_config debug_options = [True, False] f32arrty = usm_ndarray(ndim=1, dtype=float32, layout="C") +itemty = ItemType(ndim=1) @pytest.fixture(params=debug_options) @@ -23,20 +26,6 @@ def debug_option(request): return request.param -def get_kernel_ir(fn, sig, debug=False): - kernel = dpex.core.kernel_interface.spirv_kernel.SpirvKernel( - fn, fn.__name__ - ) - kernel.compile( - args=sig, - target_ctx=dpex_kernel_target.target_context, - typing_ctx=dpex_kernel_target.typing_context, - debug=debug, - compile_flags=None, - ) - return kernel.llvm_module - - def make_check(ir, val_to_search): """ Check the compiled assembly for debuginfo. @@ -52,11 +41,15 @@ def test_debug_flag_generates_ir_with_debuginfo(debug_option): Check debug info is emitting to IR if debug parameter is set to True """ - def foo(x): - x = 1 # noqa + def foo(item, x): + i = item.get_id(0) + x[i] = 1 # noqa + + sig = (itemty, f32arrty) + disp = dpex.kernel(sig, debug=debug_option)(foo) + kcres = disp.get_compile_result(types.void(itemty, f32arrty)) + kernel_ir = kcres.library.get_llvm_str() - sig = (int32,) - kernel_ir = get_kernel_ir(foo, sig, debug=debug_option) tag = "!dbg" if debug_option: @@ -71,8 +64,8 @@ def test_debug_info_locals_vars_on_no_opt(): if debug parameter is set to True and optimization is O0 """ - def foo(var_a, var_b, var_c): - i = dpex.get_global_id(0) + def foo(item, var_a, var_b, var_c): + i = item.get_id(0) var_c[i] = var_a[i] + var_b[i] ir_tags = [ @@ -81,10 +74,14 @@ def foo(var_a, var_b, var_c): '!DILocalVariable(name: "var_c"', '!DILocalVariable(name: "i"', ] - sig = (f32arrty, f32arrty, f32arrty) + sig = (itemty, f32arrty, f32arrty, f32arrty) with override_config("OPT", 0): - kernel_ir = get_kernel_ir(foo, sig, debug=True) + disp = dpex.kernel(sig, debug=True)(foo) + kcres = disp.get_compile_result( + types.void(itemty, f32arrty, f32arrty, f32arrty) + ) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert tag in kernel_ir @@ -96,8 +93,8 @@ def test_debug_kernel_local_vars_in_ir(): created in kernel """ - def foo(arr): - index = dpex.get_global_id(0) + def foo(item, arr): + index = item.get_id(0) local_d = 9 * 99 + 5 arr[index] = local_d + 100 @@ -105,8 +102,10 @@ def foo(arr): '!DILocalVariable(name: "index"', '!DILocalVariable(name: "local_d"', ] - sig = (f32arrty,) - kernel_ir = get_kernel_ir(foo, sig, debug=True) + sig = (itemty, f32arrty) + disp = dpex.kernel(sig, debug=True)(foo) + kcres = disp.get_compile_result(types.void(itemty, f32arrty)) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert tag in kernel_ir @@ -117,13 +116,13 @@ def test_debug_flag_generates_ir_with_debuginfo_for_func(debug_option): Check debug info is emitting to IR if debug parameter is set to True """ - @dpex.func(debug=debug_option) + @dpex.device_func(debug=debug_option) def func_sum(a, b): result = a + b return result - def data_parallel_sum(a, b, c): - i = dpex.get_global_id(0) + def data_parallel_sum(item, a, b, c): + i = item.get_id(0) c[i] = func_sum(a[i], b[i]) ir_tags = [ @@ -131,9 +130,12 @@ def data_parallel_sum(a, b, c): r'\!DISubprogram\(name: ".*data_parallel_sum\$?\d*"', ] - sig = (f32arrty, f32arrty, f32arrty) - - kernel_ir = get_kernel_ir(data_parallel_sum, sig, debug=debug_option) + sig = (itemty, f32arrty, f32arrty, f32arrty) + disp = dpex.kernel(sig, debug=debug_option)(data_parallel_sum) + kcres = disp.get_compile_result( + types.void(itemty, f32arrty, f32arrty, f32arrty) + ) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert debug_option == make_check(kernel_ir, tag) @@ -144,13 +146,13 @@ def test_env_var_generates_ir_with_debuginfo_for_func(debug_option): Check debug info is emitting to IR if NUMBA_DPEX_DEBUGINFO is set to 1 """ - @dpex.func + @dpex.device_func(debug=debug_option) def func_sum(a, b): result = a + b return result - def data_parallel_sum(a, b, c): - i = dpex.get_global_id(0) + def data_parallel_sum(item, a, b, c): + i = item.get_id(0) c[i] = func_sum(a[i], b[i]) ir_tags = [ @@ -158,43 +160,52 @@ def data_parallel_sum(a, b, c): r'\!DISubprogram\(name: ".*data_parallel_sum\$\d*"', ] - sig = (f32arrty, f32arrty, f32arrty) + sig = (itemty, f32arrty, f32arrty, f32arrty) with override_config("DEBUGINFO_DEFAULT", int(debug_option)): - kernel_ir = get_kernel_ir(data_parallel_sum, sig) + disp = dpex.kernel(sig, debug=debug_option, inline_threshold=0)( + data_parallel_sum + ) + kcres = disp.get_compile_result( + types.void(itemty, f32arrty, f32arrty, f32arrty) + ) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert debug_option == make_check(kernel_ir, tag) def test_debuginfo_DISubprogram_linkageName(): - def func(a, b): - i = dpex.get_global_id(0) + def foo(item, a, b): + i = item.get_id(0) b[i] = a[i] ir_tags = [ - r'\!DISubprogram\(.*linkageName: ".*func.*"', + r'\!DISubprogram\(.*linkageName: ".*foo.*"', ] - sig = (f32arrty, f32arrty) - kernel_ir = get_kernel_ir(func, sig, debug=True) + sig = (itemty, f32arrty, f32arrty) + disp = dpex.kernel(sig, debug=debug_option)(foo) + kcres = disp.get_compile_result(types.void(itemty, f32arrty, f32arrty)) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert make_check(kernel_ir, tag) def test_debuginfo_DICompileUnit_language_and_producer(): - def func(a, b): - i = dpex.get_global_id(0) + def foo(item, a, b): + i = item.get_id(0) b[i] = a[i] ir_tags = [ r"\!DICompileUnit\(language: DW_LANG_C_plus_plus,", ] - sig = (f32arrty, f32arrty) - - kernel_ir = get_kernel_ir(func, sig, debug=True) + sig = (itemty, f32arrty, f32arrty) + disp = dpex.kernel(sig, debug=debug_option)(foo) + kcres = disp.get_compile_result(types.void(itemty, f32arrty, f32arrty)) + kernel_ir = kcres.library.get_llvm_str() for tag in ir_tags: assert make_check(kernel_ir, tag)