From 491adac8fb49b2f87078ba0d22179f6ff97762dc Mon Sep 17 00:00:00 2001 From: Yevhenii Havrylko Date: Wed, 2 Aug 2023 11:02:09 -0400 Subject: [PATCH 1/4] Remove atomics emulation and spirv-tools linking --- numba_dpex/config.py | 3 - numba_dpex/ocl/__init__.py | 2 - numba_dpex/ocl/atomics/__init__.py | 29 ---- numba_dpex/ocl/oclimpl.py | 161 ++---------------- numba_dpex/spirv_generator.py | 32 +--- .../tests/kernel_tests/test_atomic_op.py | 53 ++---- 6 files changed, 29 insertions(+), 251 deletions(-) diff --git a/numba_dpex/config.py b/numba_dpex/config.py index 609fbc2cc7..855c7748ac 100644 --- a/numba_dpex/config.py +++ b/numba_dpex/config.py @@ -53,9 +53,6 @@ def __getattr__(name): # Dump offload diagnostics OFFLOAD_DIAGNOSTICS = _readenv("NUMBA_DPEX_OFFLOAD_DIAGNOSTICS", int, 0) -# Activate Native floating point atomcis support for supported devices. -# Requires llvm-spirv supporting the FP atomics extension -NATIVE_FP_ATOMICS = _readenv("NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE", int, 0) # Emit debug info DEBUG = _readenv("NUMBA_DPEX_DEBUG", int, config.DEBUG) DEBUGINFO_DEFAULT = _readenv( diff --git a/numba_dpex/ocl/__init__.py b/numba_dpex/ocl/__init__.py index 9abb201da1..00fec5f515 100644 --- a/numba_dpex/ocl/__init__.py +++ b/numba_dpex/ocl/__init__.py @@ -1,5 +1,3 @@ # SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 - -from .atomics import atomic_support_present diff --git a/numba_dpex/ocl/atomics/__init__.py b/numba_dpex/ocl/atomics/__init__.py index 2043302ed7..00fec5f515 100644 --- a/numba_dpex/ocl/atomics/__init__.py +++ b/numba_dpex/ocl/atomics/__init__.py @@ -1,32 +1,3 @@ # SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation # # SPDX-License-Identifier: Apache-2.0 - -import os -import os.path - - -def atomic_support_present(): - if os.path.isfile( - os.path.join(os.path.dirname(__file__), "atomic_ops.spir") - ): - return True - else: - return False - - -def get_atomic_spirv_path(): - if atomic_support_present(): - return os.path.join(os.path.dirname(__file__), "atomic_ops.spir") - else: - return None - - -def read_atomic_spirv_file(): - path = get_atomic_spirv_path() - if path: - with open(path, "rb") as fin: - spirv = fin.read() - return spirv - else: - return None diff --git a/numba_dpex/ocl/oclimpl.py b/numba_dpex/ocl/oclimpl.py index ff84b5bb93..e47bb215c4 100644 --- a/numba_dpex/ocl/oclimpl.py +++ b/numba_dpex/ocl/oclimpl.py @@ -143,63 +143,6 @@ def sub_group_barrier_impl(context, builder, sig, args): return _void_value -def insert_and_call_atomic_fn( - context, builder, sig, fn_type, dtype, ptr, val, addrspace -): - ll_p = None - name = "" - if dtype.name == "float32": - ll_val = llvmir.FloatType() - ll_p = ll_val.as_pointer() - if fn_type == "add": - name = "numba_dpex_atomic_add_f32" - elif fn_type == "sub": - name = "numba_dpex_atomic_sub_f32" - else: - raise TypeError("Operation type is not supported %s" % (fn_type)) - elif dtype.name == "float64": - if True: - ll_val = llvmir.DoubleType() - ll_p = ll_val.as_pointer() - if fn_type == "add": - name = "numba_dpex_atomic_add_f64" - elif fn_type == "sub": - name = "numba_dpex_atomic_sub_f64" - else: - raise TypeError( - "Operation type is not supported %s" % (fn_type) - ) - else: - raise TypeError( - "Atomic operation is not supported for type %s" % (dtype.name) - ) - - if addrspace == address_space.LOCAL: - name = name + "_local" - else: - name = name + "_global" - - assert ll_p is not None - assert name != "" - ll_p.addrspace = address_space.GENERIC - - mod = builder.module - if sig.return_type == types.void: - llretty = llvmir.VoidType() - else: - llretty = context.get_value_type(sig.return_type) - - llargs = [ll_p, context.get_value_type(sig.args[2])] - fnty = llvmir.FunctionType(llretty, llargs) - - fn = cgutils.get_or_insert_function(mod, fnty, name) - fn.calling_convention = kernel_target.CC_SPIR_FUNC - - generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC) - - return builder.call(fn, [generic_ptr, val]) - - def native_atomic_add(context, builder, sig, args): aryty, indty, valty = sig.args ary, inds, val = args @@ -282,27 +225,29 @@ def native_atomic_add(context, builder, sig, args): return builder.call(fn, fn_args) +def support_atomic(dtype: types.Type) -> bool: + # This check should be the same as described in sycl documentation: + # https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:atomic-references + # If atomic is not supported, it will be emulated by the sycl compiler. + return ( + dtype == types.int32 + or dtype == types.uint32 + or dtype == types.float32 + or dtype == types.int64 + or dtype == types.uint64 + or dtype == types.float64 + ) + + @lower(stubs.atomic.add, types.Array, types.intp, types.Any) @lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any) @lower(stubs.atomic.add, types.Array, types.Tuple, types.Any) def atomic_add_tuple(context, builder, sig, args): - device_type = dpctl.get_current_queue().sycl_device.device_type dtype = sig.args[0].dtype - - if dtype == types.float32 or dtype == types.float64: - if ( - device_type == dpctl.device_type.gpu - and config.NATIVE_FP_ATOMICS == 1 - ): - return native_atomic_add(context, builder, sig, args) - else: - # Currently, DPCPP only supports native floating point - # atomics for GPUs. - return atomic_add(context, builder, sig, args, "add") - elif dtype == types.int32 or dtype == types.int64: + if support_atomic(dtype): return native_atomic_add(context, builder, sig, args) else: - raise TypeError("Atomic operation on unsupported type %s" % dtype) + raise TypeError(f"Atomic operation on unsupported type {dtype}") def atomic_sub_wrapper(context, builder, sig, args): @@ -337,81 +282,11 @@ def atomic_sub_wrapper(context, builder, sig, args): @lower(stubs.atomic.sub, types.Array, types.UniTuple, types.Any) @lower(stubs.atomic.sub, types.Array, types.Tuple, types.Any) def atomic_sub_tuple(context, builder, sig, args): - device_type = dpctl.get_current_queue().sycl_device.device_type dtype = sig.args[0].dtype - - if dtype == types.float32 or dtype == types.float64: - if ( - device_type == dpctl.device_type.gpu - and config.NATIVE_FP_ATOMICS == 1 - ): - return atomic_sub_wrapper(context, builder, sig, args) - else: - # Currently, DPCPP only supports native floating point - # atomics for GPUs. - return atomic_add(context, builder, sig, args, "sub") - elif dtype == types.int32 or dtype == types.int64: + if support_atomic(dtype): return atomic_sub_wrapper(context, builder, sig, args) else: - raise TypeError("Atomic operation on unsupported type %s" % dtype) - - -def atomic_add(context, builder, sig, args, name): - from .atomics import atomic_support_present - - if atomic_support_present(): - context.extra_compile_options[kernel_target.LINK_ATOMIC] = True - aryty, indty, valty = sig.args - ary, inds, val = args - dtype = aryty.dtype - - if indty == types.intp: - indices = [inds] # just a single integer - indty = [indty] - else: - indices = cgutils.unpack_tuple(builder, inds, count=len(indty)) - indices = [ - context.cast(builder, i, t, types.intp) - for t, i in zip(indty, indices) - ] - - if dtype != valty: - raise TypeError("expecting %s but got %s" % (dtype, valty)) - - if aryty.ndim != len(indty): - raise TypeError( - "indexing %d-D array with %d-D index" % (aryty.ndim, len(indty)) - ) - - lary = context.make_array(aryty)(context, builder, ary) - ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices) - - if isinstance(aryty, Array) and aryty.addrspace == address_space.LOCAL: - return insert_and_call_atomic_fn( - context, - builder, - sig, - name, - dtype, - ptr, - val, - address_space.LOCAL, - ) - else: - return insert_and_call_atomic_fn( - context, - builder, - sig, - name, - dtype, - ptr, - val, - address_space.GLOBAL, - ) - else: - raise ImportError( - "Atomic support is not present, can not perform atomic_add" - ) + raise TypeError(f"Atomic operation on unsupported type {dtype}") @lower(stubs.private.array, types.IntegerLiteral, types.Any) diff --git a/numba_dpex/spirv_generator.py b/numba_dpex/spirv_generator.py index e9c87d405b..75627edb77 100644 --- a/numba_dpex/spirv_generator.py +++ b/numba_dpex/spirv_generator.py @@ -5,7 +5,6 @@ """A wrapper to connect to the SPIR-V binaries (Tools, Translator).""" import os -import shutil import tempfile from subprocess import CalledProcessError, check_call @@ -75,14 +74,6 @@ def generate(self, llvm_spirv_args, ipath, opath): if config.DEBUG: llvm_spirv_flags.append("--spirv-debug-info-version=ocl-100") - if not config.NATIVE_FP_ATOMICS: - # Do NOT upgrade version unless you are 100% confident. Not all - # kernel outputs can be converted to higher version of spirv. - # That results in different spirv file versions. As next step - # requires linking of the result file and - # numba_dpex/ocl/atomics/atomic_ops.spir it will raise an error - # that two spirv files have different version and can't be linked - llvm_spirv_args = ["--spirv-max-version", "1.0"] + llvm_spirv_args llvm_spirv_tool = self._llvm_spirv() if config.DEBUG: @@ -102,17 +93,6 @@ def _llvm_spirv(): result = dls.get_llvm_spirv_path() return result - def link(self, opath, binaries): - """ - Link spirv modules. - - Args: - opath: Output file path of the linked final spirv. - binaries: Spirv modules to be linked. - """ - flags = ["--allow-partial-linkage"] - check_call(["spirv-link", *flags, "-o", opath, *binaries]) - class Module(object): def __init__(self, context, llvmir, llvmbc): @@ -162,15 +142,9 @@ def finalize(self): # Generate SPIR-V from "friendly" LLVM-based SPIR 2.0 spirv_path = self._track_temp_file("generated-spirv") - binary_paths = [spirv_path] - llvm_spirv_args = [] for key in list(self.context.extra_compile_options.keys()): - if key == LINK_ATOMIC: - from .ocl.atomics import get_atomic_spirv_path - - binary_paths.append(get_atomic_spirv_path()) - elif key == LLVM_SPIRV_ARGS: + if key == LLVM_SPIRV_ARGS: llvm_spirv_args = self.context.extra_compile_options[key] del self.context.extra_compile_options[key] @@ -194,10 +168,6 @@ def finalize(self): opath=spirv_path, ) - if len(binary_paths) > 1: - spirv_path = self._track_temp_file("linked-spirv") - self._cmd.link(spirv_path, binary_paths) - if config.SAVE_IR_FILES != 0: # Dump the llvmir and llvmbc in file with open("generated_spirv.spir", "wb") as f1: diff --git a/numba_dpex/tests/kernel_tests/test_atomic_op.py b/numba_dpex/tests/kernel_tests/test_atomic_op.py index 3f18b20e57..d4a28f8e1c 100644 --- a/numba_dpex/tests/kernel_tests/test_atomic_op.py +++ b/numba_dpex/tests/kernel_tests/test_atomic_op.py @@ -2,12 +2,10 @@ # # SPDX-License-Identifier: Apache-2.0 -import dpctl import dpnp as np import pytest import numba_dpex as dpex -from numba_dpex import config from numba_dpex.core.descriptor import dpex_kernel_target from numba_dpex.tests._helper import override_config @@ -66,13 +64,6 @@ def f(a): return dpex.kernel(f), request.param[1] -skip_no_atomic_support = pytest.mark.skipif( - not dpex.ocl.atomic_support_present(), - reason="No atomic support", -) - - -@skip_no_atomic_support def test_kernel_atomic_simple(input_arrays, kernel_result_pair): a, dtype = input_arrays() kernel, expected = kernel_result_pair @@ -111,7 +102,6 @@ def f(a): return f -@skip_no_atomic_support def test_kernel_atomic_local(input_arrays, return_list_of_op): a, dtype = input_arrays() op_type, expected = return_list_of_op @@ -148,7 +138,6 @@ def f(a): return dpex.kernel(f) -@skip_no_atomic_support def test_kernel_atomic_multi_dim( return_list_of_op, return_list_of_dim, return_dtype ): @@ -160,23 +149,6 @@ def test_kernel_atomic_multi_dim( assert a[0] == expected -skip_NATIVE_FP_ATOMICS_0 = pytest.mark.skipif( - not config.NATIVE_FP_ATOMICS, reason="Native FP atomics disabled" -) - - -def skip_if_disabled(*args): - return pytest.param(*args, marks=skip_NATIVE_FP_ATOMICS_0) - - -@skip_no_atomic_support -@pytest.mark.parametrize( - "NATIVE_FP_ATOMICS, expected_native_atomic_for_device", - [ - skip_if_disabled(1, lambda device: device != "opencl:cpu:0"), - (0, lambda device: False), - ], -) @pytest.mark.parametrize( "function_generator", [get_func_global, get_func_local] ) @@ -189,8 +161,6 @@ def skip_if_disabled(*args): ) @pytest.mark.parametrize("dtype", list_of_f_dtypes) def test_atomic_fp_native( - NATIVE_FP_ATOMICS, - expected_native_atomic_for_device, function_generator, operator_name, expected_spirv_function, @@ -206,16 +176,13 @@ def test_atomic_fp_native( for arg in args ] - with override_config("NATIVE_FP_ATOMICS", NATIVE_FP_ATOMICS): - kernel.compile( - args=argtypes, - debug=False, - compile_flags=None, - target_ctx=dpex_kernel_target.target_context, - typing_ctx=dpex_kernel_target.typing_context, - ) - - is_native_atomic = expected_spirv_function in kernel._llvm_module - assert is_native_atomic == expected_native_atomic_for_device( - dpctl.select_default_device().filter_string - ) + kernel.compile( + args=argtypes, + debug=False, + compile_flags=None, + target_ctx=dpex_kernel_target.target_context, + typing_ctx=dpex_kernel_target.typing_context, + ) + + # TODO: this may fail if code is generated for platform that emulates atomic support? + assert expected_spirv_function in kernel._llvm_module From b50a69c6ff9f0c194c0daedddd653f0910658caf Mon Sep 17 00:00:00 2001 From: Yevhenii Havrylko Date: Wed, 2 Aug 2023 11:17:13 -0400 Subject: [PATCH 2/4] Remove atomics emulation and spirv-tools from setup.py & environment --- Dockerfile | 2 +- conda-recipe/run_test.sh | 9 +- environment/coverage.yml | 1 - environment/docs.yml | 1 - numba_dpex/ocl/atomics/atomic_ops.cl | 143 --------------------------- setup.py | 82 --------------- 6 files changed, 2 insertions(+), 236 deletions(-) delete mode 100644 numba_dpex/ocl/atomics/atomic_ops.cl diff --git a/Dockerfile b/Dockerfile index 0e90599329..1bfa3f78ed 100644 --- a/Dockerfile +++ b/Dockerfile @@ -199,7 +199,7 @@ RUN \ --mount=type=bind,target=/opt/toolkit,source=/opt/toolkit,from=toolkit-dist \ export http_proxy=$http_proxy https_proxy=$https_proxy \ && apt-get update && apt-get install -y \ - spirv-tools spirv-headers \ + spirv-headers \ rsync \ && rm -rf /var/lib/apt/lists/* \ && rsync -a /opt/toolkit/bin/ /usr/local/bin/ \ diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh index 4454e3abae..80c7f6ba35 100755 --- a/conda-recipe/run_test.sh +++ b/conda-recipe/run_test.sh @@ -5,15 +5,8 @@ unset ONEAPI_DEVICE_SELECTOR for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))") do - export "ONEAPI_DEVICE_SELECTOR=$selector" - unset NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 - + ONEAPI_DEVICE_SELECTOR=$selector \ pytest -q -ra --disable-warnings --pyargs numba_dpex -vv - - export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 - - pytest -q -ra --disable-warnings -vv \ - --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native done exit 0 diff --git a/environment/coverage.yml b/environment/coverage.yml index a950d49bf5..315233f37d 100644 --- a/environment/coverage.yml +++ b/environment/coverage.yml @@ -13,7 +13,6 @@ dependencies: - numba=0.57 - dpctl - dpnp - - spirv-tools - dpcpp-llvm-spirv - opencl_rt - coverage diff --git a/environment/docs.yml b/environment/docs.yml index ffde6d9943..14fa086643 100644 --- a/environment/docs.yml +++ b/environment/docs.yml @@ -13,7 +13,6 @@ dependencies: - numba=0.57 - dpctl - dpnp - - spirv-tools - dpcpp-llvm-spirv - opencl_rt - pip diff --git a/numba_dpex/ocl/atomics/atomic_ops.cl b/numba_dpex/ocl/atomics/atomic_ops.cl deleted file mode 100644 index 4e6ce925bd..0000000000 --- a/numba_dpex/ocl/atomics/atomic_ops.cl +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 - -/* OpenCL extension specification states extensions should define the extension - and to use it we need to enable is the following way using the pragma. - Link to specification: https://www.khronos.org/registry/OpenCL/specs/2.2/html/OpenCL_Ext.html -*/ -#ifdef cl_khr_int64_base_atomics - #pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable - #ifdef cl_khr_fp64 - #pragma OPENCL EXTENSION cl_khr_fp64: enable - - double numba_dpex_atomic_cmpxchg_f64_local(volatile __generic double *p, double cmp, double val) { - union { - ulong u64; - double f64; - } cmp_union, val_union, old_union; - - cmp_union.f64 = cmp; - val_union.f64 = val; - old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64); - return old_union.f64; - } - - double numba_dpex_atomic_cmpxchg_f64_global(volatile __generic double *p, double cmp, double val) { - union { - ulong u64; - double f64; - } cmp_union, val_union, old_union; - - cmp_union.f64 = cmp; - val_union.f64 = val; - old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64); - return old_union.f64; - } - - double numba_dpex_atomic_add_f64_local(volatile __generic double *p, double val) { - double found = *p; - double expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f64_local(p, expected, expected + val); - } while (found != expected); - return found; - } - - double numba_dpex_atomic_add_f64_global(volatile __generic double *p, double val) { - double found = *p; - double expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f64_global(p, expected, expected + val); - } while (found != expected); - return found; - } - - - double numba_dpex_atomic_sub_f64_local(volatile __generic double *p, double val) { - double found = *p; - double expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f64_local(p, expected, expected - val); - } while (found != expected); - return found; - } - - double numba_dpex_atomic_sub_f64_global(volatile __generic double *p, double val) { - double found = *p; - double expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f64_global(p, expected, expected - val); - } while (found != expected); - return found; - } - #endif -#endif - -float numba_dpex_atomic_cmpxchg_f32_local(volatile __generic float *p, float cmp, float val) { - union { - unsigned int u32; - float f32; - } cmp_union, val_union, old_union; - - cmp_union.f32 = cmp; - val_union.f32 = val; - old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32); - return old_union.f32; -} - -float numba_dpex_atomic_cmpxchg_f32_global(volatile __generic float *p, float cmp, float val) { - union { - unsigned int u32; - float f32; - } cmp_union, val_union, old_union; - - cmp_union.f32 = cmp; - val_union.f32 = val; - old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32); - return old_union.f32; -} - -float numba_dpex_atomic_add_f32_local(volatile __generic float *p, float val) { - float found = *p; - float expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f32_local(p, expected, expected + val); - } while (found != expected); - return found; -} - -float numba_dpex_atomic_add_f32_global(volatile __generic float *p, float val) { - float found = *p; - float expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f32_global(p, expected, expected + val); - } while (found != expected); - return found; -} - -float numba_dpex_atomic_sub_f32_local(volatile __generic float *p, float val) { - float found = *p; - float expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f32_local(p, expected, expected - val); - } while (found != expected); - return found; -} - -float numba_dpex_atomic_sub_f32_global(volatile __generic float *p, float val) { - float found = *p; - float expected; - do { - expected = found; - found = numba_dpex_atomic_cmpxchg_f32_global(p, expected, expected - val); - } while (found != expected); - return found; -} diff --git a/setup.py b/setup.py index 0c9eb684a1..1801ceccdc 100644 --- a/setup.py +++ b/setup.py @@ -3,16 +3,12 @@ # SPDX-License-Identifier: Apache-2.0 import os -import shutil -import subprocess import sys import sysconfig import dpctl import numba import numpy -import setuptools.command.develop as orig_develop -import setuptools.command.install as orig_install from setuptools import Extension, find_packages, setup import versioneer @@ -62,83 +58,6 @@ def get_ext_modules(): return ext_modules -class install(orig_install.install): - def run(self): - spirv_compile() - super().run() - - -class develop(orig_develop.develop): - def run(self): - spirv_compile() - super().run() - - -def _get_cmdclass(): - cmdclass = versioneer.get_cmdclass() - cmdclass["install"] = install - cmdclass["develop"] = develop - return cmdclass - - -def spirv_compile(): - if IS_LIN: - compiler = "icx" - if IS_WIN: - compiler = "clang.exe" - - clang_args = [ - compiler, - "-flto", - "-fveclib=none", - "-target", - "spir64-unknown-unknown", - "-c", - "-x", - "cl", - "-emit-llvm", - "-cl-std=CL2.0", - "-Xclang", - "-finclude-default-header", - "numba_dpex/ocl/atomics/atomic_ops.cl", - "-o", - "numba_dpex/ocl/atomics/atomic_ops.bc", - ] - spirv_args = [ - _llvm_spirv(), - "--spirv-max-version", - "1.0", - "numba_dpex/ocl/atomics/atomic_ops.bc", - "-o", - "numba_dpex/ocl/atomics/atomic_ops.spir", - ] - subprocess.check_call( - clang_args, - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - shell=False, - ) - subprocess.check_call( - spirv_args, - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - shell=False, - ) - - -def _llvm_spirv(): - """Return path to llvm-spirv executable.""" - - try: - import dpcpp_llvm_spirv as dls - except ImportError: - raise ImportError("Cannot import dpcpp-llvm-spirv package") - - result = dls.get_llvm_spirv_path() - - return result - - packages = find_packages( include=["numba_dpex", "numba_dpex.*", "_dpexrt_python"] ) @@ -151,7 +70,6 @@ def _llvm_spirv(): metadata = dict( name="numba-dpex", version=versioneer.get_version(), - cmdclass=_get_cmdclass(), description="An extension for Numba to add data-parallel offload capability", url="https://github.com/IntelPython/numba-dpex", packages=packages, From 10baaa017356cea00758663f8d59cdf9581e8748 Mon Sep 17 00:00:00 2001 From: Yevhenii Havrylko Date: Wed, 2 Aug 2023 11:17:57 -0400 Subject: [PATCH 3/4] Fix atomics example --- numba_dpex/examples/kernel/atomic_op.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/numba_dpex/examples/kernel/atomic_op.py b/numba_dpex/examples/kernel/atomic_op.py index 1ff8246dc6..49a5d74671 100644 --- a/numba_dpex/examples/kernel/atomic_op.py +++ b/numba_dpex/examples/kernel/atomic_op.py @@ -8,20 +8,28 @@ @ndpx.kernel -def atomic_reduction(a): +def atomic_reduction(a, res): + """Summarize all the items in a and writes it into res using atomic.add. + + :param a: array of values to get sum + :param res: result where to add all the items from a array. It must be preset to 0. + """ idx = ndpx.get_global_id(0) - ndpx.atomic.add(a, 0, a[idx]) + ndpx.atomic.add(res, 0, a[idx]) def main(): N = 10 - a = np.arange(N) + + # We are storing sum to the first element + a = np.arange(0, N) + res = np.zeros(1, dtype=a.dtype) print("Using device ...") print(a.device) - atomic_reduction[ndpx.Range(N)](a) - print("Reduction sum =", a[0]) + atomic_reduction[ndpx.Range(N)](a, res) + print("Reduction sum =", res[0]) print("Done...") From 167def82a010ae690e4041dc6de7bfa059ea454e Mon Sep 17 00:00:00 2001 From: Yevhenii Havrylko Date: Wed, 2 Aug 2023 11:18:17 -0400 Subject: [PATCH 4/4] Update atomics documentation --- docs/backups/user_guides/getting_started.rst | 4 +--- .../atomic-operations.rst | 15 --------------- docs/source/getting_started.rst | 6 +++--- .../kernel_programming/atomic-operations.rst | 17 ----------------- 4 files changed, 4 insertions(+), 38 deletions(-) diff --git a/docs/backups/user_guides/getting_started.rst b/docs/backups/user_guides/getting_started.rst index 795f1c0396..50b4b69534 100644 --- a/docs/backups/user_guides/getting_started.rst +++ b/docs/backups/user_guides/getting_started.rst @@ -11,7 +11,6 @@ Numba-dpex depends on following components: * dpnp 0.10.1 (`Intel Python DPNP`_) * `dpcpp-llvm-spirv`_ (SPIRV generation from LLVM IR) * `llvmdev`_ (LLVM IR generation) -* `spirv-tools`_ * `packaging`_ * `scipy`_ (for testing) * `pytest`_ (for testing) @@ -59,7 +58,7 @@ installed in conda environment: .. code-block:: bash export ONEAPI_ROOT=/opt/intel/oneapi - conda create -n numba-dpex-env -c ${ONEAPI_ROOT}/conda_channel python=3.7 dpctl dpnp numba spirv-tools dpcpp-llvm-spirv llvmdev pytest + conda create -n numba-dpex-env -c ${ONEAPI_ROOT}/conda_channel python=3.7 dpctl dpnp numba dpcpp-llvm-spirv llvmdev pytest conda activate numba-dpex-env Activate DPC++ compiler: @@ -150,7 +149,6 @@ Refer to :ref:`Docker ` section for more options. .. _`Intel Python dpnp`: https://github.com/IntelPython/dpnp .. _`dpcpp-llvm-spirv`: https://github.com/IntelPython/dpcpp-llvm-spirv .. _`llvmdev`: https://anaconda.org/intel/llvmdev -.. _`spirv-tools`: https://anaconda.org/intel/spirv-tools .. _`packaging`: https://packaging.pypa.io/ .. _`scipy`: https://anaconda.org/intel/scipy .. _`pytest`: https://docs.pytest.org diff --git a/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst b/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst index 95b5a060b4..3c9626e52c 100644 --- a/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst +++ b/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst @@ -21,21 +21,6 @@ Example usage of atomic operations The ``numba_dpex.atomic.add`` function is analogous to The ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend. -Generating Native FP Atomics ----------------------------- -Numba-dpex supports generating native floating-point atomics. -This feature is experimental. Users will need to provide -the following environment variables to activate it. - - NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 - -Example command: - -.. code-block:: bash - - NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 \ - python program.py - Full examples ------------- diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index cdd6fc06da..da928df207 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -19,7 +19,7 @@ to get the latest production releases. .. code-block:: bash conda create -n numba-dpex-env \ - numba-dpex dpnp dpctl dpcpp-llvm-spirv spirv-tools \ + numba-dpex dpnp dpctl dpcpp-llvm-spirv \ -c intel -c conda-forge To try out the bleeding edge, the latest packages built from tip of the main @@ -28,7 +28,7 @@ source trunk can be installed from the ``dppy/label/dev`` conda channel. .. code-block:: bash conda create -n numba-dpex-env \ - numba-dpex dpnp dpctl dpcpp-llvm-spirv spirv-tools \ + numba-dpex dpnp dpctl dpcpp-llvm-spirv \ -c dppy/label/dev -c intel -c conda-forge @@ -70,7 +70,7 @@ first step. # Create a conda environment that hass needed dependencies installed conda create -n numba-dpex-env \ - dpctl dpnp numba spirv-tools dpcpp-llvm-spirv llvmdev pytest \ + dpctl dpnp numba dpcpp-llvm-spirv llvmdev pytest \ -c intel -c conda-forge # Activate the environment conda activate numba-dpex-env diff --git a/docs/source/user_guide/kernel_programming/atomic-operations.rst b/docs/source/user_guide/kernel_programming/atomic-operations.rst index 388e822ffc..07c2be4795 100644 --- a/docs/source/user_guide/kernel_programming/atomic-operations.rst +++ b/docs/source/user_guide/kernel_programming/atomic-operations.rst @@ -21,23 +21,6 @@ Example usage of atomic operations The ``numba_dpex.atomic.add`` function is analogous to The ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend. -Generating Native FP Atomics ----------------------------- -Numba-dpex supports generating native floating-point atomics. -This feature is experimental. Users will need to provide -the following environment variables to activate it. - - NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 - NUMBA_DPEX_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv - -Example command: - -.. code-block:: bash - - NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 \ - NUMBA_DPEX_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv \ - python program.py - Full examples -------------