From 491adac8fb49b2f87078ba0d22179f6ff97762dc Mon Sep 17 00:00:00 2001
From: Yevhenii Havrylko <egavrilko@gmail.com>
Date: Wed, 2 Aug 2023 11:02:09 -0400
Subject: [PATCH 1/4] Remove atomics emulation and spirv-tools linking

---
 numba_dpex/config.py                          |   3 -
 numba_dpex/ocl/__init__.py                    |   2 -
 numba_dpex/ocl/atomics/__init__.py            |  29 ----
 numba_dpex/ocl/oclimpl.py                     | 161 ++----------------
 numba_dpex/spirv_generator.py                 |  32 +---
 .../tests/kernel_tests/test_atomic_op.py      |  53 ++----
 6 files changed, 29 insertions(+), 251 deletions(-)

diff --git a/numba_dpex/config.py b/numba_dpex/config.py
index 609fbc2cc7..855c7748ac 100644
--- a/numba_dpex/config.py
+++ b/numba_dpex/config.py
@@ -53,9 +53,6 @@ def __getattr__(name):
 # Dump offload diagnostics
 OFFLOAD_DIAGNOSTICS = _readenv("NUMBA_DPEX_OFFLOAD_DIAGNOSTICS", int, 0)
 
-# Activate Native floating point atomcis support for supported devices.
-# Requires llvm-spirv supporting the FP atomics extension
-NATIVE_FP_ATOMICS = _readenv("NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE", int, 0)
 # Emit debug info
 DEBUG = _readenv("NUMBA_DPEX_DEBUG", int, config.DEBUG)
 DEBUGINFO_DEFAULT = _readenv(
diff --git a/numba_dpex/ocl/__init__.py b/numba_dpex/ocl/__init__.py
index 9abb201da1..00fec5f515 100644
--- a/numba_dpex/ocl/__init__.py
+++ b/numba_dpex/ocl/__init__.py
@@ -1,5 +1,3 @@
 # SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
-
-from .atomics import atomic_support_present
diff --git a/numba_dpex/ocl/atomics/__init__.py b/numba_dpex/ocl/atomics/__init__.py
index 2043302ed7..00fec5f515 100644
--- a/numba_dpex/ocl/atomics/__init__.py
+++ b/numba_dpex/ocl/atomics/__init__.py
@@ -1,32 +1,3 @@
 # SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
-
-import os
-import os.path
-
-
-def atomic_support_present():
-    if os.path.isfile(
-        os.path.join(os.path.dirname(__file__), "atomic_ops.spir")
-    ):
-        return True
-    else:
-        return False
-
-
-def get_atomic_spirv_path():
-    if atomic_support_present():
-        return os.path.join(os.path.dirname(__file__), "atomic_ops.spir")
-    else:
-        return None
-
-
-def read_atomic_spirv_file():
-    path = get_atomic_spirv_path()
-    if path:
-        with open(path, "rb") as fin:
-            spirv = fin.read()
-        return spirv
-    else:
-        return None
diff --git a/numba_dpex/ocl/oclimpl.py b/numba_dpex/ocl/oclimpl.py
index ff84b5bb93..e47bb215c4 100644
--- a/numba_dpex/ocl/oclimpl.py
+++ b/numba_dpex/ocl/oclimpl.py
@@ -143,63 +143,6 @@ def sub_group_barrier_impl(context, builder, sig, args):
     return _void_value
 
 
-def insert_and_call_atomic_fn(
-    context, builder, sig, fn_type, dtype, ptr, val, addrspace
-):
-    ll_p = None
-    name = ""
-    if dtype.name == "float32":
-        ll_val = llvmir.FloatType()
-        ll_p = ll_val.as_pointer()
-        if fn_type == "add":
-            name = "numba_dpex_atomic_add_f32"
-        elif fn_type == "sub":
-            name = "numba_dpex_atomic_sub_f32"
-        else:
-            raise TypeError("Operation type is not supported %s" % (fn_type))
-    elif dtype.name == "float64":
-        if True:
-            ll_val = llvmir.DoubleType()
-            ll_p = ll_val.as_pointer()
-            if fn_type == "add":
-                name = "numba_dpex_atomic_add_f64"
-            elif fn_type == "sub":
-                name = "numba_dpex_atomic_sub_f64"
-            else:
-                raise TypeError(
-                    "Operation type is not supported %s" % (fn_type)
-                )
-    else:
-        raise TypeError(
-            "Atomic operation is not supported for type %s" % (dtype.name)
-        )
-
-    if addrspace == address_space.LOCAL:
-        name = name + "_local"
-    else:
-        name = name + "_global"
-
-    assert ll_p is not None
-    assert name != ""
-    ll_p.addrspace = address_space.GENERIC
-
-    mod = builder.module
-    if sig.return_type == types.void:
-        llretty = llvmir.VoidType()
-    else:
-        llretty = context.get_value_type(sig.return_type)
-
-    llargs = [ll_p, context.get_value_type(sig.args[2])]
-    fnty = llvmir.FunctionType(llretty, llargs)
-
-    fn = cgutils.get_or_insert_function(mod, fnty, name)
-    fn.calling_convention = kernel_target.CC_SPIR_FUNC
-
-    generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)
-
-    return builder.call(fn, [generic_ptr, val])
-
-
 def native_atomic_add(context, builder, sig, args):
     aryty, indty, valty = sig.args
     ary, inds, val = args
@@ -282,27 +225,29 @@ def native_atomic_add(context, builder, sig, args):
     return builder.call(fn, fn_args)
 
 
+def support_atomic(dtype: types.Type) -> bool:
+    # This check should be the same as described in sycl documentation:
+    # https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:atomic-references
+    # If atomic is not supported, it will be emulated by the sycl compiler.
+    return (
+        dtype == types.int32
+        or dtype == types.uint32
+        or dtype == types.float32
+        or dtype == types.int64
+        or dtype == types.uint64
+        or dtype == types.float64
+    )
+
+
 @lower(stubs.atomic.add, types.Array, types.intp, types.Any)
 @lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any)
 @lower(stubs.atomic.add, types.Array, types.Tuple, types.Any)
 def atomic_add_tuple(context, builder, sig, args):
-    device_type = dpctl.get_current_queue().sycl_device.device_type
     dtype = sig.args[0].dtype
-
-    if dtype == types.float32 or dtype == types.float64:
-        if (
-            device_type == dpctl.device_type.gpu
-            and config.NATIVE_FP_ATOMICS == 1
-        ):
-            return native_atomic_add(context, builder, sig, args)
-        else:
-            # Currently, DPCPP only supports native floating point
-            # atomics for GPUs.
-            return atomic_add(context, builder, sig, args, "add")
-    elif dtype == types.int32 or dtype == types.int64:
+    if support_atomic(dtype):
         return native_atomic_add(context, builder, sig, args)
     else:
-        raise TypeError("Atomic operation on unsupported type %s" % dtype)
+        raise TypeError(f"Atomic operation on unsupported type {dtype}")
 
 
 def atomic_sub_wrapper(context, builder, sig, args):
@@ -337,81 +282,11 @@ def atomic_sub_wrapper(context, builder, sig, args):
 @lower(stubs.atomic.sub, types.Array, types.UniTuple, types.Any)
 @lower(stubs.atomic.sub, types.Array, types.Tuple, types.Any)
 def atomic_sub_tuple(context, builder, sig, args):
-    device_type = dpctl.get_current_queue().sycl_device.device_type
     dtype = sig.args[0].dtype
-
-    if dtype == types.float32 or dtype == types.float64:
-        if (
-            device_type == dpctl.device_type.gpu
-            and config.NATIVE_FP_ATOMICS == 1
-        ):
-            return atomic_sub_wrapper(context, builder, sig, args)
-        else:
-            # Currently, DPCPP only supports native floating point
-            # atomics for GPUs.
-            return atomic_add(context, builder, sig, args, "sub")
-    elif dtype == types.int32 or dtype == types.int64:
+    if support_atomic(dtype):
         return atomic_sub_wrapper(context, builder, sig, args)
     else:
-        raise TypeError("Atomic operation on unsupported type %s" % dtype)
-
-
-def atomic_add(context, builder, sig, args, name):
-    from .atomics import atomic_support_present
-
-    if atomic_support_present():
-        context.extra_compile_options[kernel_target.LINK_ATOMIC] = True
-        aryty, indty, valty = sig.args
-        ary, inds, val = args
-        dtype = aryty.dtype
-
-        if indty == types.intp:
-            indices = [inds]  # just a single integer
-            indty = [indty]
-        else:
-            indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
-            indices = [
-                context.cast(builder, i, t, types.intp)
-                for t, i in zip(indty, indices)
-            ]
-
-        if dtype != valty:
-            raise TypeError("expecting %s but got %s" % (dtype, valty))
-
-        if aryty.ndim != len(indty):
-            raise TypeError(
-                "indexing %d-D array with %d-D index" % (aryty.ndim, len(indty))
-            )
-
-        lary = context.make_array(aryty)(context, builder, ary)
-        ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)
-
-        if isinstance(aryty, Array) and aryty.addrspace == address_space.LOCAL:
-            return insert_and_call_atomic_fn(
-                context,
-                builder,
-                sig,
-                name,
-                dtype,
-                ptr,
-                val,
-                address_space.LOCAL,
-            )
-        else:
-            return insert_and_call_atomic_fn(
-                context,
-                builder,
-                sig,
-                name,
-                dtype,
-                ptr,
-                val,
-                address_space.GLOBAL,
-            )
-    else:
-        raise ImportError(
-            "Atomic support is not present, can not perform atomic_add"
-        )
+        raise TypeError(f"Atomic operation on unsupported type {dtype}")
 
 
 @lower(stubs.private.array, types.IntegerLiteral, types.Any)
diff --git a/numba_dpex/spirv_generator.py b/numba_dpex/spirv_generator.py
index e9c87d405b..75627edb77 100644
--- a/numba_dpex/spirv_generator.py
+++ b/numba_dpex/spirv_generator.py
@@ -5,7 +5,6 @@
 """A wrapper to connect to the SPIR-V binaries (Tools, Translator)."""
 
 import os
-import shutil
 import tempfile
 from subprocess import CalledProcessError, check_call
 
@@ -75,14 +74,6 @@ def generate(self, llvm_spirv_args, ipath, opath):
         if config.DEBUG:
             llvm_spirv_flags.append("--spirv-debug-info-version=ocl-100")
 
-        if not config.NATIVE_FP_ATOMICS:
-            # Do NOT upgrade version unless you are 100% confident. Not all
-            # kernel outputs can be converted to higher version of spirv.
-            # That results in different spirv file versions. As next step
-            # requires linking of the result file and
-            # numba_dpex/ocl/atomics/atomic_ops.spir it will raise an error
-            # that two spirv files have different version and can't be linked
-            llvm_spirv_args = ["--spirv-max-version", "1.0"] + llvm_spirv_args
         llvm_spirv_tool = self._llvm_spirv()
 
         if config.DEBUG:
@@ -102,17 +93,6 @@ def _llvm_spirv():
         result = dls.get_llvm_spirv_path()
         return result
 
-    def link(self, opath, binaries):
-        """
-        Link spirv modules.
-
-        Args:
-            opath: Output file path of the linked final spirv.
-            binaries: Spirv modules to be linked.
-        """
-        flags = ["--allow-partial-linkage"]
-        check_call(["spirv-link", *flags, "-o", opath, *binaries])
-
 
 class Module(object):
     def __init__(self, context, llvmir, llvmbc):
@@ -162,15 +142,9 @@ def finalize(self):
         # Generate SPIR-V from "friendly" LLVM-based SPIR 2.0
         spirv_path = self._track_temp_file("generated-spirv")
 
-        binary_paths = [spirv_path]
-
         llvm_spirv_args = []
         for key in list(self.context.extra_compile_options.keys()):
-            if key == LINK_ATOMIC:
-                from .ocl.atomics import get_atomic_spirv_path
-
-                binary_paths.append(get_atomic_spirv_path())
-            elif key == LLVM_SPIRV_ARGS:
+            if key == LLVM_SPIRV_ARGS:
                 llvm_spirv_args = self.context.extra_compile_options[key]
             del self.context.extra_compile_options[key]
 
@@ -194,10 +168,6 @@ def finalize(self):
             opath=spirv_path,
         )
 
-        if len(binary_paths) > 1:
-            spirv_path = self._track_temp_file("linked-spirv")
-            self._cmd.link(spirv_path, binary_paths)
-
         if config.SAVE_IR_FILES != 0:
             # Dump the llvmir and llvmbc in file
             with open("generated_spirv.spir", "wb") as f1:
diff --git a/numba_dpex/tests/kernel_tests/test_atomic_op.py b/numba_dpex/tests/kernel_tests/test_atomic_op.py
index 3f18b20e57..d4a28f8e1c 100644
--- a/numba_dpex/tests/kernel_tests/test_atomic_op.py
+++ b/numba_dpex/tests/kernel_tests/test_atomic_op.py
@@ -2,12 +2,10 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import dpctl
 import dpnp as np
 import pytest
 
 import numba_dpex as dpex
-from numba_dpex import config
 from numba_dpex.core.descriptor import dpex_kernel_target
 from numba_dpex.tests._helper import override_config
 
@@ -66,13 +64,6 @@ def f(a):
     return dpex.kernel(f), request.param[1]
 
 
-skip_no_atomic_support = pytest.mark.skipif(
-    not dpex.ocl.atomic_support_present(),
-    reason="No atomic support",
-)
-
-
-@skip_no_atomic_support
 def test_kernel_atomic_simple(input_arrays, kernel_result_pair):
     a, dtype = input_arrays()
     kernel, expected = kernel_result_pair
@@ -111,7 +102,6 @@ def f(a):
     return f
 
 
-@skip_no_atomic_support
 def test_kernel_atomic_local(input_arrays, return_list_of_op):
     a, dtype = input_arrays()
     op_type, expected = return_list_of_op
@@ -148,7 +138,6 @@ def f(a):
     return dpex.kernel(f)
 
 
-@skip_no_atomic_support
 def test_kernel_atomic_multi_dim(
     return_list_of_op, return_list_of_dim, return_dtype
 ):
@@ -160,23 +149,6 @@ def test_kernel_atomic_multi_dim(
     assert a[0] == expected
 
 
-skip_NATIVE_FP_ATOMICS_0 = pytest.mark.skipif(
-    not config.NATIVE_FP_ATOMICS, reason="Native FP atomics disabled"
-)
-
-
-def skip_if_disabled(*args):
-    return pytest.param(*args, marks=skip_NATIVE_FP_ATOMICS_0)
-
-
-@skip_no_atomic_support
-@pytest.mark.parametrize(
-    "NATIVE_FP_ATOMICS, expected_native_atomic_for_device",
-    [
-        skip_if_disabled(1, lambda device: device != "opencl:cpu:0"),
-        (0, lambda device: False),
-    ],
-)
 @pytest.mark.parametrize(
     "function_generator", [get_func_global, get_func_local]
 )
@@ -189,8 +161,6 @@ def skip_if_disabled(*args):
 )
 @pytest.mark.parametrize("dtype", list_of_f_dtypes)
 def test_atomic_fp_native(
-    NATIVE_FP_ATOMICS,
-    expected_native_atomic_for_device,
     function_generator,
     operator_name,
     expected_spirv_function,
@@ -206,16 +176,13 @@ def test_atomic_fp_native(
         for arg in args
     ]
 
-    with override_config("NATIVE_FP_ATOMICS", NATIVE_FP_ATOMICS):
-        kernel.compile(
-            args=argtypes,
-            debug=False,
-            compile_flags=None,
-            target_ctx=dpex_kernel_target.target_context,
-            typing_ctx=dpex_kernel_target.typing_context,
-        )
-
-        is_native_atomic = expected_spirv_function in kernel._llvm_module
-        assert is_native_atomic == expected_native_atomic_for_device(
-            dpctl.select_default_device().filter_string
-        )
+    kernel.compile(
+        args=argtypes,
+        debug=False,
+        compile_flags=None,
+        target_ctx=dpex_kernel_target.target_context,
+        typing_ctx=dpex_kernel_target.typing_context,
+    )
+
+    # TODO: this may fail if code is generated for platform that emulates atomic support?
+    assert expected_spirv_function in kernel._llvm_module

From b50a69c6ff9f0c194c0daedddd653f0910658caf Mon Sep 17 00:00:00 2001
From: Yevhenii Havrylko <egavrilko@gmail.com>
Date: Wed, 2 Aug 2023 11:17:13 -0400
Subject: [PATCH 2/4] Remove atomics emulation and spirv-tools from setup.py &
 environment

---
 Dockerfile                           |   2 +-
 conda-recipe/run_test.sh             |   9 +-
 environment/coverage.yml             |   1 -
 environment/docs.yml                 |   1 -
 numba_dpex/ocl/atomics/atomic_ops.cl | 143 ---------------------------
 setup.py                             |  82 ---------------
 6 files changed, 2 insertions(+), 236 deletions(-)
 delete mode 100644 numba_dpex/ocl/atomics/atomic_ops.cl

diff --git a/Dockerfile b/Dockerfile
index 0e90599329..1bfa3f78ed 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -199,7 +199,7 @@ RUN \
     --mount=type=bind,target=/opt/toolkit,source=/opt/toolkit,from=toolkit-dist \
     export http_proxy=$http_proxy https_proxy=$https_proxy \
     && apt-get update && apt-get install -y \
-    spirv-tools spirv-headers \
+    spirv-headers \
     rsync \
     && rm -rf /var/lib/apt/lists/* \
     && rsync -a /opt/toolkit/bin/ /usr/local/bin/ \
diff --git a/conda-recipe/run_test.sh b/conda-recipe/run_test.sh
index 4454e3abae..80c7f6ba35 100755
--- a/conda-recipe/run_test.sh
+++ b/conda-recipe/run_test.sh
@@ -5,15 +5,8 @@ unset ONEAPI_DEVICE_SELECTOR
 
 for selector in $(python -c "import dpctl; print(\" \".join([dev.backend.name+\":\"+dev.device_type.name for dev in dpctl.get_devices() if dev.device_type.name in [\"cpu\",\"gpu\"]]))")
 do
-    export "ONEAPI_DEVICE_SELECTOR=$selector"
-    unset NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
-
+    ONEAPI_DEVICE_SELECTOR=$selector \
     pytest -q -ra --disable-warnings --pyargs numba_dpex -vv
-
-    export NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
-
-    pytest -q -ra --disable-warnings -vv \
-        --pyargs numba_dpex.tests.kernel_tests.test_atomic_op::test_atomic_fp_native
 done
 
 exit 0
diff --git a/environment/coverage.yml b/environment/coverage.yml
index a950d49bf5..315233f37d 100644
--- a/environment/coverage.yml
+++ b/environment/coverage.yml
@@ -13,7 +13,6 @@ dependencies:
   - numba=0.57
   - dpctl
   - dpnp
-  - spirv-tools
   - dpcpp-llvm-spirv
   - opencl_rt
   - coverage
diff --git a/environment/docs.yml b/environment/docs.yml
index ffde6d9943..14fa086643 100644
--- a/environment/docs.yml
+++ b/environment/docs.yml
@@ -13,7 +13,6 @@ dependencies:
   - numba=0.57
   - dpctl
   - dpnp
-  - spirv-tools
   - dpcpp-llvm-spirv
   - opencl_rt
   - pip
diff --git a/numba_dpex/ocl/atomics/atomic_ops.cl b/numba_dpex/ocl/atomics/atomic_ops.cl
deleted file mode 100644
index 4e6ce925bd..0000000000
--- a/numba_dpex/ocl/atomics/atomic_ops.cl
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-FileCopyrightText: 2020 - 2022 Intel Corporation
-//
-// SPDX-License-Identifier: Apache-2.0
-
-/* OpenCL extension specification states extensions should define the extension
-   and to use it we need to enable is the following way using the pragma.
-   Link to specification: https://www.khronos.org/registry/OpenCL/specs/2.2/html/OpenCL_Ext.html
-*/
-#ifdef cl_khr_int64_base_atomics
-  #pragma OPENCL EXTENSION cl_khr_int64_base_atomics: enable
-  #ifdef cl_khr_fp64
-    #pragma OPENCL EXTENSION cl_khr_fp64: enable
-
-    double numba_dpex_atomic_cmpxchg_f64_local(volatile __generic double *p, double cmp, double val) {
-        union {
-            ulong  u64;
-            double f64;
-        } cmp_union, val_union, old_union;
-
-        cmp_union.f64 = cmp;
-        val_union.f64 = val;
-        old_union.u64 = atom_cmpxchg((volatile __local ulong *) p, cmp_union.u64, val_union.u64);
-        return old_union.f64;
-    }
-
-    double numba_dpex_atomic_cmpxchg_f64_global(volatile __generic double *p, double cmp, double val) {
-        union {
-            ulong  u64;
-            double f64;
-        } cmp_union, val_union, old_union;
-
-        cmp_union.f64 = cmp;
-        val_union.f64 = val;
-        old_union.u64 = atom_cmpxchg((volatile __global ulong *) p, cmp_union.u64, val_union.u64);
-        return old_union.f64;
-    }
-
-    double numba_dpex_atomic_add_f64_local(volatile __generic double *p, double val) {
-        double  found = *p;
-        double  expected;
-        do {
-            expected = found;
-            found = numba_dpex_atomic_cmpxchg_f64_local(p, expected, expected + val);
-        } while (found != expected);
-        return found;
-    }
-
-    double numba_dpex_atomic_add_f64_global(volatile __generic double *p, double val) {
-        double  found = *p;
-        double  expected;
-        do {
-            expected = found;
-            found = numba_dpex_atomic_cmpxchg_f64_global(p, expected, expected + val);
-        } while (found != expected);
-        return found;
-    }
-
-
-    double numba_dpex_atomic_sub_f64_local(volatile __generic double *p, double val) {
-        double  found = *p;
-        double  expected;
-        do {
-            expected = found;
-            found = numba_dpex_atomic_cmpxchg_f64_local(p, expected, expected - val);
-        } while (found != expected);
-        return found;
-    }
-
-    double numba_dpex_atomic_sub_f64_global(volatile __generic double *p, double val) {
-        double  found = *p;
-        double  expected;
-        do {
-            expected = found;
-            found = numba_dpex_atomic_cmpxchg_f64_global(p, expected, expected - val);
-        } while (found != expected);
-        return found;
-    }
-  #endif
-#endif
-
-float numba_dpex_atomic_cmpxchg_f32_local(volatile __generic float *p, float cmp, float val) {
-    union {
-        unsigned int u32;
-        float        f32;
-    } cmp_union, val_union, old_union;
-
-    cmp_union.f32 = cmp;
-    val_union.f32 = val;
-    old_union.u32 = atomic_cmpxchg((volatile __local unsigned int *) p, cmp_union.u32, val_union.u32);
-    return old_union.f32;
-}
-
-float numba_dpex_atomic_cmpxchg_f32_global(volatile __generic float *p, float cmp, float val) {
-    union {
-        unsigned int u32;
-        float        f32;
-    } cmp_union, val_union, old_union;
-
-    cmp_union.f32 = cmp;
-    val_union.f32 = val;
-    old_union.u32 = atomic_cmpxchg((volatile __global unsigned int *) p, cmp_union.u32, val_union.u32);
-    return old_union.f32;
-}
-
-float numba_dpex_atomic_add_f32_local(volatile __generic float *p, float val) {
-    float found = *p;
-    float expected;
-    do {
-        expected = found;
-        found = numba_dpex_atomic_cmpxchg_f32_local(p, expected, expected + val);
-    } while (found != expected);
-    return found;
-}
-
-float numba_dpex_atomic_add_f32_global(volatile __generic float *p, float val) {
-    float found = *p;
-    float expected;
-    do {
-        expected = found;
-        found = numba_dpex_atomic_cmpxchg_f32_global(p, expected, expected + val);
-    } while (found != expected);
-    return found;
-}
-
-float numba_dpex_atomic_sub_f32_local(volatile __generic float *p, float val) {
-    float found = *p;
-    float expected;
-    do {
-        expected = found;
-        found = numba_dpex_atomic_cmpxchg_f32_local(p, expected, expected - val);
-    } while (found != expected);
-    return found;
-}
-
-float numba_dpex_atomic_sub_f32_global(volatile __generic float *p, float val) {
-    float found = *p;
-    float expected;
-    do {
-        expected = found;
-        found = numba_dpex_atomic_cmpxchg_f32_global(p, expected, expected - val);
-    } while (found != expected);
-    return found;
-}
diff --git a/setup.py b/setup.py
index 0c9eb684a1..1801ceccdc 100644
--- a/setup.py
+++ b/setup.py
@@ -3,16 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
-import shutil
-import subprocess
 import sys
 import sysconfig
 
 import dpctl
 import numba
 import numpy
-import setuptools.command.develop as orig_develop
-import setuptools.command.install as orig_install
 from setuptools import Extension, find_packages, setup
 
 import versioneer
@@ -62,83 +58,6 @@ def get_ext_modules():
     return ext_modules
 
 
-class install(orig_install.install):
-    def run(self):
-        spirv_compile()
-        super().run()
-
-
-class develop(orig_develop.develop):
-    def run(self):
-        spirv_compile()
-        super().run()
-
-
-def _get_cmdclass():
-    cmdclass = versioneer.get_cmdclass()
-    cmdclass["install"] = install
-    cmdclass["develop"] = develop
-    return cmdclass
-
-
-def spirv_compile():
-    if IS_LIN:
-        compiler = "icx"
-    if IS_WIN:
-        compiler = "clang.exe"
-
-    clang_args = [
-        compiler,
-        "-flto",
-        "-fveclib=none",
-        "-target",
-        "spir64-unknown-unknown",
-        "-c",
-        "-x",
-        "cl",
-        "-emit-llvm",
-        "-cl-std=CL2.0",
-        "-Xclang",
-        "-finclude-default-header",
-        "numba_dpex/ocl/atomics/atomic_ops.cl",
-        "-o",
-        "numba_dpex/ocl/atomics/atomic_ops.bc",
-    ]
-    spirv_args = [
-        _llvm_spirv(),
-        "--spirv-max-version",
-        "1.0",
-        "numba_dpex/ocl/atomics/atomic_ops.bc",
-        "-o",
-        "numba_dpex/ocl/atomics/atomic_ops.spir",
-    ]
-    subprocess.check_call(
-        clang_args,
-        stderr=subprocess.DEVNULL,
-        stdout=subprocess.DEVNULL,
-        shell=False,
-    )
-    subprocess.check_call(
-        spirv_args,
-        stderr=subprocess.DEVNULL,
-        stdout=subprocess.DEVNULL,
-        shell=False,
-    )
-
-
-def _llvm_spirv():
-    """Return path to llvm-spirv executable."""
-
-    try:
-        import dpcpp_llvm_spirv as dls
-    except ImportError:
-        raise ImportError("Cannot import dpcpp-llvm-spirv package")
-
-    result = dls.get_llvm_spirv_path()
-
-    return result
-
-
 packages = find_packages(
     include=["numba_dpex", "numba_dpex.*", "_dpexrt_python"]
 )
@@ -151,7 +70,6 @@ def _llvm_spirv():
 metadata = dict(
     name="numba-dpex",
     version=versioneer.get_version(),
-    cmdclass=_get_cmdclass(),
     description="An extension for Numba to add data-parallel offload capability",
     url="https://github.com/IntelPython/numba-dpex",
     packages=packages,

From 10baaa017356cea00758663f8d59cdf9581e8748 Mon Sep 17 00:00:00 2001
From: Yevhenii Havrylko <egavrilko@gmail.com>
Date: Wed, 2 Aug 2023 11:17:57 -0400
Subject: [PATCH 3/4] Fix atomics example

---
 numba_dpex/examples/kernel/atomic_op.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/numba_dpex/examples/kernel/atomic_op.py b/numba_dpex/examples/kernel/atomic_op.py
index 1ff8246dc6..49a5d74671 100644
--- a/numba_dpex/examples/kernel/atomic_op.py
+++ b/numba_dpex/examples/kernel/atomic_op.py
@@ -8,20 +8,28 @@
 
 
 @ndpx.kernel
-def atomic_reduction(a):
+def atomic_reduction(a, res):
+    """Summarize all the items in a and writes it into res using atomic.add.
+
+    :param a: array of values to get sum
+    :param res: result where to add all the items from a array. It must be preset to 0.
+    """
     idx = ndpx.get_global_id(0)
-    ndpx.atomic.add(a, 0, a[idx])
+    ndpx.atomic.add(res, 0, a[idx])
 
 
 def main():
     N = 10
-    a = np.arange(N)
+
+    # We are storing sum to the first element
+    a = np.arange(0, N)
+    res = np.zeros(1, dtype=a.dtype)
 
     print("Using device ...")
     print(a.device)
 
-    atomic_reduction[ndpx.Range(N)](a)
-    print("Reduction sum =", a[0])
+    atomic_reduction[ndpx.Range(N)](a, res)
+    print("Reduction sum =", res[0])
 
     print("Done...")
 

From 167def82a010ae690e4041dc6de7bfa059ea454e Mon Sep 17 00:00:00 2001
From: Yevhenii Havrylko <egavrilko@gmail.com>
Date: Wed, 2 Aug 2023 11:18:17 -0400
Subject: [PATCH 4/4] Update atomics documentation

---
 docs/backups/user_guides/getting_started.rst    |  4 +---
 .../atomic-operations.rst                       | 15 ---------------
 docs/source/getting_started.rst                 |  6 +++---
 .../kernel_programming/atomic-operations.rst    | 17 -----------------
 4 files changed, 4 insertions(+), 38 deletions(-)

diff --git a/docs/backups/user_guides/getting_started.rst b/docs/backups/user_guides/getting_started.rst
index 795f1c0396..50b4b69534 100644
--- a/docs/backups/user_guides/getting_started.rst
+++ b/docs/backups/user_guides/getting_started.rst
@@ -11,7 +11,6 @@ Numba-dpex depends on following components:
 * dpnp 0.10.1 (`Intel Python DPNP`_)
 * `dpcpp-llvm-spirv`_ (SPIRV generation from LLVM IR)
 * `llvmdev`_ (LLVM IR generation)
-* `spirv-tools`_
 * `packaging`_
 * `scipy`_ (for testing)
 * `pytest`_ (for testing)
@@ -59,7 +58,7 @@ installed in conda environment:
 .. code-block:: bash
 
     export ONEAPI_ROOT=/opt/intel/oneapi
-    conda create -n numba-dpex-env -c ${ONEAPI_ROOT}/conda_channel python=3.7 dpctl dpnp numba spirv-tools dpcpp-llvm-spirv llvmdev pytest
+    conda create -n numba-dpex-env -c ${ONEAPI_ROOT}/conda_channel python=3.7 dpctl dpnp numba dpcpp-llvm-spirv llvmdev pytest
     conda activate numba-dpex-env
 
 Activate DPC++ compiler:
@@ -150,7 +149,6 @@ Refer to :ref:`Docker <docker>` section for more options.
 .. _`Intel Python dpnp`: https://github.com/IntelPython/dpnp
 .. _`dpcpp-llvm-spirv`: https://github.com/IntelPython/dpcpp-llvm-spirv
 .. _`llvmdev`: https://anaconda.org/intel/llvmdev
-.. _`spirv-tools`: https://anaconda.org/intel/spirv-tools
 .. _`packaging`: https://packaging.pypa.io/
 .. _`scipy`: https://anaconda.org/intel/scipy
 .. _`pytest`: https://docs.pytest.org
diff --git a/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst b/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst
index 95b5a060b4..3c9626e52c 100644
--- a/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst
+++ b/docs/backups/user_guides/kernel_programming_guide/atomic-operations.rst
@@ -21,21 +21,6 @@ Example usage of atomic operations
     The ``numba_dpex.atomic.add`` function is analogous to The
     ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend.
 
-Generating Native FP Atomics
-----------------------------
-Numba-dpex supports generating native floating-point atomics.
-This feature is experimental. Users will need to provide
-the following environment variables to activate it.
-
-    NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
-
-Example command:
-
-.. code-block:: bash
-
-    NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 \
-    python program.py
-
 Full examples
 -------------
 
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index cdd6fc06da..da928df207 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -19,7 +19,7 @@ to get the latest production releases.
 .. code-block:: bash
 
     conda create -n numba-dpex-env                                             \
-        numba-dpex dpnp dpctl dpcpp-llvm-spirv spirv-tools                     \
+        numba-dpex dpnp dpctl dpcpp-llvm-spirv                     \
         -c intel -c conda-forge
 
 To try out the bleeding edge, the latest packages built from tip of the main
@@ -28,7 +28,7 @@ source trunk can be installed from the ``dppy/label/dev`` conda channel.
 .. code-block:: bash
 
     conda create -n numba-dpex-env                                             \
-        numba-dpex dpnp dpctl dpcpp-llvm-spirv spirv-tools                     \
+        numba-dpex dpnp dpctl dpcpp-llvm-spirv                     \
         -c dppy/label/dev -c intel -c conda-forge
 
 
@@ -70,7 +70,7 @@ first step.
 
     # Create a conda environment that hass needed dependencies installed
     conda create -n numba-dpex-env                                             \
-        dpctl dpnp numba spirv-tools dpcpp-llvm-spirv llvmdev pytest           \
+        dpctl dpnp numba dpcpp-llvm-spirv llvmdev pytest           \
         -c intel -c conda-forge
     # Activate the environment
     conda activate numba-dpex-env
diff --git a/docs/source/user_guide/kernel_programming/atomic-operations.rst b/docs/source/user_guide/kernel_programming/atomic-operations.rst
index 388e822ffc..07c2be4795 100644
--- a/docs/source/user_guide/kernel_programming/atomic-operations.rst
+++ b/docs/source/user_guide/kernel_programming/atomic-operations.rst
@@ -21,23 +21,6 @@ Example usage of atomic operations
     The ``numba_dpex.atomic.add`` function is analogous to The
     ``numba.cuda.atomic.add`` provided by the ``numba.cuda`` backend.
 
-Generating Native FP Atomics
-----------------------------
-Numba-dpex supports generating native floating-point atomics.
-This feature is experimental. Users will need to provide
-the following environment variables to activate it.
-
-    NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1
-    NUMBA_DPEX_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv
-
-Example command:
-
-.. code-block:: bash
-
-    NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE=1 \
-    NUMBA_DPEX_LLVM_SPIRV_ROOT=/path/to/dpcpp/provided/llvm_spirv \
-    python program.py
-
 Full examples
 -------------