Remove atomic emulation

ZzEeKkAa · ZzEeKkAa · commit e8253e282cac · 2023-07-26T15:08:46.000-04:00
diff --git a/numba_dpex/config.py b/numba_dpex/config.py
@@ -53,9 +53,6 @@ def __getattr__(name):
 # Dump offload diagnostics
 OFFLOAD_DIAGNOSTICS = _readenv("NUMBA_DPEX_OFFLOAD_DIAGNOSTICS", int, 0)
 
-# Activate Native floating point atomcis support for supported devices.
-# Requires llvm-spirv supporting the FP atomics extension
-NATIVE_FP_ATOMICS = _readenv("NUMBA_DPEX_ACTIVATE_ATOMICS_FP_NATIVE", int, 0)
 # Emit debug info
 DEBUG = _readenv("NUMBA_DPEX_DEBUG", int, config.DEBUG)
 DEBUGINFO_DEFAULT = _readenv(
diff --git a/numba_dpex/ocl/oclimpl.py b/numba_dpex/ocl/oclimpl.py
@@ -143,63 +143,6 @@ def sub_group_barrier_impl(context, builder, sig, args):
     return _void_value
 
 
-def insert_and_call_atomic_fn(
-    context, builder, sig, fn_type, dtype, ptr, val, addrspace
-):
-    ll_p = None
-    name = ""
-    if dtype.name == "float32":
-        ll_val = llvmir.FloatType()
-        ll_p = ll_val.as_pointer()
-        if fn_type == "add":
-            name = "numba_dpex_atomic_add_f32"
-        elif fn_type == "sub":
-            name = "numba_dpex_atomic_sub_f32"
-        else:
-            raise TypeError("Operation type is not supported %s" % (fn_type))
-    elif dtype.name == "float64":
-        if True:
-            ll_val = llvmir.DoubleType()
-            ll_p = ll_val.as_pointer()
-            if fn_type == "add":
-                name = "numba_dpex_atomic_add_f64"
-            elif fn_type == "sub":
-                name = "numba_dpex_atomic_sub_f64"
-            else:
-                raise TypeError(
-                    "Operation type is not supported %s" % (fn_type)
-                )
-    else:
-        raise TypeError(
-            "Atomic operation is not supported for type %s" % (dtype.name)
-        )
-
-    if addrspace == address_space.LOCAL:
-        name = name + "_local"
-    else:
-        name = name + "_global"
-
-    assert ll_p is not None
-    assert name != ""
-    ll_p.addrspace = address_space.GENERIC
-
-    mod = builder.module
-    if sig.return_type == types.void:
-        llretty = llvmir.VoidType()
-    else:
-        llretty = context.get_value_type(sig.return_type)
-
-    llargs = [ll_p, context.get_value_type(sig.args[2])]
-    fnty = llvmir.FunctionType(llretty, llargs)
-
-    fn = cgutils.get_or_insert_function(mod, fnty, name)
-    fn.calling_convention = kernel_target.CC_SPIR_FUNC
-
-    generic_ptr = context.addrspacecast(builder, ptr, address_space.GENERIC)
-
-    return builder.call(fn, [generic_ptr, val])
-
-
 def native_atomic_add(context, builder, sig, args):
     aryty, indty, valty = sig.args
     ary, inds, val = args
@@ -286,20 +229,15 @@ def native_atomic_add(context, builder, sig, args):
 @lower(stubs.atomic.add, types.Array, types.UniTuple, types.Any)
 @lower(stubs.atomic.add, types.Array, types.Tuple, types.Any)
 def atomic_add_tuple(context, builder, sig, args):
-    device_type = dpctl.get_current_queue().sycl_device.device_type
     dtype = sig.args[0].dtype
 
-    if dtype == types.float32 or dtype == types.float64:
-        if (
-            device_type == dpctl.device_type.gpu
-            and config.NATIVE_FP_ATOMICS == 1
-        ):
-            return native_atomic_add(context, builder, sig, args)
-        else:
-            # Currently, DPCPP only supports native floating point
-            # atomics for GPUs.
-            return atomic_add(context, builder, sig, args, "add")
-    elif dtype == types.int32 or dtype == types.int64:
+    # TODO: Confirm whether this dtype check is still required, or whether native_atomic_add can be called for every dtype.
+    if (
+        dtype == types.float32
+        or dtype == types.float64
+        or dtype == types.int32
+        or dtype == types.int64
+    ):
         return native_atomic_add(context, builder, sig, args)
     else:
         raise TypeError("Atomic operation on unsupported type %s" % dtype)
@@ -337,83 +275,19 @@ def atomic_sub_wrapper(context, builder, sig, args):
 @lower(stubs.atomic.sub, types.Array, types.UniTuple, types.Any)
 @lower(stubs.atomic.sub, types.Array, types.Tuple, types.Any)
 def atomic_sub_tuple(context, builder, sig, args):
-    device_type = dpctl.get_current_queue().sycl_device.device_type
     dtype = sig.args[0].dtype
 
-    if dtype == types.float32 or dtype == types.float64:
-        if (
-            device_type == dpctl.device_type.gpu
-            and config.NATIVE_FP_ATOMICS == 1
-        ):
-            return atomic_sub_wrapper(context, builder, sig, args)
-        else:
-            # Currently, DPCPP only supports native floating point
-            # atomics for GPUs.
-            return atomic_add(context, builder, sig, args, "sub")
-    elif dtype == types.int32 or dtype == types.int64:
+    if (
+        dtype == types.float32
+        or dtype == types.float64
+        or dtype == types.int32
+        or dtype == types.int64
+    ):
         return atomic_sub_wrapper(context, builder, sig, args)
     else:
         raise TypeError("Atomic operation on unsupported type %s" % dtype)
 
 
-def atomic_add(context, builder, sig, args, name):
-    from .atomics import atomic_support_present
-
-    if atomic_support_present():
-        context.extra_compile_options[kernel_target.LINK_ATOMIC] = True
-        aryty, indty, valty = sig.args
-        ary, inds, val = args
-        dtype = aryty.dtype
-
-        if indty == types.intp:
-            indices = [inds]  # just a single integer
-            indty = [indty]
-        else:
-            indices = cgutils.unpack_tuple(builder, inds, count=len(indty))
-            indices = [
-                context.cast(builder, i, t, types.intp)
-                for t, i in zip(indty, indices)
-            ]
-
-        if dtype != valty:
-            raise TypeError("expecting %s but got %s" % (dtype, valty))
-
-        if aryty.ndim != len(indty):
-            raise TypeError(
-                "indexing %d-D array with %d-D index" % (aryty.ndim, len(indty))
-            )
-
-        lary = context.make_array(aryty)(context, builder, ary)
-        ptr = cgutils.get_item_pointer(context, builder, aryty, lary, indices)
-
-        if isinstance(aryty, Array) and aryty.addrspace == address_space.LOCAL:
-            return insert_and_call_atomic_fn(
-                context,
-                builder,
-                sig,
-                name,
-                dtype,
-                ptr,
-                val,
-                address_space.LOCAL,
-            )
-        else:
-            return insert_and_call_atomic_fn(
-                context,
-                builder,
-                sig,
-                name,
-                dtype,
-                ptr,
-                val,
-                address_space.GLOBAL,
-            )
-    else:
-        raise ImportError(
-            "Atomic support is not present, can not perform atomic_add"
-        )
-
-
 @lower(stubs.private.array, types.IntegerLiteral, types.Any)
 def dpex_private_array_integer(context, builder, sig, args):
     length = sig.args[0].literal_value
diff --git a/numba_dpex/spirv_generator.py b/numba_dpex/spirv_generator.py
@@ -5,7 +5,6 @@
 """A wrapper to connect to the SPIR-V binaries (Tools, Translator)."""
 
 import os
-import shutil
 import tempfile
 from subprocess import CalledProcessError, check_call
 
@@ -75,14 +74,6 @@ def generate(self, llvm_spirv_args, ipath, opath):
         if config.DEBUG:
             llvm_spirv_flags.append("--spirv-debug-info-version=ocl-100")
 
-        if not config.NATIVE_FP_ATOMICS:
-            # Do NOT upgrade version unless you are 100% confident. Not all
-            # kernel outputs can be converted to higher version of spirv.
-            # That results in different spirv file versions. As next step
-            # requires linking of the result file and
-            # numba_dpex/ocl/atomics/atomic_ops.spir it will raise an error
-            # that two spirv files have different version and can't be linked
-            llvm_spirv_args = ["--spirv-max-version", "1.0"] + llvm_spirv_args
         llvm_spirv_tool = self._llvm_spirv()
 
         if config.DEBUG:
@@ -162,15 +153,9 @@ def finalize(self):
         # Generate SPIR-V from "friendly" LLVM-based SPIR 2.0
         spirv_path = self._track_temp_file("generated-spirv")
 
-        binary_paths = [spirv_path]
-
         llvm_spirv_args = []
         for key in list(self.context.extra_compile_options.keys()):
-            if key == LINK_ATOMIC:
-                from .ocl.atomics import get_atomic_spirv_path
-
-                binary_paths.append(get_atomic_spirv_path())
-            elif key == LLVM_SPIRV_ARGS:
+            if key == LLVM_SPIRV_ARGS:
                 llvm_spirv_args = self.context.extra_compile_options[key]
             del self.context.extra_compile_options[key]
 
@@ -194,10 +179,6 @@ def finalize(self):
             opath=spirv_path,
         )
 
-        if len(binary_paths) > 1:
-            spirv_path = self._track_temp_file("linked-spirv")
-            self._cmd.link(spirv_path, binary_paths)
-
         if config.SAVE_IR_FILES != 0:
             # Dump the llvmir and llvmbc in file
             with open("generated_spirv.spir", "wb") as f1:
diff --git a/numba_dpex/tests/kernel_tests/test_atomic_op.py b/numba_dpex/tests/kernel_tests/test_atomic_op.py
@@ -160,21 +160,11 @@ def test_kernel_atomic_multi_dim(
     assert a[0] == expected
 
 
-skip_NATIVE_FP_ATOMICS_0 = pytest.mark.skipif(
-    not config.NATIVE_FP_ATOMICS, reason="Native FP atomics disabled"
-)
-
-
-def skip_if_disabled(*args):
-    return pytest.param(*args, marks=skip_NATIVE_FP_ATOMICS_0)
-
-
 @skip_no_atomic_support
 @pytest.mark.parametrize(
-    "NATIVE_FP_ATOMICS, expected_native_atomic_for_device",
+    "expected_native_atomic_for_device",
     [
-        skip_if_disabled(1, lambda device: device != "opencl:cpu:0"),
-        (0, lambda device: False),
+        lambda device: True,
     ],
 )
 @pytest.mark.parametrize(
@@ -189,7 +179,6 @@ def skip_if_disabled(*args):
 )
 @pytest.mark.parametrize("dtype", list_of_f_dtypes)
 def test_atomic_fp_native(
-    NATIVE_FP_ATOMICS,
     expected_native_atomic_for_device,
     function_generator,
     operator_name,
@@ -206,16 +195,15 @@ def test_atomic_fp_native(
         for arg in args
     ]
 
-    with override_config("NATIVE_FP_ATOMICS", NATIVE_FP_ATOMICS):
-        kernel.compile(
-            args=argtypes,
-            debug=False,
-            compile_flags=None,
-            target_ctx=dpex_kernel_target.target_context,
-            typing_ctx=dpex_kernel_target.typing_context,
-        )
-
-        is_native_atomic = expected_spirv_function in kernel._llvm_module
-        assert is_native_atomic == expected_native_atomic_for_device(
-            dpctl.select_default_device().filter_string
-        )
+    kernel.compile(
+        args=argtypes,
+        debug=False,
+        compile_flags=None,
+        target_ctx=dpex_kernel_target.target_context,
+        typing_ctx=dpex_kernel_target.typing_context,
+    )
+
+    is_native_atomic = expected_spirv_function in kernel._llvm_module
+    assert is_native_atomic == expected_native_atomic_for_device(
+        dpctl.select_default_device().filter_string
+    )