diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md
index 967d6a6e00b84..d9d107cb7a6f6 100644
--- a/sycl/doc/extensions/README.md
+++ b/sycl/doc/extensions/README.md
@@ -13,7 +13,7 @@ DPC++ extensions status:
 | [SYCL_INTEL_deduction_guides](deduction_guides/SYCL_INTEL_deduction_guides.asciidoc) | Supported | |
 | [SYCL_INTEL_device_specific_kernel_queries](DeviceSpecificKernelQueries/SYCL_INTEL_device_specific_kernel_queries.asciidoc) | Proposal | |
 | [SYCL_INTEL_enqueue_barrier](EnqueueBarrier/enqueue_barrier.asciidoc) | Supported(OpenCL, Level Zero) | |
-| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Proposal | |
+| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Partially supported(OpenCL: CPU, GPU) | Not supported: pointer types |
 | [SYCL_INTEL_group_algorithms](GroupAlgorithms/SYCL_INTEL_group_algorithms.asciidoc) | Supported(OpenCL) | |
 | [SYCL_INTEL_group_mask](./GroupMask/SYCL_INTEL_group_mask.asciidoc) | Proposal | |
 | [FPGA selector](IntelFPGA/FPGASelector.md) | Supported | |
diff --git a/sycl/include/CL/__spirv/spirv_ops.hpp b/sycl/include/CL/__spirv/spirv_ops.hpp
index aa7696d45e321..28482dca289cc 100644
--- a/sycl/include/CL/__spirv/spirv_ops.hpp
+++ b/sycl/include/CL/__spirv/spirv_ops.hpp
@@ -155,6 +155,7 @@ extern SYCL_EXTERNAL TempRetT __spirv_ImageSampleExplicitLod(SampledType,
   macro(__attribute__((opencl_local)), Arg)
 
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, float)
+__SPIRV_ATOMICS(__SPIRV_ATOMIC_FLOAT, double)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, int)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long)
 __SPIRV_ATOMICS(__SPIRV_ATOMIC_SIGNED, long long)
diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp
index 06053b487d0fd..e537bf6e7d979 100644
--- a/sycl/include/CL/sycl.hpp
+++ b/sycl/include/CL/sycl.hpp
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/sycl/include/CL/sycl/detail/defines.hpp b/sycl/include/CL/sycl/detail/defines.hpp
index 7c4381fae189e..59aa54247028d 100644
--- a/sycl/include/CL/sycl/detail/defines.hpp
+++ b/sycl/include/CL/sycl/detail/defines.hpp
@@ -46,3 +46,10 @@
 #warning "No assumptions will be emitted due to no __builtin_assume available"
 #endif
 #endif
+
+// inline constexpr is a C++17 feature
+#if __cplusplus >= 201703L
+#define __SYCL_INLINE_CONSTEXPR inline constexpr
+#else
+#define __SYCL_INLINE_CONSTEXPR static constexpr
+#endif
diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp
index e3c783c7bf65d..a95e81f5506ac 100644
--- a/sycl/include/CL/sycl/detail/spirv.hpp
+++ b/sycl/include/CL/sycl/detail/spirv.hpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef __SYCL_DEVICE_ONLY__
 __SYCL_INLINE_NAMESPACE(cl) {
@@ -28,7 +29,7 @@ template struct group_scope> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
 };
 
-template <> struct group_scope {
+template <> struct group_scope<::cl::sycl::intel::sub_group> {
   static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup;
 };
 
@@ -69,6 +70,226 @@ T GroupBroadcast(T x, id local_id) {
   return __spirv_GroupBroadcast(group_scope::value, ocl_x, ocl_id);
 }
 
+// Single happens-before means semantics should always apply to all spaces
+// Although consume is unsupported, forwarding to acquire is valid
+static inline constexpr __spv::MemorySemanticsMask::Flag
+getMemorySemanticsMask(intel::memory_order Order) {
+  __spv::MemorySemanticsMask::Flag SpvOrder =
__spv::MemorySemanticsMask::None; + switch (Order) { + case intel::memory_order::relaxed: + SpvOrder = __spv::MemorySemanticsMask::None; + break; + case intel::memory_order::__consume_unsupported: + case intel::memory_order::acquire: + SpvOrder = __spv::MemorySemanticsMask::Acquire; + break; + case intel::memory_order::release: + SpvOrder = __spv::MemorySemanticsMask::Release; + break; + case intel::memory_order::acq_rel: + SpvOrder = __spv::MemorySemanticsMask::AcquireRelease; + break; + case intel::memory_order::seq_cst: + SpvOrder = __spv::MemorySemanticsMask::SequentiallyConsistent; + break; + } + return static_cast<__spv::MemorySemanticsMask::Flag>( + SpvOrder | __spv::MemorySemanticsMask::SubgroupMemory | + __spv::MemorySemanticsMask::WorkgroupMemory | + __spv::MemorySemanticsMask::CrossWorkgroupMemory); +} + +static inline constexpr __spv::Scope::Flag getScope(intel::memory_scope Scope) { + switch (Scope) { + case intel::memory_scope::work_item: + return __spv::Scope::Invocation; + case intel::memory_scope::sub_group: + return __spv::Scope::Subgroup; + case intel::memory_scope::work_group: + return __spv::Scope::Workgroup; + case intel::memory_scope::device: + return __spv::Scope::Device; + case intel::memory_scope::system: + return __spv::Scope::CrossDevice; + } +} + +template +inline typename detail::enable_if_t::value, T> +AtomicCompareExchange(multi_ptr MPtr, + intel::memory_scope Scope, intel::memory_order Success, + intel::memory_order Failure, T Desired, T Expected) { + auto SPIRVSuccess = getMemorySemanticsMask(Success); + auto SPIRVFailure = getMemorySemanticsMask(Failure); + auto SPIRVScope = getScope(Scope); + auto *Ptr = MPtr.get(); + return __spirv_AtomicCompareExchange(Ptr, SPIRVScope, SPIRVSuccess, + SPIRVFailure, Desired, Expected); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicCompareExchange(multi_ptr MPtr, + intel::memory_scope Scope, intel::memory_order Success, + intel::memory_order Failure, T Desired, T Expected) { + using I = detail::make_unsinged_integer_t; + auto SPIRVSuccess = getMemorySemanticsMask(Success); + auto SPIRVFailure = getMemorySemanticsMask(Failure); + auto SPIRVScope = getScope(Scope); + auto *PtrInt = + reinterpret_cast::pointer_t>( + MPtr.get()); + I DesiredInt = detail::bit_cast(Desired); + I ExpectedInt = detail::bit_cast(Expected); + I ResultInt = __spirv_AtomicCompareExchange( + PtrInt, SPIRVScope, SPIRVSuccess, SPIRVFailure, DesiredInt, ExpectedInt); + return detail::bit_cast(ResultInt); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicLoad(Ptr, SPIRVScope, SPIRVOrder); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicLoad(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order) { + using I = detail::make_unsinged_integer_t; + auto *PtrInt = + reinterpret_cast::pointer_t>( + MPtr.get()); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + I ResultInt = __spirv_AtomicLoad(PtrInt, SPIRVScope, SPIRVOrder); + return detail::bit_cast(ResultInt); +} + +template +inline typename detail::enable_if_t::value> +AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + 
__spirv_AtomicStore(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value> +AtomicStore(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + using I = detail::make_unsinged_integer_t; + auto *PtrInt = + reinterpret_cast::pointer_t>( + MPtr.get()); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + I ValueInt = detail::bit_cast(Value); + __spirv_AtomicStore(PtrInt, SPIRVScope, SPIRVOrder, ValueInt); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicExchange(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicExchange(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + using I = detail::make_unsinged_integer_t; + auto *PtrInt = + reinterpret_cast::pointer_t>( + MPtr.get()); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + I ValueInt = detail::bit_cast(Value); + I ResultInt = + __spirv_AtomicExchange(PtrInt, SPIRVScope, SPIRVOrder, ValueInt); + return detail::bit_cast(ResultInt); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicIAdd(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicIAdd(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicISub(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicISub(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicAnd(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicAnd(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicOr(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicOr(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicXor(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicXor(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicMin(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicMin(Ptr, SPIRVScope, SPIRVOrder, Value); +} + +template +inline typename detail::enable_if_t::value, T> +AtomicMax(multi_ptr MPtr, intel::memory_scope Scope, + intel::memory_order Order, T Value) { + 
auto *Ptr = MPtr.get(); + auto SPIRVOrder = getMemorySemanticsMask(Order); + auto SPIRVScope = getScope(Scope); + return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value); +} + } // namespace spirv } // namespace detail } // namespace sycl diff --git a/sycl/include/CL/sycl/intel/atomic.hpp b/sycl/include/CL/sycl/intel/atomic.hpp new file mode 100644 index 0000000000000..bbc49ecc210d9 --- /dev/null +++ b/sycl/include/CL/sycl/intel/atomic.hpp @@ -0,0 +1,13 @@ +//==---------------- atomic.hpp - SYCL_INTEL_extended_atomics --------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include diff --git a/sycl/include/CL/sycl/intel/atomic_enums.hpp b/sycl/include/CL/sycl/intel/atomic_enums.hpp new file mode 100644 index 0000000000000..a85c9902cd524 --- /dev/null +++ b/sycl/include/CL/sycl/intel/atomic_enums.hpp @@ -0,0 +1,103 @@ +//==---------------- atomic_enums.hpp - SYCL_INTEL_extended_atomics enums --==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#include +#endif +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace intel { + +enum class memory_order : int { + relaxed, + acquire, + __consume_unsupported, // helps optimizer when mapping to std::memory_order + release, + acq_rel, + seq_cst +}; +__SYCL_INLINE_CONSTEXPR memory_order memory_order_relaxed = + memory_order::relaxed; +__SYCL_INLINE_CONSTEXPR memory_order memory_order_acquire = + memory_order::acquire; +__SYCL_INLINE_CONSTEXPR memory_order memory_order_release = + memory_order::release; +__SYCL_INLINE_CONSTEXPR memory_order memory_order_acq_rel = + memory_order::acq_rel; +__SYCL_INLINE_CONSTEXPR memory_order memory_order_seq_cst = + memory_order::seq_cst; + +enum class memory_scope : int { + work_item, + sub_group, + work_group, + device, + system +}; +__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_work_item = + memory_scope::work_item; +__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_sub_group = + memory_scope::sub_group; +__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_work_group = + memory_scope::work_group; +__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_device = memory_scope::device; +__SYCL_INLINE_CONSTEXPR memory_scope memory_scope_system = memory_scope::system; + +#ifndef __SYCL_DEVICE_ONLY__ +namespace detail { +// Cannot use switch statement in constexpr before C++14 +// Nested ternary conditions in else branch required for C++11 +#if __cplusplus >= 201402L +static inline constexpr std::memory_order +getStdMemoryOrder(::cl::sycl::intel::memory_order order) { + switch (order) { + case memory_order::relaxed: + return std::memory_order_relaxed; + case memory_order::__consume_unsupported: + return std::memory_order_consume; + case memory_order::acquire: + return std::memory_order_acquire; + case memory_order::release: + return std::memory_order_release; + case memory_order::acq_rel: + return std::memory_order_acq_rel; + case memory_order::seq_cst: + return 
std::memory_order_seq_cst; + } +} +#else +static inline constexpr std::memory_order +getStdMemoryOrder(::cl::sycl::intel::memory_order order) { + return (order == memory_order::relaxed) + ? std::memory_order_relaxed + : (order == memory_order::__consume_unsupported) + ? std::memory_order_consume + : (order == memory_order::acquire) + ? std::memory_order_acquire + : (order == memory_order::release) + ? std::memory_order_release + : (order == memory_order::acq_rel) + ? std::memory_order_acq_rel + : std::memory_order_seq_cst; +} +#endif // __cplusplus +} // namespace detail +#endif // __SYCL_DEVICE_ONLY__ + +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_fence.hpp b/sycl/include/CL/sycl/intel/atomic_fence.hpp new file mode 100644 index 0000000000000..aba95c060b878 --- /dev/null +++ b/sycl/include/CL/sycl/intel/atomic_fence.hpp @@ -0,0 +1,40 @@ +//==----- atomic_fence.hpp - SYCL_INTEL_extended_atomics atomic_fence ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#include +#endif + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace intel { +namespace detail { +using namespace cl::sycl::detail; +} + +static inline void atomic_fence(memory_order order, memory_scope scope) { +#ifdef __SYCL_DEVICE_ONLY__ + auto SPIRVOrder = detail::spirv::getMemorySemanticsMask(order); + auto SPIRVScope = detail::spirv::getScope(scope); + __spirv_MemoryBarrier(SPIRVScope, static_cast(SPIRVOrder)); +#else + (void)scope; + auto StdOrder = detail::getStdMemoryOrder(order); + atomic_thread_fence(StdOrder); +#endif +} + +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/intel/atomic_ref.hpp new file mode 100644 index 0000000000000..89202cb7ca08c --- /dev/null +++ b/sycl/include/CL/sycl/intel/atomic_ref.hpp @@ -0,0 +1,531 @@ +//==----- atomic_ref.hpp - SYCL_INTEL_extended_atomics atomic_ref ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#include +#endif +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +// Forward declaration +template +class multi_ptr; + +namespace intel { +namespace detail { + +// Import from detail:: into intel::detail:: to improve readability later +using namespace ::cl::sycl::detail; + +using memory_order = cl::sycl::intel::memory_order; +using memory_scope = cl::sycl::intel::memory_scope; + +template +using IsValidAtomicType = + bool_constant::value || std::is_pointer::value>; + +template +using IsValidAtomicAddressSpace = + bool_constant; + +// DefaultOrder parameter is limited to read-modify-write orders +template +using IsValidDefaultOrder = bool_constant; + +template struct memory_order_traits; + +template <> struct memory_order_traits { + static constexpr memory_order read_order = memory_order::relaxed; + static constexpr memory_order write_order = memory_order::relaxed; +}; + +template <> struct memory_order_traits { + static constexpr memory_order read_order = memory_order::acquire; + static constexpr memory_order write_order = memory_order::release; +}; + +template <> struct memory_order_traits { + static constexpr memory_order read_order = memory_order::seq_cst; + static constexpr memory_order write_order = memory_order::seq_cst; +}; + +// Cannot use switch statement in constexpr before C++14 +// Nested ternary conditions in else branch required for C++11 +#if __cplusplus >= 201402L +inline constexpr memory_order getLoadOrder(memory_order order) { + switch (order) { + case memory_order_relaxed: + return memory_order_relaxed; + + case memory_order_acquire: + case memory_order::__consume_unsupported: + case memory_order_acq_rel: + case memory_order_release: + return memory_order_acquire; + + case memory_order_seq_cst: + return memory_order_seq_cst; + } +} +#else +inline constexpr memory_order getLoadOrder(memory_order order) { + return (order == memory_order_relaxed) + ? memory_order_relaxed + : (order == memory_order_seq_cst) ? memory_order_seq_cst + : memory_order_acquire; +} +#endif + +template struct bit_equal; + +template +struct bit_equal::value>> { + bool operator()(const T &lhs, const T &rhs) { return lhs == rhs; } +}; + +template <> struct bit_equal { + bool operator()(const float &lhs, const float &rhs) { + auto LhsInt = detail::bit_cast(lhs); + auto RhsInt = detail::bit_cast(rhs); + return LhsInt == RhsInt; + } +}; + +template <> struct bit_equal { + bool operator()(const double &lhs, const double &rhs) { + auto LhsInt = detail::bit_cast(lhs); + auto RhsInt = detail::bit_cast(rhs); + return LhsInt == RhsInt; + } +}; + +// Functionality for any atomic of type T, reused by partial specializations +template +class atomic_ref_base { + static_assert( + detail::IsValidAtomicType::value, + "Invalid atomic type. 
Valid types are arithmetic and pointer types"); + static_assert(!std::is_same::value, + "intel::atomic_ref does not support bool type"); + static_assert(!(std::is_same::value || + std::is_same::value || + std::is_same::value), + "intel::atomic_ref does not support char type"); + static_assert(!(std::is_same::value || + std::is_same::value), + "intel::atomic_ref does not support short type"); + static_assert(!std::is_pointer::value, + "intel::atomic_ref does not yet support pointer types"); + static_assert(detail::IsValidAtomicAddressSpace::value, + "Invalid atomic address_space. Valid address spaces are: " + "global_space, local_space"); + static_assert( + detail::IsValidDefaultOrder::value, + "Invalid default memory_order for atomics. Valid defaults are: " + "relaxed, acq_rel, seq_cst"); + +public: + using value_type = T; + static constexpr size_t required_alignment = sizeof(T); + static constexpr bool is_always_lock_free = + detail::IsValidAtomicType::value; + static constexpr memory_order default_read_order = + detail::memory_order_traits::read_order; + static constexpr memory_order default_write_order = + detail::memory_order_traits::write_order; + static constexpr memory_order default_read_modify_write_order = DefaultOrder; + static constexpr memory_scope default_scope = DefaultScope; + + bool is_lock_free() const noexcept { + return detail::IsValidAtomicType::value; + } + +#ifdef __SYCL_DEVICE_ONLY__ + explicit atomic_ref_base(T &ref) : ptr(multi_ptr(&ref)) {} +#else + // FIXME: This reinterpret_cast is UB, but happens to work for now + explicit atomic_ref_base(T &ref) + : ptr(reinterpret_cast *>(&ref)) {} +#endif + // Our implementation of copy constructor could be trivial + // Defined this way for consistency with standard atomic_ref + atomic_ref_base(const atomic_ref_base &ref) noexcept { ptr = ref.ptr; }; + atomic_ref_base &operator=(const atomic_ref_base &) = delete; + + void store(T operand, memory_order order = default_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + detail::spirv::AtomicStore(ptr, scope, order, operand); +#else + (void)scope; + ptr->store(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator=(T desired) const noexcept { + store(desired); + return desired; + } + + T load(memory_order order = default_read_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicLoad(ptr, scope, order); +#else + (void)scope; + return ptr->load(detail::getStdMemoryOrder(order)); +#endif + } + + operator T() const noexcept { return load(); } + + T exchange(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicExchange(ptr, scope, order, operand); +#else + (void)scope; + return ptr->exchange(operand, detail::getStdMemoryOrder(order)); +#endif + } + + bool compare_exchange_strong(T &expected, T desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const + noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + T value = detail::spirv::AtomicCompareExchange(ptr, scope, success, failure, + desired, expected); + bool succeeded = detail::bit_equal()(value, expected); + if (!succeeded) { + expected = value; + } + return succeeded; +#else + (void)scope; + return ptr->compare_exchange_strong(expected, desired, + detail::getStdMemoryOrder(success), + detail::getStdMemoryOrder(failure)); +#endif + } + + bool + 
compare_exchange_strong(T &expected, T desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_strong(expected, desired, order, order, scope); + } + + bool compare_exchange_weak(T &expected, T desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const + noexcept { + // SPIR-V AtomicCompareExchangeWeak is deprecated and equivalent to + // AtomicCompareExchange. For now, use AtomicCompareExchange on device and + // compare_exchange_weak on host +#ifdef __SYCL_DEVICE_ONLY__ + return compare_exchange_strong(expected, desired, success, failure, scope); +#else + (void)scope; + return ptr->compare_exchange_weak(expected, desired, + detail::getStdMemoryOrder(success), + detail::getStdMemoryOrder(failure)); +#endif + } + + bool + compare_exchange_weak(T &expected, T desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_weak(expected, desired, order, order, scope); + } + +protected: +#ifdef __SYCL_DEVICE_ONLY__ + multi_ptr ptr; +#else + std::atomic *ptr; +#endif +}; + +// Hook allowing partial specializations to inherit atomic_ref_base +template +class atomic_ref_impl + : public atomic_ref_base { +public: + using atomic_ref_base::atomic_ref_base; +}; + +// Partial specialization for integral types +template +class atomic_ref_impl::value>> + : public atomic_ref_base { + +public: + using value_type = T; + using difference_type = value_type; + static constexpr size_t required_alignment = sizeof(T); + static constexpr bool is_always_lock_free = + detail::IsValidAtomicType::value; + static constexpr memory_order default_read_order = + detail::memory_order_traits::read_order; + static constexpr memory_order default_write_order = + detail::memory_order_traits::write_order; + static constexpr memory_order default_read_modify_write_order = DefaultOrder; + static constexpr memory_scope default_scope = DefaultScope; + + using atomic_ref_base::atomic_ref_base; + using atomic_ref_base::load; + using atomic_ref_base::compare_exchange_weak; + + T fetch_add(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicIAdd(ptr, scope, order, operand); +#else + (void)scope; + return ptr->fetch_add(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator+=(T operand) const noexcept { + return fetch_add(operand) + operand; + } + + T operator++(int) const noexcept { + // TODO: use AtomicIIncrement as an optimization + return fetch_add(1); + } + + T operator++() const noexcept { + // TODO: use AtomicIIncrement as an optimization + return fetch_add(1) + 1; + } + + T fetch_sub(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicISub(ptr, scope, order, operand); +#else + (void)scope; + return ptr->fetch_sub(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator-=(T operand) const noexcept { + return fetch_sub(operand) - operand; + } + + T operator--(int) const noexcept { + // TODO: use AtomicIDecrement as an optimization + return fetch_sub(1); + } + + T operator--() const noexcept { + // TODO: use AtomicIDecrement as an optimization + return fetch_sub(1) - 1; + } + + T fetch_and(T operand, memory_order order = 
default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicAnd(ptr, scope, order, operand); +#else + (void)scope; + return ptr->fetch_and(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator&=(T operand) const noexcept { + return fetch_and(operand) & operand; + } + + T fetch_or(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicOr(ptr, scope, order, operand); +#else + (void)scope; + return ptr->fetch_or(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator|=(T operand) const noexcept { return fetch_or(operand) | operand; } + + T fetch_xor(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicXor(ptr, scope, order, operand); +#else + (void)scope; + return ptr->fetch_xor(operand, detail::getStdMemoryOrder(order)); +#endif + } + + T operator^=(T operand) const noexcept { + return fetch_xor(operand) ^ operand; + } + + T fetch_min(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicMin(ptr, scope, order, operand); +#else + auto load_order = detail::getLoadOrder(order); + T old = load(load_order, scope); + while (operand < old && + !compare_exchange_weak(old, operand, order, scope)) { + } + return old; +#endif + } + + T fetch_max(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { +#ifdef __SYCL_DEVICE_ONLY__ + return detail::spirv::AtomicMax(ptr, scope, order, operand); +#else + auto load_order = detail::getLoadOrder(order); + T old = load(load_order, scope); + while (operand > old && + !compare_exchange_weak(old, operand, order, scope)) { + } + return old; +#endif + } + +private: + using atomic_ref_base::ptr; +}; + +// Partial specialization for floating-point types +// TODO: Leverage floating-point SPIR-V atomics instead of emulation +template +class atomic_ref_impl< + T, DefaultOrder, DefaultScope, AddressSpace, + typename detail::enable_if_t::value>> + : public atomic_ref_base { + +public: + using value_type = T; + using difference_type = value_type; + static constexpr size_t required_alignment = sizeof(T); + static constexpr bool is_always_lock_free = + detail::IsValidAtomicType::value; + static constexpr memory_order default_read_order = + detail::memory_order_traits::read_order; + static constexpr memory_order default_write_order = + detail::memory_order_traits::write_order; + static constexpr memory_order default_read_modify_write_order = DefaultOrder; + static constexpr memory_scope default_scope = DefaultScope; + + using atomic_ref_base::atomic_ref_base; + using atomic_ref_base::load; + using atomic_ref_base::compare_exchange_weak; + + T fetch_add(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + auto load_order = detail::getLoadOrder(order); + T expected = load(load_order, scope); + T desired; + do { + desired = expected + operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T operator+=(T operand) const noexcept { + return fetch_add(operand) + operand; + } + + T fetch_sub(T operand, memory_order order 
= default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + auto load_order = detail::getLoadOrder(order); + T expected = load(load_order, scope); + T desired; + do { + desired = expected - operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T operator-=(T operand) const noexcept { + return fetch_sub(operand) - operand; + } + + T fetch_min(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + auto load_order = detail::getLoadOrder(order); + T old = load(load_order, scope); + while (operand < old && + !compare_exchange_weak(old, operand, order, scope)) { + } + return old; + } + + T fetch_max(T operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + auto load_order = detail::getLoadOrder(order); + T old = load(load_order, scope); + while (operand > old && + !compare_exchange_weak(old, operand, order, scope)) { + } + return old; + } + +private: + using atomic_ref_base::ptr; +}; + +// Partial specialization for pointer types +template +class atomic_ref_impl::value>> + : public atomic_ref_base { + // TODO: Implement partial specialization for pointer types +}; + +} // namespace detail + +template +class atomic_ref : public detail::atomic_ref_impl { +public: + using detail::atomic_ref_impl::atomic_ref_impl; +}; + +} // namespace intel +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp new file mode 100644 index 0000000000000..b152166e4f966 --- /dev/null +++ b/sycl/test/atomic_ref/add.cpp @@ -0,0 +1,189 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +void add_fetch_test(queue q, size_t N) { + T sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer sum_buf(&sum, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + out[gid] = atm.fetch_add(T(1)); + }); + }); + } + + // All work-items increment by 1, so final value should be equal to N + assert(sum == N); + + // Fetch returns original value: will be in [0, N-1] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 0 && *max_e == N - 1); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void add_plus_equal_test(queue q, size_t N) { + T sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer sum_buf(&sum, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + out[gid] = atm += T(1); + }); + }); + } + + // All work-items 
increment by 1, so final value should be equal to N + assert(sum == N); + + // += returns updated value: will be in [1, N] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 1 && *max_e == N); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void add_pre_inc_test(queue q, size_t N) { + T sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer sum_buf(&sum, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + out[gid] = ++atm; + }); + }); + } + + // All work-items increment by 1, so final value should be equal to N + assert(sum == N); + + // Pre-increment returns updated value: will be in [1, N] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 1 && *max_e == N); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void add_post_inc_test(queue q, size_t N) { + T sum = 0; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer sum_buf(&sum, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto sum = sum_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(sum[0]); + out[gid] = atm++; + }); + }); + } + + // All work-items increment by 1, so final value should be equal to N + assert(sum == N); + + // Post-increment returns original value: will be in [0, N-1] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 0 && *max_e == N - 1); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void add_test(queue q, size_t N) { + add_fetch_test(q, N); + add_plus_equal_test(q, N); + add_pre_inc_test(q, N); + add_post_inc_test(q, N); +} + +// Floating-point types do not support pre- or post-increment +template <> +void add_test(queue q, size_t N) { + add_fetch_test(q, N); + add_plus_equal_test(q, N); +} +template <> +void add_test(queue q, size_t N) { + add_fetch_test(q, N); + add_plus_equal_test(q, N); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + add_test(q, N); + add_test(q, N); + add_test(q, N); + add_test(q, N); + add_test(q, N); + add_test(q, N); + add_test(q, N); + add_test(q, N); + //add_test(q, N); + + std::cout << "Test passed." 
<< std::endl; +} diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp new file mode 100644 index 0000000000000..8f563fccb65fd --- /dev/null +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -0,0 +1,75 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +class compare_exchange_kernel; + +template +void compare_exchange_test(queue q, size_t N) { + const T initial = std::numeric_limits::max(); + T compare_exchange = initial; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer compare_exchange_buf(&compare_exchange, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto exc = compare_exchange_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for>(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(exc[0]); + T result = initial; + bool success = atm.compare_exchange_strong(result, (T)gid); + if (success) { + out[gid] = result; + } else { + out[gid] = gid; + } + }); + }); + } + + // Only one work-item should have received the initial sentinel value + assert(std::count(output.begin(), output.end(), initial) == 1); + + // All other values should be the index itself or the sentinel value + for (int i = 0; i < N; ++i) { + assert(output[i] == T(i) || output[i] == initial); + } +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + compare_exchange_test(q, N); + //compare_exchange_test(q, N); + + std::cout << "Test passed." 
<< std::endl; +} diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp new file mode 100644 index 0000000000000..2ce1292cfdd55 --- /dev/null +++ b/sycl/test/atomic_ref/exchange.cpp @@ -0,0 +1,68 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +class exchange_kernel; + +template +void exchange_test(queue q, size_t N) { + const T initial = std::numeric_limits::max(); + T exchange = initial; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer exchange_buf(&exchange, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto exc = exchange_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for>(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(exc[0]); + out[gid] = atm.exchange(gid); + }); + }); + } + + // Only one work-item should have received the initial sentinel value + assert(std::count(output.begin(), output.end(), initial) == 1); + + // All other values should be unique; each work-item replaces the value it reads with its own ID + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + exchange_test(q, N); + //exchange_test(q, N); + + std::cout << "Test passed." 
<< std::endl; +} diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp new file mode 100644 index 0000000000000..274191b9a5ac3 --- /dev/null +++ b/sycl/test/atomic_ref/load.cpp @@ -0,0 +1,65 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +class load_kernel; + +template +void load_test(queue q, size_t N) { + T initial = 42; + T load = initial; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer load_buf(&load, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto ld = load_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for>(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(ld[0]); + out[gid] = atm.load(); + }); + }); + } + + // All work-items should read the same value + // Atomicity isn't tested here, but support for load() is + assert(std::all_of(output.begin(), output.end(), [&](T x) { return (x == initial); })); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + load_test(q, N); + load_test(q, N); + load_test(q, N); + load_test(q, N); + load_test(q, N); + load_test(q, N); + load_test(q, N); + load_test(q, N); + //load_test(q, N); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp new file mode 100644 index 0000000000000..38bb755fb5132 --- /dev/null +++ b/sycl/test/atomic_ref/max.cpp @@ -0,0 +1,74 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// XFAIL: cuda + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +void max_test(queue q, size_t N) { + T initial = std::numeric_limits::lowest(); + T val = initial; + std::vector output(N); + std::fill(output.begin(), output.end(), std::numeric_limits::max()); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + + // +1 accounts for lowest() returning 0 for unsigned types + out[gid] = atm.fetch_max(T(gid) + 1); + }); + }); + } + + // Final value should be equal to N + assert(val == N); + + // Only one work-item should have received the initial value + assert(std::count(output.begin(), output.end(), initial) == 1); + + // fetch_max returns original value + // Intermediate values should all be >= initial value + for (int i = 0; i < N; ++i) { + assert(output[i] >= initial); + } +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + max_test(q, N); + max_test(q, N); + max_test(q, N); + 
max_test(q, N); + max_test(q, N); + max_test(q, N); + max_test(q, N); + max_test(q, N); + //max_test(q, N); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp new file mode 100644 index 0000000000000..385fc13552a0d --- /dev/null +++ b/sycl/test/atomic_ref/min.cpp @@ -0,0 +1,72 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// XFAIL: cuda + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +void min_test(queue q, size_t N) { + T initial = std::numeric_limits::max(); + T val = initial; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + out[gid] = atm.fetch_min(T(gid)); + }); + }); + } + + // Final value should be equal to 0 + assert(val == 0); + + // Only one work-item should have received the initial value + assert(std::count(output.begin(), output.end(), initial) == 1); + + // fetch_min returns original value + // Intermediate values should all be <= initial value + for (int i = 0; i < N; ++i) { + assert(output[i] <= initial); + } +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + min_test(q, N); + min_test(q, N); + min_test(q, N); + min_test(q, N); + min_test(q, N); + min_test(q, N); + min_test(q, N); + min_test(q, N); + //min_test(q, N); + + std::cout << "Test passed." 
<< std::endl; +} diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp new file mode 100644 index 0000000000000..eebdba5ced095 --- /dev/null +++ b/sycl/test/atomic_ref/store.cpp @@ -0,0 +1,61 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +class store_kernel; + +template +void store_test(queue q, size_t N) { + T initial = std::numeric_limits::max(); + T store = initial; + { + buffer store_buf(&store, 1); + q.submit([&](handler &cgh) { + auto st = store_buf.template get_access(cgh); + cgh.parallel_for>(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(st[0]); + atm.store(T(gid)); + }); + }); + } + + // The initial value should have been overwritten by a work-item ID + // Atomicity isn't tested here, but support for store() is + assert(store != initial); + assert(store >= T(0) && store <= T(N - 1)); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + store_test(q, N); + store_test(q, N); + store_test(q, N); + store_test(q, N); + store_test(q, N); + store_test(q, N); + store_test(q, N); + store_test(q, N); + //store_test(q, N); + + std::cout << "Test passed." << std::endl; +} diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp new file mode 100644 index 0000000000000..52e338048e7be --- /dev/null +++ b/sycl/test/atomic_ref/sub.cpp @@ -0,0 +1,189 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::intel; + +template +void sub_fetch_test(queue q, size_t N) { + T val = N; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + out[gid] = atm.fetch_sub(T(1)); + }); + }); + } + + // All work-items decrement by 1, so final value should be equal to 0 + assert(val == 0); + + // Fetch returns original value: will be in [1, N] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 1 && *max_e == N); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void sub_plus_equal_test(queue q, size_t N) { + T val = N; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + 
out[gid] = atm -= T(1); + }); + }); + } + + // All work-items decrement by 1, so final value should be equal to 0 + assert(val == 0); + + // -= returns updated value: will be in [0, N-1] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 0 && *max_e == N - 1); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void sub_pre_dec_test(queue q, size_t N) { + T val = N; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + out[gid] = --atm; + }); + }); + } + + // All work-items decrement by 1, so final value should be equal to 0 + assert(val == 0); + + // Pre-decrement returns updated value: will be in [0, N-1] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 0 && *max_e == N - 1); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void sub_post_dec_test(queue q, size_t N) { + T val = N; + std::vector output(N); + std::fill(output.begin(), output.end(), 0); + { + buffer val_buf(&val, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + auto val = val_buf.template get_access(cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto atm = atomic_ref(val[0]); + out[gid] = atm--; + }); + }); + } + + // All work-items decrement by 1, so final value should be equal to 0 + assert(val == 0); + + // Post-decrement returns original value: will be in [1, N] + auto min_e = std::min_element(output.begin(), output.end()); + auto max_e = std::max_element(output.begin(), output.end()); + assert(*min_e == 1 && *max_e == N); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); +} + +template +void sub_test(queue q, size_t N) { + sub_fetch_test(q, N); + sub_plus_equal_test(q, N); + sub_pre_dec_test(q, N); + sub_post_dec_test(q, N); +} + +// Floating-point types do not support pre- or post-decrement +template <> +void sub_test(queue q, size_t N) { + sub_fetch_test(q, N); + sub_plus_equal_test(q, N); +} +template <> +void sub_test(queue q, size_t N) { + sub_fetch_test(q, N); + sub_plus_equal_test(q, N); +} + +int main() { + queue q; + std::string version = q.get_device().get_info(); + if (version < std::string("2.0")) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 32; + + // TODO: Enable missing tests when supported + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + sub_test(q, N); + //sub_test(q, N); + + std::cout << "Test passed." << std::endl; +}
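
For readers skimming the patch, the user-facing pattern exercised by the new tests looks roughly like the sketch below. This is a minimal illustration, not part of the patch: the kernel name `count_kernel` is invented, and the `intel::atomic_ref` template-parameter order (value type, default memory order, default memory scope, address space) is assumed from the `atomic_ref_base` declaration and the add.cpp test in this diff.

```c++
#include <CL/sycl.hpp>
#include <cassert>
using namespace sycl;

int main() {
  queue q;
  constexpr int N = 32;
  int sum = 0;
  {
    buffer<int> sum_buf(&sum, range<1>(1));
    q.submit([&](handler &cgh) {
      auto acc = sum_buf.get_access<access::mode::read_write>(cgh);
      cgh.parallel_for<class count_kernel>(range<1>(N), [=](item<1>) {
        // Wrap the shared accessor element in an atomic_ref and perform a
        // relaxed, device-scoped read-modify-write; every work-item adds 1.
        auto atm = intel::atomic_ref<int, intel::memory_order::relaxed,
                                     intel::memory_scope::device,
                                     access::address_space::global_space>(acc[0]);
        atm.fetch_add(1);
      });
    });
  }
  // Buffer destruction copies the result back; all N increments must land.
  assert(sum == N);
  return 0;
}
```

On device this maps to the `__spirv_Atomic*` wrappers added in spirv.hpp; on host it falls back to `std::atomic`. The patch also adds `intel::atomic_fence(order, scope)`, which plays the role of `std::atomic_thread_fence` with an explicit memory scope.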