diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp
index cd619a94f71bd..bab10b79abe84 100644
--- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp
+++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp
@@ -1350,11 +1350,18 @@ SmallPtrSet collectGenXVolatileTypes(Module &M) {
       continue;
     auto GTy = dyn_cast<StructType>(PTy->getPointerElementType());
     // TODO FIXME relying on type name in LLVM IR is fragile, needs rework
-    if (!GTy || !GTy->getName().endswith(
-                    "cl::sycl::ext::intel::experimental::esimd::simd"))
+    if (!GTy ||
+        !GTy->getName().endswith("sycl::ext::intel::experimental::esimd::simd"))
       continue;
     assert(GTy->getNumContainedTypes() == 1);
     auto VTy = GTy->getContainedType(0);
+    if (GTy = dyn_cast<StructType>(VTy)) {
+      assert(
+          GTy &&
+          GTy->getName().endswith(
+              "sycl::ext::intel::experimental::esimd::detail::simd_obj_impl"));
+      VTy = GTy->getContainedType(0);
+    }
     assert(VTy->isVectorTy());
     GenXVolatileTypeSet.insert(VTy);
   }
diff --git a/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp b/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp
index 9b30c72d3436a..848b289352a03 100644
--- a/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp
+++ b/llvm/lib/SYCLLowerIR/LowerESIMDVecArg.cpp
@@ -110,14 +110,18 @@ ModulePass *llvm::createESIMDLowerVecArgPass() {
 // nullptr.
 Type *ESIMDLowerVecArgPass::getSimdArgPtrTyOrNull(Value *arg) {
   auto ArgType = dyn_cast<PointerType>(arg->getType());
-  if (!ArgType || !ArgType->getElementType()->isStructTy())
+  if (!ArgType)
     return nullptr;
-  auto ContainedType = ArgType->getElementType();
-  if ((ContainedType->getStructNumElements() != 1) ||
-      !ContainedType->getStructElementType(0)->isVectorTy())
+  Type *Res = nullptr;
+  StructType *ST = dyn_cast_or_null<StructType>(ArgType->getElementType());
+
+  while (ST && (ST->getStructNumElements() == 1)) {
+    Res = ST->getStructElementType(0);
+    ST = dyn_cast<StructType>(Res);
+  }
+  if (!Res || !Res->isVectorTy())
     return nullptr;
-  return PointerType::get(ContainedType->getStructElementType(0),
-                          ArgType->getPointerAddressSpace());
+  return PointerType::get(Res, ArgType->getPointerAddressSpace());
 }
 
 // F may have multiple arguments of type simd*. This
diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp
index d8a493cd6b3d7..5d8661bb85cd5 100644
--- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp
+++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/intrin.hpp
@@ -18,9 +18,6 @@
 #include
 #include
 
-#define __SEIEED sycl::ext::intel::experimental::esimd::detail
-#define __SEIEE sycl::ext::intel::experimental::esimd
-
 // \brief __esimd_rdregion: region access intrinsic.
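// Illustrative sketch (not part of the patch; helper name and include assumed):
// with the new hierarchy the storage vector of simd<T, N> sits one struct level
// deeper (inside simd_obj_impl), so getSimdArgPtrTyOrNull above peels nested
// single-element structs until it reaches the underlying vector type.

#include "llvm/IR/DerivedTypes.h"

static llvm::Type *peelSingleElementStructs(llvm::StructType *ST) {
  llvm::Type *Res = nullptr;
  while (ST && ST->getNumElements() == 1) {
    Res = ST->getElementType(0);                // descend into the only member
    ST = llvm::dyn_cast<llvm::StructType>(Res); // continue while it is a struct
  }
  return (Res && Res->isVectorTy()) ? Res : nullptr; // vector type, or nullptr
}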
// // @param T the element data type, one of i8, i16, i32, i64, half, float, @@ -125,14 +122,14 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_storage_t Mask = 1); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask = 1); + __SEIEED::simd_mask_storage_t Mask = 1); __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -286,7 +283,7 @@ template __esimd_wrregion(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, uint16_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_storage_t Mask) { uint16_t EltOffset = Offset / sizeof(T); assert(Offset % sizeof(T) == 0); @@ -310,7 +307,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_wrindirect(__SEIEED::vector_type_t OldVal, __SEIEED::vector_type_t NewVal, __SEIEED::vector_type_t Offset, - __SEIEE::mask_type_t Mask) { + __SEIEED::simd_mask_storage_t Mask) { __SEIEED::vector_type_t Result = OldVal; for (int i = 0; i < M; ++i) { if (Mask[i]) { @@ -324,6 +321,3 @@ __esimd_wrindirect(__SEIEED::vector_type_t OldVal, } #endif // __SYCL_DEVICE_ONLY__ - -#undef __SEIEE -#undef __SEIEED diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp index 619f9f598f198..7f2f5fc5bdd10 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/math_intrin.hpp @@ -15,11 +15,10 @@ #include #include #include +#include #include -#define __SEIEED sycl::ext::intel::experimental::esimd::detail - // saturation intrinsics template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t @@ -385,8 +384,6 @@ inline T extract(const uint32_t &width, const uint32_t &offset, uint32_t src, return ret; } -#define __SEIEEED sycl::ext::intel::experimental::esimd::emu::detail - template inline __SEIEED::vector_type_t __esimd_satf(__SEIEED::vector_type_t src) { @@ -1327,8 +1324,4 @@ __esimd_reduced_smin(__SEIEED::vector_type_t src1, return __esimd_reduced_min(src1, src2); } -#undef __SEIEEED - #endif // #ifdef __SYCL_DEVICE_ONLY__ - -#undef __SEIEED diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 03ec0ab45b171..1751c32d37084 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -74,9 +74,6 @@ constexpr unsigned int ElemsPerAddrDecoding(unsigned int ElemsPerAddrEncoded) { } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#define __SEIEE sycl::ext::intel::experimental::esimd -#define __SEIEED sycl::ext::intel::experimental::esimd::detail - // flat_read does flat-address gather template __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr = NumBlk, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr = NumBlk, __SEIEED::vector_type_t pred = 1); + int ElemsPerAddr = NumBlk, __SEIEED::simd_mask_storage_t pred = 1); // flat_block_read reads a block of data from one flat address template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION 
__esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // flat_write does flat-address scatter template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // Low-level surface-based gather. Collects elements located at given offsets in // a surface and returns them as a single \ref simd object. Element can be @@ -205,7 +202,7 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, +__esimd_surf_write(__SEIEED::simd_mask_storage_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, __SEIEED::vector_type_t elem_offsets, __SEIEED::vector_type_t vals) @@ -229,7 +226,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L3H = __SEIEE::CacheHint::None> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -237,7 +234,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H = __SEIEE::CacheHint::None, @@ -246,7 +243,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); // esimd_barrier, generic group barrier SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_barrier(); @@ -262,14 +259,14 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_fence(uint8_t cntl); template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_write does SLM scatter template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_block_read reads a block of data from SLM template @@ -286,33 +283,33 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_write4 does SLM scatter4 template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred = 1); + __SEIEED::simd_mask_storage_t pred = 1); // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); template <__SEIEE::atomic_op Op, typename Ty, int N> 
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred); + __SEIEED::simd_mask_storage_t pred); // Media block load // @@ -418,9 +415,9 @@ template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1, __SEIEED::vector_type_t msgDst); @@ -454,9 +451,9 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgDst); @@ -485,13 +482,11 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// @param msgSrc1 the second source operand of send message. /// template -SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1); +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_sends_store( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1); /// \brief Raw send store. 
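// Illustrative sketch (not part of the patch; assumes simd_mask_storage_t<N> is
// the raw clang-vector type backing the new simd_mask<N> class and that the
// ESIMD umbrella header is included; function name is hypothetical): a
// user-level mask reaches the 'pred' parameters of the intrinsics above via data().
inline void mask_storage_example() {
  using namespace sycl::ext::intel::experimental::esimd;
  simd_mask<16> pred = 1;                            // all lanes enabled
  detail::simd_mask_storage_t<16> raw = pred.data(); // raw intrinsic operand
  (void)raw; // public wrappers forward this value as the intrinsics' 'pred'
}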
/// @@ -515,9 +510,8 @@ __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, template SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, - uint32_t msgDesc, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, + uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0); #ifndef __SYCL_DEVICE_ONLY__ @@ -525,7 +519,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read(__SEIEED::vector_type_t addrs, int ElemsPerAddr, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); __SEIEED::vector_type_t V; ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -551,7 +545,7 @@ template inline __SEIEED::vector_type_t __esimd_flat_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -601,7 +595,7 @@ inline void __esimd_flat_write( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - int ElemsPerAddr, __SEIEED::vector_type_t pred) { + int ElemsPerAddr, __SEIEED::simd_mask_storage_t pred) { auto NumBlkDecoded = __SEIEED::ElemsPerAddrDecoding(NumBlk); ElemsPerAddr = __SEIEED::ElemsPerAddrDecoding(ElemsPerAddr); @@ -626,7 +620,7 @@ template addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t V; unsigned int Next = 0; @@ -830,7 +824,7 @@ inline void __esimd_slm_fence(uint8_t cntl) {} template inline __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -839,7 +833,7 @@ __esimd_slm_read(__SEIEED::vector_type_t addrs, template inline void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_storage_t pred) {} // slm_block_read reads a block of data from SLM template @@ -857,7 +851,7 @@ inline void __esimd_slm_block_write(uint32_t addr, template inline __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -867,13 +861,13 @@ template inline void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::simd_mask_storage_t pred) {} // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -882,7 +876,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N> inline __SEIEED::vector_type_t __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -892,7 +886,7 @@ inline __SEIEED::vector_type_t __esimd_slm_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -901,7 +895,7 @@ template <__SEIEE::atomic_op Op, 
typename Ty, int N, __SEIEE::CacheHint L1H, __SEIEE::CacheHint L3H> inline __SEIEED::vector_type_t __esimd_flat_atomic0(__SEIEED::vector_type_t addrs, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -911,7 +905,7 @@ template <__SEIEE::atomic_op Op, typename Ty, int N, __SEIEE::CacheHint L1H, inline __SEIEED::vector_type_t __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -922,7 +916,7 @@ inline __SEIEED::vector_type_t __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t src1, - __SEIEED::vector_type_t pred) { + __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; return retv; } @@ -986,14 +980,12 @@ inline uint32_t __esimd_get_value(AccessorTy acc) { /// template -inline __SEIEED::vector_type_t -__esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, - uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgSrc1, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_sends_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t sfid, + uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgSrc1, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1025,13 +1017,11 @@ __esimd_raw_sends_load(uint8_t modifier, uint8_t execSize, /// Returns a simd vector of type Ty1 and size N1. 
/// template -inline __SEIEED::vector_type_t -__esimd_raw_send_load(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, - uint8_t numSrc0, uint8_t numDst, uint8_t sfid, - uint32_t exDesc, uint32_t msgDesc, - __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgDst) { +inline __SEIEED::vector_type_t __esimd_raw_send_load( + uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, + uint8_t numSrc0, uint8_t numDst, uint8_t sfid, uint32_t exDesc, + uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } @@ -1062,7 +1052,7 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, uint8_t numSrc1, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, @@ -1092,7 +1082,7 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, /// template inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, - __SEIEED::vector_type_t pred, + __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { @@ -1100,6 +1090,3 @@ inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, } #endif // __SYCL_DEVICE_ONLY__ - -#undef __SEIEED -#undef __SEIEE diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp new file mode 100644 index 0000000000000..039d27ca95874 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/operators.hpp @@ -0,0 +1,424 @@ +//==-------------- operators.hpp - DPC++ Explicit SIMD API -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Binary operator definitions for ESIMD types. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include + +#include +#include + +// Table of contents: +// +// simd_obj_impl/simd/simd_mask global operators +// bitwise logic and arithmetic operators +// simd_obj_impl BINOP simd_obj_impl +// simd_obj_impl BINOP SCALAR +// SCALAR BINOP simd_obj_impl +// comparison operators +// simd_obj_impl CMPOP simd_obj_impl +// simd_obj_impl CMPOP SCALAR +// SCALAR CMPOP simd_obj_impl +// simd_view global operators +// bitwise logic and arithmetic operators +// simd_view BINOP simd_view +// simd* BINOP simd_view +// simd_view BINOP simd* +// SCALAR BINOP simd_view +// simd_view BINOP SCALAR +// comparison operators +// simd_view CMPOP simd_view +// simd_view CMPOP simd_obj_impl +// simd_obj_impl CMPOP simd_view +// simd_view CMPOP SCALAR +// SCALAR CMPOP simd_view +// +// Some operations are enabled only for particular element and simd object type +// (simd or simd_mask): +// - bitwise logic operations - for integral element types (both simd and +// simd_mask) +// - bit shift operations and and '%' - for the simd type (not for simd_mask) +// with integral element types. 
+// - arithmetic binary operations - for the simd type (not for simd_mask) +// In all cases, when an operation has a simd_view and a simd_obj_impl's +// subclass objects as operands, it is enabled only when: +// - simd_view's base type matches the simd object operand. I.e. only +// { simd_view, simd } and { simd_view, simd_mask } +// pairs are enabled (with any order of operand types). +// - simd_view's value length matches the length of the simd object operand + +// Put operators into the ESIMD namespace to make argument-dependent lookup find +// these operators instead of those defined in e.g. sycl namespace (which would +// stop further lookup, leaving just non-viable sycl::operator < etc. on the +// table). +namespace __SEIEED { + +//////////////////////////////////////////////////////////////////////////////// +// simd_obj_impl global operators +//////////////////////////////////////////////////////////////////////////////// + +// ========= simd_obj_impl bitwise logic and arithmetic operators + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(BINOP, COND) \ + \ + /* simd_obj_impl BINOP simd_obj_impl */ \ + template class SimdT, \ + class SimdTx = SimdT, class = std::enable_if_t> \ + inline auto operator BINOP( \ + const __SEIEED::simd_obj_impl> &LHS, \ + const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + using SimdPromotedT = \ + __SEIEED::computation_type_t, SimdT>; \ + using VecT = typename SimdPromotedT::vector_type; \ + return SimdPromotedT(__SEIEED::convert(LHS.data()) \ + BINOP __SEIEED::convert(RHS.data())); \ + } else { \ + /* for SimdT=simd_mask_impl T1 and T2 are both equal to \ + * simd_mask_elem_type */ \ + return SimdT(LHS.data() BINOP RHS.data()); \ + } \ + } \ + \ + /* simd_obj_impl BINOP SCALAR */ \ + template class SimdT1, class T2, \ + class SimdTx = SimdT1, class = std::enable_if_t> \ + inline auto operator BINOP( \ + const __SEIEED::simd_obj_impl> &LHS, T2 RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + /* convert the SCALAR to vector type and reuse the basic operation over \ + * simd objects */ \ + return LHS BINOP SimdT1(RHS); \ + } else { \ + /* SimdT1 is a mask, T1 is mask element type - convert RHS implicitly to \ + * T1 */ \ + return LHS BINOP SimdT1(RHS); \ + } \ + } \ + \ + /* SCALAR BINOP simd_obj_impl */ \ + template class SimdT2, \ + class SimdTx = SimdT2, class = std::enable_if_t> \ + inline auto operator BINOP( \ + T1 LHS, const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + /* convert the SCALAR to vector type and reuse the basic operation over \ + * simd objects */ \ + return SimdT2(LHS) BINOP RHS; \ + } else { \ + /* simd_mask_case */ \ + return SimdT2(LHS) BINOP RHS; \ + } \ + } + +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(^, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(|, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(&, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER + +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(%, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(<<, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(>>, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define __ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_vectorizable_v &&__SEIEED::is_vectorizable_v \ + &&__SEIEED::is_simd_type_v + +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(+, 
__ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(-, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(*, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP(/, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +#undef __ESIMD_DEF_SIMD_OBJ_IMPL_BIN_OP + +// ========= simd_obj_impl comparison operators +// Both simd and simd_mask will match simd_obj_impl argument when resolving +// operator overloads. + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(CMPOP, COND) \ + \ + /* simd_obj_impl CMPOP simd_obj_impl */ \ + template class SimdT, \ + class SimdTx = SimdT, class = std::enable_if_t> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl> &LHS, \ + const __SEIEED::simd_obj_impl> &RHS) { \ + using MaskVecT = typename __SEIEE::simd_mask::vector_type; \ + \ + if constexpr (__SEIEED::is_simd_type_v>) { \ + using PromSimdT = \ + __SEIEED::computation_type_t, SimdT>; \ + using PromVecT = typename PromSimdT::vector_type; \ + auto ResVec = __SEIEED::convert(LHS.data()) \ + CMPOP __SEIEED::convert(RHS.data()); \ + return __SEIEE::simd_mask(__SEIEED::convert(ResVec) & \ + MaskVecT(1)); \ + } else { \ + /* this is comparison of masks, don't perform type promotion */ \ + auto ResVec = LHS.data() CMPOP RHS.data(); \ + return __SEIEE::simd_mask(__SEIEED::convert(ResVec) & \ + MaskVecT(1)); \ + } \ + } \ + \ + /* simd_obj_impl CMPOP SCALAR */ \ + template class SimdT1, class T2, \ + class SimdTx = SimdT1, \ + class = std::enable_if_t<__SEIEED::is_vectorizable_v && COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl> &LHS, T2 RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) \ + /* simd case */ \ + return LHS CMPOP SimdT1(RHS); \ + else \ + /* simd_mask case - element type is fixed */ \ + return LHS CMPOP SimdT1((T1)RHS); \ + } \ + \ + /* SCALAR CMPOP simd_obj_impl */ \ + template class SimdT2, \ + class SimdTx = SimdT2, \ + class = std::enable_if_t<__SEIEED::is_vectorizable_v && COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + T1 LHS, const __SEIEED::simd_obj_impl> &RHS) { \ + if constexpr (__SEIEED::is_simd_type_v>) \ + /* simd case */ \ + return SimdT2(LHS) CMPOP RHS; \ + else \ + /* simd_mask case - element type is fixed */ \ + return SimdT2((T2)LHS) CMPOP RHS; \ + } + +// Equality comparison is defined for all simd_obj_impl subclasses. +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(==, true) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(!=, true) + +// Relational operators are defined only for the simd type. 
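// Illustrative sketch (not part of the patch; assumes an ESIMD context with the
// umbrella header included; function name is hypothetical): comparisons on simd
// now yield simd_mask, masks combine via the logical operators, and mixed-type
// arithmetic is promoted through computation_type_t.
inline void simd_op_example() {
  simd<float, 8> x(0.f, 1.f);               // 0, 1, ..., 7
  simd<float, 8> y(3.f);                    // broadcast
  simd_mask<8> lt = x < y;                  // relational op (simd only, defined below)
  simd_mask<8> nz = x != 0.f;               // equality op (simd and simd_mask)
  simd_mask<8> both = lt && nz;             // logical op (simd_mask only)
  simd<float, 8> sum = x + simd<int, 8>(1); // int operand promoted to float
  (void)both;
  (void)sum;
}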
+__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(<, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(>, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(<=, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(>=, __SEIEED::is_simd_type_v) + +// Logical operators are defined only for the simd_mask type +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(&&, __SEIEED::is_simd_mask_type_v) +__ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP(||, __SEIEED::is_simd_mask_type_v) + +#undef __ESIMD_DEF_SIMD_OBJ_IMPL_CMP_OP +} // namespace __SEIEED + +namespace __SEIEE { +//////////////////////////////////////////////////////////////////////////////// +// simd_view global operators +//////////////////////////////////////////////////////////////////////////////// + +// ========= simd_view bitwise logic and arithmetic operators + +#define __ESIMD_DEF_SIMD_VIEW_BIN_OP(BINOP, COND) \ + \ + /* simd_view BINOP simd_view */ \ + template ::element_type, \ + class T2 = typename __SEIEE::shape_type::element_type, \ + auto N1 = __SEIEE::shape_type::length, \ + auto N2 = __SEIEE::shape_type::length, \ + class = \ + std::enable_if_t<__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v && \ + (N1 == N2 || N1 == 1 || N2 == 1) && COND>> \ + inline auto operator BINOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEE::simd_view &RHS) { \ + if constexpr (N1 == 1) \ + return (T1)LHS.read()[0] BINOP RHS.read(); \ + else if constexpr (N2 == 1) \ + return LHS.read() BINOP(T2) RHS.read()[0]; \ + else \ + return LHS.read() BINOP RHS.read(); \ + } \ + \ + /* simd* BINOP simd_view */ \ + template ::element_type, \ + class = std::enable_if_t< \ + __SEIEED::is_simd_obj_impl_derivative_v && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&(SimdT1::length == \ + __SEIEE::shape_type< \ + RegionT2>::length) && \ + COND>> \ + inline auto operator BINOP( \ + const SimdT1 &LHS, const __SEIEE::simd_view &RHS) { \ + return LHS BINOP RHS.read(); \ + } \ + \ + /* simd_view BINOP simd* */ \ + template < \ + class SimdT1, class RegionT1, class SimdT2, \ + class T1 = typename __SEIEE::shape_type::element_type, \ + class T2 = typename SimdT2::element_type, \ + class = std::enable_if_t< \ + __SEIEED::is_simd_obj_impl_derivative_v && \ + __SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v && \ + (SimdT2::length == __SEIEE::shape_type::length) && COND>> \ + inline auto operator BINOP(const __SEIEE::simd_view &LHS, \ + const SimdT2 &RHS) { \ + return LHS.read() BINOP RHS; \ + } \ + \ + /* SCALAR BINOP simd_view */ \ + template && COND>> \ + inline auto operator BINOP(T1 LHS, const SimdViewT2 &RHS) { \ + using SimdT = typename SimdViewT2::value_type; \ + return SimdT(LHS) BINOP RHS.read(); \ + } \ + \ + /* simd_view BINOP SCALAR */ \ + template && COND>> \ + inline auto operator BINOP(const SimdViewT1 &LHS, T2 RHS) { \ + using SimdT = typename SimdViewT1::value_type; \ + return LHS.read() BINOP SimdT(RHS); \ + } + +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v +__ESIMD_DEF_SIMD_VIEW_BIN_OP(^, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(|, __ESIMD_BITWISE_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(&, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILITER + +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(%, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(<<, __ESIMD_SHIFT_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(>>, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define 
__ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_simd_type_v &&__SEIEED::is_vectorizable_v \ + &&__SEIEED::is_vectorizable_v + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(+, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(-, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(*, __ESIMD_ARITH_OP_FILTER) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(/, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +__ESIMD_DEF_SIMD_VIEW_BIN_OP(&&, __SEIEED::is_simd_mask_type_v) +__ESIMD_DEF_SIMD_VIEW_BIN_OP(||, __SEIEED::is_simd_mask_type_v) + +#undef __ESIMD_DEF_SIMD_VIEW_BIN_OP + +// ========= simd_view comparison operators + +#define __ESIMD_DEF_SIMD_VIEW_CMP_OP(CMPOP, COND) \ + \ + /* simd_view CMPOP simd_view */ \ + template ::length, \ + auto N2 = __SEIEE::shape_type::length, \ + class = std::enable_if_t == \ + __SEIEED::is_simd_type_v< \ + SimdT2>)&&/* the length of the views \ + must match as well: */ \ + (N1 == N2 || N1 == 1 || N2 == 1) && \ + COND>> \ + inline auto operator CMPOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEE::simd_view &RHS) { \ + using T1 = typename __SEIEE::shape_type::element_type; \ + using T2 = typename __SEIEE::shape_type::element_type; \ + if constexpr (N1 == 1) \ + return (T1)LHS.read()[0] CMPOP RHS.read(); \ + else if constexpr (N2 == 1) \ + return LHS.read() CMPOP(T2) RHS.read()[0]; \ + else \ + return LHS.read() CMPOP RHS.read(); \ + } \ + \ + /* simd_view CMPOP simd_obj_impl */ \ + template ::length == N2) && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEE::simd_view &LHS, \ + const __SEIEED::simd_obj_impl &RHS) { \ + return LHS.read() CMPOP SimdT2(RHS.data()); \ + } \ + \ + /* simd_obj_impl CMPOP simd_view */ \ + template ::length == N1) && \ + (__SEIEED::is_simd_type_v == \ + __SEIEED::is_simd_type_v)&&COND>> \ + inline __SEIEE::simd_mask operator CMPOP( \ + const __SEIEED::simd_obj_impl &LHS, \ + const __SEIEE::simd_view &RHS) { \ + return SimdT1(LHS.data()) CMPOP RHS.read(); \ + } \ + \ + /* simd_view CMPOP SCALAR */ \ + template && COND>> \ + inline auto operator CMPOP(const __SEIEE::simd_view &LHS, \ + T2 RHS) { \ + using SimdValueT = \ + typename __SEIEE::simd_view::value_type; \ + return LHS.read() CMPOP SimdValueT(RHS); \ + } \ + \ + /* SCALAR CMPOP simd_view */ \ + template && COND>> \ + inline auto operator CMPOP( \ + T1 LHS, const __SEIEE::simd_view &RHS) { \ + using SimdValueT = \ + typename __SEIEE::simd_view::value_type; \ + return SimdValueT(LHS) CMPOP RHS.read(); \ + } + +// Equality comparison is defined for views of all simd_obj_impl derivatives. +__ESIMD_DEF_SIMD_VIEW_CMP_OP(==, true) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(!=, true) + +// Relational operators are defined only for views of the simd class. +__ESIMD_DEF_SIMD_VIEW_CMP_OP(<, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(>, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(<=, __SEIEED::is_simd_type_v) +__ESIMD_DEF_SIMD_VIEW_CMP_OP(>=, __SEIEED::is_simd_type_v) + +#undef __ESIMD_DEF_SIMD_VIEW_CMP_OP + +} // namespace __SEIEE diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp index 39092fea83616..df541f4e81d14 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/region.hpp @@ -50,15 +50,16 @@ struct region_base { // A basic 1D region type. 
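// Illustrative sketch (not part of the patch; assumes an ESIMD context with the
// umbrella header included; function name is hypothetical): the simd_view
// operators defined in operators.hpp above let region selects be compared
// directly, and the resulting simd_mask can drive a predicated merge.
inline void simd_view_op_example() {
  simd<int, 16> a(0, 1);                     // 0, 1, ..., 15
  auto evens = a.select<8, 2>(0);            // simd_view of elements 0, 2, ..., 14
  simd_mask<8> m = evens > 5;                // simd_view CMPOP SCALAR
  simd<int, 8> r = a.select<8, 2>(1).read(); // odd elements as a simd value
  r.merge(0, m);                             // where m is set take 0, else keep r
  (void)r;
}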
template -using region1d_t = region_base; +using region1d_t = region_base; // A basic 2D region type. template using region2d_t = region_base; // A region with a single element. -template -using region1d_scalar_t = region_base; +template +using region1d_scalar_t = + region_base; // simd_view forward declaration. template class simd_view; @@ -92,12 +93,20 @@ template struct shape_type> { using element_type = Ty; using type = region1d_t; + static inline constexpr int length = type::length; +}; + +template struct shape_type> { + using element_type = Ty; + using type = region1d_t; + static inline constexpr int length = type::length; }; template struct shape_type> { using element_type = Ty; using type = region2d_t; + static inline constexpr int length = type::length; }; // Forward the shape computation on the top region type. diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp new file mode 100644 index 0000000000000..407d592cf780c --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_mask_impl.hpp @@ -0,0 +1,128 @@ +//==------------ - simd_mask_impl.hpp - DPC++ Explicit SIMD API ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implementation detail of Explicit SIMD mask class. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { +namespace detail { + +#define __ESIMD_MASK_DEPRECATION_MSG \ + "Use of 'simd'/'simd_view' class to represent predicate or mask " \ + "is deprecated. Use " \ + "'simd_mask'/'simd_view' instead." + +template +class simd_mask_impl + : public detail::simd_obj_impl< + T, N, simd_mask_impl, + std::enable_if_t>> { + using base_type = detail::simd_obj_impl>; + +public: + using element_type = T; + using vector_type = typename base_type::vector_type; + static_assert(std::is_same_v> && + "mask impl type mismatch"); + + simd_mask_impl() = default; + simd_mask_impl(const simd_mask_impl &other) : base_type(other) {} + + /// Broadcast constructor with conversion. + template >> + simd_mask_impl(T1 Val) : base_type((T)Val) {} + + /// Implicit conversion constructor from a raw vector object. + // TODO this should be made inaccessible from user code. + simd_mask_impl(const vector_type &Val) : base_type(Val) {} + + /// Initializer list constructor. + __SYCL_DEPRECATED("use constructor from array, e.g: simd_mask<3> x({0,1,1});") + simd_mask_impl(std::initializer_list Ilist) : base_type(Ilist) {} + + /// Construct from an array. To allow e.g. simd_mask m({1,0,0,1,...}). + template > + simd_mask_impl(const element_type(&&Arr)[N1]) { + base_type::template init_from_array(std::move(Arr)); + } + + /// Implicit conversion from simd. + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask_impl(const simd &Val) : base_type(Val.data()) {} + + /// Implicit conversion from simd_view. 
+ template < + // viewed simd class parameters + int N1, class T1, + // view region + class RegionT2, + // view element type + class T2 = typename __SEIEE::shape_type::element_type, + // view size in elements + int N2 = __SEIEE::shape_type::length, + // enable only if view length and element type match this object + class = std::enable_if_t>> + __SYCL_DEPRECATED(__ESIMD_MASK_DEPRECATION_MSG) + simd_mask_impl(const simd_view, RegionT2> &Val) + : base_type(Val.read().data()) {} + +private: + static inline constexpr bool mask_size_ok_for_mem_io() { + constexpr unsigned Sz = sizeof(element_type) * N; + return (Sz >= detail::OperandSize::OWORD) && + (Sz % detail::OperandSize::OWORD == 0) && + detail::isPowerOf2(Sz / detail::OperandSize::OWORD) && + (Sz <= 8 * detail::OperandSize::OWORD); + } + +public: + // TODO add accessor-based mask memory operations. + + /// Load constructor. + // Implementation note: use SFINAE to avoid overload ambiguity: + // 1) with 'simd_mask(element_type v)' in 'simd_mask m(0)' + // 2) with 'simd_mask(const T1(&&arr)[N])' in simd_mask + // m((element_type*)p)' + template >> + explicit simd_mask_impl(const T1 *ptr) { + base_type::copy_from(ptr); + } + + /// Broadcast assignment operator to support simd_mask_impl n = a > b; + simd_mask_impl &operator=(element_type val) noexcept { + base_type::set(val); + return *this; + } + + template > + operator bool() { + return base_type::data()[0] != 0; + } +}; + +#undef __ESIMD_MASK_DEPRECATION_MSG + +} // namespace detail +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp new file mode 100644 index 0000000000000..e7bb5ff37eace --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp @@ -0,0 +1,720 @@ +//==------------ - simd_obj_impl.hpp - DPC++ Explicit SIMD API -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Implement Explicit SIMD vector APIs. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { +namespace detail { + +/// The simd_obj_impl vector class. +/// +/// This is a base class for all ESIMD simd classes with real storage (simd, +/// simd_mask_impl). It wraps a clang vector as the storage for the elements. +/// Additionally this class supports region operations that map to Intel GPU +/// regions. The type of a region select or bit_cast_view operation is of +/// simd_view type, which models read-update-write semantics. +/// +/// For the is_simd_obj_impl_derivative helper to work correctly, all derived +/// classes must be templated by element type and number of elements. If fewer +/// template arguments are needed, template aliases can be used +/// (simd_mask_type). 
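// Illustrative sketch (not part of the patch; assumes an ESIMD context with the
// umbrella header included; function name is hypothetical): the simd_mask
// construction and assignment forms defined in simd_mask_impl.hpp above.
inline void simd_mask_example() {
  simd_mask<8> all_on(1);                         // broadcast
  simd_mask<8> pattern({1, 0, 0, 1, 1, 0, 0, 1}); // from array (initializer_list is deprecated)
  simd_mask<1> flag = pattern[0];                 // single-element mask
  if (flag)                                       // operator bool(), N == 1 only
    all_on = 0;                                   // broadcast assignment
  (void)all_on;
}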
+/// +/// \tparam Ty the element type +/// \tparam N number of elements +/// \tparam Derived - a class derived from this one; this class and its +/// derivatives must follow the 'curiously recurring template' pattern. +/// \tparam SFINAE - defaults to 'void' in the forward declarion within +/// types.hpp, used to disable invalid specializations. +/// +/// \ingroup sycl_esimd +template class simd_obj_impl { + template friend class simd_view; + template friend class simd; + template friend class simd_mask_impl; + +public: + /// The underlying builtin data type. + using vector_type = vector_type_t; + + /// The element type of this simd_obj_impl object. + using element_type = Ty; + + /// The number of elements in this simd_obj_impl object. + static constexpr int length = N; + +protected: + template > + void init_from_array(const Ty(&&Arr)[N1]) noexcept { + for (auto I = 0; I < N; ++I) { + M_data[I] = Arr[I]; + } + } + +private: + Derived &cast_this_to_derived() { return reinterpret_cast(*this); } + +public: + /// @{ + /// Constructors. + simd_obj_impl() = default; + simd_obj_impl(const simd_obj_impl &other) { + __esimd_dbg_print(simd_obj_impl(const simd_obj_impl &other)); + set(other.data()); + } + + /// Implicit conversion constructor from another \c simd_obj_impl object. + template + simd_obj_impl( + const simd_obj_impl, + SFINAE> &other) { + __esimd_dbg_print(simd_obj_impl(const simd_obj_impl... > &other)); + if constexpr (std::is_same_v) + set(other.data()); + else + set(__builtin_convertvector(other.data(), vector_type)); + } + + /// Implicit conversion constructor from a raw vector object. + simd_obj_impl(const vector_type &Val) { + __esimd_dbg_print(simd_obj_impl(const vector_type &Val)); + set(Val); + } + + /// This constructor is deprecated for two reasons: + /// 1) it adds confusion between + /// simd s1(1,2); //calls next constructor + /// simd s2{1,2}; //calls this constructor (uniform initialization syntax) + /// 2) no compile-time control over the size of the initializer; e.g. the + /// following will compile: + /// simd x = {1, 2, 3, 4}; + __SYCL_DEPRECATED("use constructor from array, e.g: simd x({1,2,3});") + simd_obj_impl(std::initializer_list Ilist) noexcept { + __esimd_dbg_print(simd_obj_impl(std::initializer_list Ilist)); + int i = 0; + for (auto It = Ilist.begin(); It != Ilist.end() && i < N; ++It) { + M_data[i++] = *It; + } + } + + /// Initialize a simd_obj_impl object with an initial value and step. + simd_obj_impl(Ty Val, Ty Step) noexcept { + __esimd_dbg_print(simd_obj_impl(Ty Val, Ty Step)); +#pragma unroll + for (int i = 0; i < N; ++i) { + M_data[i] = Val; + Val += Step; + } + } + + /// Broadcast constructor + simd_obj_impl(Ty Val) noexcept { + __esimd_dbg_print(simd_obj_impl(Ty Val)); + M_data = Val; + } + + /// Construct from an array. To allow e.g. simd_mask_type m({1,0,0,1,...}). + template > + simd_obj_impl(const Ty(&&Arr)[N1]) noexcept { + __esimd_dbg_print(simd_obj_impl(const Ty(&&Arr)[N1])); + init_from_array(std::move(Arr)); + } + + /// @} + + // Load the object's value from array. + template std::enable_if_t copy_from(const Ty (&Arr)[N1]) { + __esimd_dbg_print(copy_from(const Ty(&Arr)[N1])); + vector_type Tmp; + for (auto I = 0; I < N; ++I) { + Tmp[I] = Arr[I]; + } + set(Tmp); + } + + // Store the object's value to array. + template std::enable_if_t copy_to(Ty (&Arr)[N1]) const { + __esimd_dbg_print(copy_to(Ty(&Arr)[N1])); + for (auto I = 0; I < N; ++I) { + Arr[I] = data()[I]; + } + } + + /// @{ + /// Conversion operators. 
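// Illustrative sketch (not part of the patch; assumes an ESIMD context with the
// umbrella header included; function name is hypothetical): the constructor and
// array copy forms defined above, as seen through the simd subclass.
inline void simd_ctor_example() {
  simd<int, 8> a(5);                        // broadcast: 5, 5, ..., 5
  simd<int, 8> b(0, 2);                     // base + step: 0, 2, ..., 14
  simd<int, 8> c({0, 1, 2, 3, 4, 5, 6, 7}); // from array (initializer_list is deprecated)
  simd<float, 8> d = b;                     // converting construction, int -> float
  float tmp[8];
  d.copy_to(tmp);                           // store to a plain array
  (void)a;
  (void)c;
}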
+ explicit operator const vector_type &() const & { + __esimd_dbg_print(explicit operator const vector_type &() const &); + return M_data; + } + explicit operator vector_type &() & { + __esimd_dbg_print(explicit operator vector_type &() &); + return M_data; + } + + /// Explicit conversion for simd_obj_impl into T. + template > + operator Ty() const { + __esimd_dbg_print(explicit operator Ty()); + return data()[0]; + } + /// @} + + vector_type data() const { + __esimd_dbg_print(vector_type data()); +#ifndef __SYCL_DEVICE_ONLY__ + return M_data; +#else + return __esimd_vload(&M_data); +#endif + } + + /// Whole region read. + Derived read() const { return Derived{data()}; } + + /// Whole region write. + Derived &write(const Derived &Val) { + set(Val.data()); + return cast_this_to_derived(); + } + + /// Whole region update with predicates. + void merge(const Derived &Val, const simd_mask_type &Mask) { + set(__esimd_wrregion(data(), Val.data(), 0, + Mask.data())); + } + + void merge(const Derived &Val1, Derived Val2, const simd_mask_type &Mask) { + Val2.merge(Val1, Mask); + set(Val2.data()); + } + + /// View this simd_obj_impl object in a different element type. + template auto bit_cast_view() &[[clang::lifetimebound]] { + using TopRegionTy = compute_format_type_t; + using RetTy = simd_view; + return RetTy{cast_this_to_derived(), TopRegionTy{0}}; + } + + template + __SYCL_DEPRECATED("use simd_obj_impl::bit_cast_view.") + auto format() & { + return bit_cast_view(); + } + + /// View as a 2-dimensional simd_view. + template + auto bit_cast_view() &[[clang::lifetimebound]] { + using TopRegionTy = + compute_format_type_2d_t; + using RetTy = simd_view; + return RetTy{cast_this_to_derived(), TopRegionTy{0, 0}}; + } + + template + __SYCL_DEPRECATED("use simd_obj_impl::bit_cast_view.") + auto format() & { + return bit_cast_view(); + } + + /// 1D region select, apply a region on top of this LValue object. + /// + /// \tparam Size is the number of elements to be selected. + /// \tparam Stride is the element distance between two consecutive elements. + /// \param Offset is the starting element offset. + /// \return the representing region object. + template + simd_view> + select(uint16_t Offset = 0) &[[clang::lifetimebound]] { + static_assert(Size > 1 || Stride == 1, + "Stride must be 1 in single-element region"); + region1d_t Reg(Offset); + return {cast_this_to_derived(), std::move(Reg)}; + } + + /// 1D region select, apply a region on top of this RValue object. + /// + /// \tparam Size is the number of elements to be selected. + /// \tparam Stride is the element distance between two consecutive elements. + /// \param Offset is the starting element offset. + /// \return the value this region object refers to. + template + resize_a_simd_type_t select(uint16_t Offset = 0) && { + static_assert(Size > 1 || Stride == 1, + "Stride must be 1 in single-element region"); + Derived &&Val = std::move(cast_this_to_derived()); + return __esimd_rdregion(Val.data(), + Offset); + } + + /// Read single element, return value only (not reference). + Ty operator[](int i) const { return data()[i]; } + + /// Read single element, return value only (not reference). + __SYCL_DEPRECATED("use operator[] form.") + Ty operator()(int i) const { return data()[i]; } + + /// Return writable view of a single element. + simd_view> operator[](int i) + [[clang::lifetimebound]] { + return select<1, 1>(i); + } + + /// Return writable view of a single element. 
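// Illustrative sketch (not part of the patch; assumes an ESIMD context with the
// umbrella header included; function name is hypothetical): region selects and
// bit_cast_view defined above model read-update-write regions over the vector.
inline void simd_region_example() {
  simd<int, 16> v(0, 1);                         // 0, 1, ..., 15
  simd<int, 8> evens = v.select<8, 2>(0).read(); // materialize a strided region
  v.select<8, 2>(1) = evens;                     // write back through a view
  auto bytes = v.bit_cast_view<char>();          // same storage viewed as chars
  char b0 = bytes[0];                            // element access through the view
  (void)b0;
}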
+ __SYCL_DEPRECATED("use operator[] form.") + simd_view> operator()(int i) { + return select<1, 1>(i); + } + + // TODO ESIMD_EXPERIMENTAL + /// Read multiple elements by their indices in vector + template + resize_a_simd_type_t + iselect(const simd &Indices) { + vector_type_t Offsets = Indices.data() * sizeof(Ty); + return __esimd_rdindirect(data(), Offsets); + } + // TODO ESIMD_EXPERIMENTAL + /// update single element + void iupdate(ushort Index, Ty V) { + auto Val = data(); + Val[Index] = V; + set(Val); + } + + // TODO ESIMD_EXPERIMENTAL + /// update multiple elements by their indices in vector + template + void iupdate(const simd &Indices, + const resize_a_simd_type_t &Val, + const simd_mask_type &Mask) { + vector_type_t Offsets = Indices.data() * sizeof(Ty); + set(__esimd_wrindirect(data(), Val.data(), Offsets, + Mask.data())); + } + + /// \name Replicate + /// Replicate simd_obj_impl instance given a region. + /// @{ + /// + + /// \tparam Rep is number of times region has to be replicated. + /// \return replicated simd_obj_impl instance. + template resize_a_simd_type_t replicate() { + return replicate(0); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_w") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_w(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_w(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_vs_w") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_vs_w(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W width of src region to replicate. + /// \param Offset offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_vs_w(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \tparam HS horizontal stride of src region to replicate. + /// \param Offset is offset in number of elements in src region. + /// \return replicated simd_obj_impl instance. + template + __SYCL_DEPRECATED("use simd_obj_impl::replicate_vs_w_hs") + resize_a_simd_type_t replicate(uint16_t Offset) { + return replicate_vs_w_hs(Offset); + } + + /// \tparam Rep is number of times region has to be replicated. + /// \tparam VS vertical stride of src region to replicate. + /// \tparam W is width of src region to replicate. + /// \tparam HS horizontal stride of src region to replicate. + /// \param Offset is offset in number of elements in src region. 
+ /// \return replicated simd_obj_impl instance. + template + resize_a_simd_type_t replicate_vs_w_hs(uint16_t Offset) { + return __esimd_rdregion(data(), + Offset * sizeof(Ty)); + } + ///@} + + /// Any operation. + /// + /// \return 1 if any element is set, 0 otherwise. + template ::value>> + uint16_t any() { + return __esimd_any(data()); + } + + /// All operation. + /// + /// \return 1 if all elements are set, 0 otherwise. + template ::value>> + uint16_t all() { + return __esimd_all(data()); + } + + /// Write a simd_obj_impl-vector into a basic region of a simd_obj_impl + /// object. + template + ESIMD_INLINE void writeRegion( + RTy Region, + const vector_type_t &Val) { + using ElemTy = typename RTy::element_type; + if constexpr (N * sizeof(Ty) == RTy::length * sizeof(ElemTy)) + // update the entire vector + set(bitcast(Val)); + else { + static_assert(!RTy::Is_2D); + // If element type differs, do bitcast conversion first. + auto Base = bitcast(data()); + constexpr int BN = (N * sizeof(Ty)) / sizeof(ElemTy); + // Access the region information. + constexpr int M = RTy::Size_x; + constexpr int Stride = RTy::Stride_x; + uint16_t Offset = Region.M_offset_x * sizeof(ElemTy); + + // Merge and update. + auto Merged = __esimd_wrregion(Base, Val, Offset); + // Convert back to the original element type, if needed. + set(bitcast(Merged)); + } + } + + /// Write a simd_obj_impl-vector into a nested region of a simd_obj_impl + /// object. + template + ESIMD_INLINE void + writeRegion(std::pair Region, + const vector_type_t &Val) { + // parent-region type + using PaTy = typename shape_type::type; + using ElemTy = typename TR::element_type; + using BT = typename PaTy::element_type; + constexpr int BN = PaTy::length; + + if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) { + writeRegion(Region.second, bitcast(Val)); + } else { + // Recursively read the base + auto Base = readRegion(data(), Region.second); + // If element type differs, do bitcast conversion first. + auto Base1 = bitcast(Base); + constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy); + + if constexpr (!TR::Is_2D) { + // Access the region information. + constexpr int M = TR::Size_x; + constexpr int Stride = TR::Stride_x; + uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy); + + // Merge and update. + Base1 = __esimd_wrregion(Base1, Val, Offset); + } else { + static_assert(std::is_same::value); + // Read columns with non-trivial horizontal stride. + constexpr int M = TR::length; + constexpr int VS = PaTy::Size_x * TR::Stride_y; + constexpr int W = TR::Size_x; + constexpr int HS = TR::Stride_x; + constexpr int ParentWidth = PaTy::Size_x; + + // Compute the byte offset for the starting element. + uint16_t Offset = static_cast( + (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) * + sizeof(ElemTy)); + + // Merge and update. + Base1 = __esimd_wrregion( + Base1, Val, Offset); + } + // Convert back to the original element type, if needed. + auto Merged1 = bitcast(Base1); + // recursively write it back to the base + writeRegion(Region.second, Merged1); + } + } + + /// @name Memory operations + /// TODO NOTE: These APIs do not support cache hint specification yet, as this + /// is WIP. Later addition of hints is not expected to break code using these + /// APIs. + /// + /// @{ + + /// Copy a contiguous block of data from memory into this simd_obj_impl + /// object. The amount of memory copied equals the total size of vector + /// elements in this object. + /// @param addr the memory address to copy from. 
Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION; + + /// Copy a contiguous block of data from memory into this simd_obj_impl + /// object. The amount of memory copied equals the total size of vector + /// elements in this object. Source memory location is represented via a + /// global accessor and offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE EnableIfAccessor + copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// @param addr the memory address to copy to. Must be a pointer to the + /// global address space, otherwise behavior is undefined. + ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION; + + /// Copy all vector elements of this object into a contiguous block in memory. + /// Destination memory location is represented via a global accessor and + /// offset. + /// @param acc accessor to copy from. + /// @param offset offset to copy from. + template + ESIMD_INLINE EnableIfAccessor + copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; + + /// @} // Memory operations + + /// Bitwise inversion, available in all subclasses. + template >> + Derived operator~() { + return Derived(~data()); + } + + /// Unary logical negation operator, available in all subclasses. + template >> + simd_mask_type operator!() { + using MaskVecT = typename simd_mask_type::vector_type; + auto R = data() == vector_type(0); + return simd_mask_type{__builtin_convertvector(R, MaskVecT) & + MaskVecT(1)}; + } + +#define __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ + \ + /* OPASSIGN simd_obj_impl */ \ + template == \ + is_simd_type_v)&&COND>> \ + Derived &operator OPASSIGN( \ + const __SEIEED::simd_obj_impl &RHS) { \ + auto Res = *this BINOP RHS; \ + set(__SEIEED::convert(Res.data())); \ + return cast_this_to_derived(); \ + } \ + \ + /* OPASSIGN simd_view */ \ + template == \ + is_simd_type_v)&&(RegionT1::length == length) && \ + COND>> \ + Derived &operator OPASSIGN( \ + const __SEIEE::simd_view &RHS) { \ + auto Res = *this BINOP RHS.read(); \ + set(__SEIEED::convert(Res.data())); \ + return cast_this_to_derived(); \ + } \ + \ + /* OPASSIGN SCALAR */ \ + template > \ + Derived &operator OPASSIGN(T1 RHS) { \ + if constexpr (is_simd_type_v) { \ + using RHSVecT = __SEIEED::construct_a_simd_type_t; \ + return *this OPASSIGN RHSVecT(RHS); \ + } else { \ + return *this OPASSIGN Derived((Ty)RHS); \ + } \ + } + +// Bitwise operations are defined for simd objects and masks, and both operands +// must be integral +#define __ESIMD_BITWISE_OP_FILTER \ + std::is_integral_v &&std::is_integral_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(^, ^=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(|, |=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(&, &=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(%, %=, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER + +// Bit shift operations are defined only for simd objects (not for masks), and +// both operands must be integral +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v \ + &&__SEIEED::is_simd_type_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(<<, <<=, __ESIMD_SHIFT_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(>>, >>=, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +// Arithmetic operations are 
defined only for simd objects, and the second +// operand's element type must be vectorizable. This requirement for 'this' +// is fulfilled, because otherwise 'this' couldn't have been constructed. +#define __ESIMD_ARITH_OP_FILTER \ + __SEIEED::is_simd_type_v &&__SEIEED::is_vectorizable_v + + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(+, +=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(-, -=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(*, *=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_OBJ_IMPL_OPASSIGN(/, /=, __ESIMD_ARITH_OP_FILTER) +#undef __ESIMD_ARITH_OP_FILTER + +private: + // The underlying data for this vector. + vector_type M_data; + +protected: + void set(const vector_type &Val) { +#ifndef __SYCL_DEVICE_ONLY__ + M_data = Val; +#else + __esimd_vstore(&M_data, Val); +#endif + } +}; + +// ----------- Outlined implementations of simd_obj_impl class APIs. + +template +void simd_obj_impl::copy_from(const T *const Addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(Addr); + *this = + __esimd_flat_block_read_unaligned( + AddrVal); +} + +template +template +ESIMD_INLINE EnableIfAccessor +simd_obj_impl::copy_from(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = AccessorPrivateProxy::getNativeImageObj(acc); + *this = __esimd_block_read(surf_ind, offset); +#else + *this = __esimd_block_read(acc, offset); +#endif // __SYCL_DEVICE_ONLY__ +} + +template +void simd_obj_impl::copy_to(T *addr) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + + uintptr_t AddrVal = reinterpret_cast(addr); + __esimd_flat_block_write(AddrVal, + data()); +} + +template +template +ESIMD_INLINE EnableIfAccessor +simd_obj_impl::copy_to(AccessorT acc, uint32_t offset) { + constexpr unsigned Sz = sizeof(T) * N; + static_assert(Sz >= OperandSize::OWORD, + "block size must be at least 1 oword"); + static_assert(Sz % OperandSize::OWORD == 0, + "block size must be whole number of owords"); + static_assert(isPowerOf2(Sz / OperandSize::OWORD), + "block must be 1, 2, 4 or 8 owords long"); + static_assert(Sz <= 8 * OperandSize::OWORD, + "block size must be at most 8 owords"); + +#if defined(__SYCL_DEVICE_ONLY__) + auto surf_ind = AccessorPrivateProxy::getNativeImageObj(acc); + __esimd_block_write(surf_ind, offset >> 4, data()); +#else + __esimd_block_write(acc, offset >> 4, data()); +#endif // __SYCL_DEVICE_ONLY__ +} +} // 
namespace detail + +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp index 56c4e626118b8..72f9b9381ad70 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/simd_view_impl.hpp @@ -25,23 +25,22 @@ namespace detail { /// It is an internal class implementing basic functionality of simd_view. /// /// \ingroup sycl_esimd -template -class simd_view_impl { +template class simd_view_impl { + using Derived = simd_view; + template friend class simd_obj_impl; template friend class simd; - template friend class simd_view_impl; + template friend class simd_view_impl; + template friend class simd_mask_impl; public: - static_assert(!detail::is_simd_view_v::value); + static_assert(is_simd_obj_impl_derivative_v); // Deduce the corresponding value type from its region type. using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; - /// The simd type if reading the object. - using value_type = simd; - - /// The underlying builtin value type - using vector_type = - detail::vector_type_t; + using base_type = BaseTy; + template + using get_simd_t = construct_a_simd_type_t; /// The region type of this class. using region_type = RegionTy; @@ -50,43 +49,42 @@ class simd_view_impl { /// type of the base object type. using element_type = typename ShapeTy::element_type; - /// @{ - /// Constructors. + /// The simd type if reading the object. + using value_type = get_simd_t; + + /// The underlying builtin vector type backing the value read from the object. + using vector_type = vector_type_t; private: Derived &cast_this_to_derived() { return reinterpret_cast(*this); } protected: + /// @{ + /// Constructors. simd_view_impl(BaseTy &Base, RegionTy Region) : M_base(Base), M_region(Region) {} simd_view_impl(BaseTy &&Base, RegionTy Region) : M_base(Base), M_region(Region) {} - + /// @} public: // Default copy and move constructors. simd_view_impl(const simd_view_impl &Other) = default; simd_view_impl(simd_view_impl &&Other) = default; - /// @} - /// Conversion to simd type. - template operator simd() const { - if constexpr (std::is_same::value) + /// Implicit conversion to simd type. + template >> + inline operator simd() const { + if constexpr (std::is_same_v) return read(); else return convert(read()); } - /// @{ - /// Assignment operators. - simd_view_impl &operator=(const simd_view_impl &Other) { - return write(Other.read()); - } - simd_view_impl &operator=(const value_type &Val) { return write(Val); } - /// @} - - /// Move assignment operator. - simd_view_impl &operator=(simd_view_impl &&Other) { - return write(Other.read()); + /// Implicit conversion to simd_mask_impl type, if element type is compatible. 
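With value_type now spelled via get_simd_t and the conversion operator made implicit only for convertible element types, a view reads back as the matching user-visible simd type and can be converted or updated in place. A minimal usage sketch, assuming the simd.hpp header added in this patch and an ESIMD device-code context; the base-plus-step constructor used below is the pre-existing simd API being carried over.

// Usage sketch only -- assumes this patch's headers and ESIMD device code.
#include <sycl/ext/intel/experimental/esimd/simd.hpp>

namespace esimd = sycl::ext::intel::experimental::esimd;

inline void view_conversion_sketch() {
  esimd::simd<float, 16> Va(0.f, 1.f);   // 0, 1, 2, ..., 15
  auto Evens = Va.select<8, 2>(0);       // simd_view over every other element
  esimd::simd<float, 8> Copy = Evens;    // implicit conversion -> read()
  Evens += 1.f;                          // compound assignment writes back into Va
  (void)Copy;
}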
+ template >> + inline operator simd_mask_type() const { + return read(); } /// @{ @@ -97,9 +95,11 @@ class simd_view_impl { static constexpr int getStrideX() { return ShapeTy::Stride_x; } static constexpr int getSizeY() { return ShapeTy::Size_y; } static constexpr int getStrideY() { return ShapeTy::Stride_y; } + constexpr uint16_t getOffsetX() const { return getTopRegion(M_region).M_offset_x; } + constexpr uint16_t getOffsetY() const { return getTopRegion(M_region).M_offset_y; } @@ -109,9 +109,11 @@ class simd_view_impl { value_type read() const { using BT = typename BaseTy::element_type; constexpr int BN = BaseTy::length; - return detail::readRegion(M_base.data(), M_region); + return value_type{readRegion(M_base.data(), M_region)}; } + typename value_type::vector_type data() const { return read().data(); } + /// Write to this object. Derived &write(const value_type &Val) { M_base.writeRegion(M_region, Val.data()); @@ -120,12 +122,12 @@ class simd_view_impl { /// @{ /// Whole region update with predicates. - void merge(const value_type &Val, const mask_type_t &Mask) { + void merge(const value_type &Val, const simd_mask_type &Mask) { merge(Val, read(), Mask); } void merge(const value_type &Val1, value_type Val2, - const mask_type_t &Mask) { + const simd_mask_type &Mask) { Val2.merge(Val1, Mask); write(Val2.read()); } @@ -200,84 +202,139 @@ class simd_view_impl { TopRegionTy TopReg(OffsetY, OffsetX); return RetTy{this->M_base, std::make_pair(TopReg, M_region)}; } - -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const Derived &X, \ - const Derived &Y) { \ - return (X BINOP Y.read()); \ +#define __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(BINOP, OPASSIGN, COND) \ + \ + /* OPASSIGN simd_obj_impl */ \ + template == \ + is_simd_type_v)&&(N1 == length) && \ + COND>> \ + Derived &operator OPASSIGN(const simd_obj_impl &RHS) { \ + auto Res = read() BINOP RHS; \ + write(Res); \ + return cast_this_to_derived(); \ } \ - Derived &operator OPASSIGN(const value_type &RHS) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(read().data()); \ - auto V1 = detail::convert(RHS.data()); \ - auto V2 = V0 BINOP V1; \ - auto V3 = detail::convert(V2); \ - write(V3); \ + \ + /* OPASSIGN simd_view_impl */ \ + template ::element_type, \ + class T = element_type, class SimdT = BaseTy, \ + class = std::enable_if_t< \ + (is_simd_type_v == is_simd_type_v)&&( \ + length == __SEIEE::shape_type::length) && \ + COND>> \ + Derived &operator OPASSIGN(const simd_view_impl &RHS) { \ + *this OPASSIGN RHS.read(); \ return cast_this_to_derived(); \ } \ - Derived &operator OPASSIGN(const Derived &RHS) { \ - return (*this OPASSIGN RHS.read()); \ + \ + /* OPASSIGN scalar */ \ + template > \ + Derived &operator OPASSIGN(T1 RHS) { \ + auto Res = read() BINOP RHS; \ + write(Res); \ + return cast_this_to_derived(); \ } - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) +#define __ESIMD_BITWISE_OP_FILTER std::is_integral_v &&std::is_integral_v + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(^, ^=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(|, |=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(&, &=, __ESIMD_BITWISE_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(%, %=, __ESIMD_BITWISE_OP_FILTER) +#undef __ESIMD_BITWISE_OP_FILTER -#undef DEF_BINOP +#define __ESIMD_SHIFT_OP_FILTER \ + std::is_integral_v &&std::is_integral_v &&is_simd_type_v -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend auto 
operator BITWISE_OP(const Derived &X, \ - const Derived &Y) { \ - return (X BITWISE_OP Y.read()); \ - } \ - Derived &operator OPASSIGN(const value_type &RHS) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = read().data() BITWISE_OP RHS.data(); \ - auto V3 = detail::convert(V2); \ - write(V3); \ - return cast_this_to_derived(); \ - } \ - Derived &operator OPASSIGN(const Derived &RHS) { \ - return (*this OPASSIGN RHS.read()); \ - } - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(>>, >>=) - DEF_BITWISE_OP(<<, <<=) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(<<, <<=, __ESIMD_SHIFT_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(>>, >>=, __ESIMD_SHIFT_OP_FILTER) +#undef __ESIMD_SHIFT_OP_FILTER + +#define __ESIMD_ARITH_OP_FILTER \ + is_vectorizable_v &&is_vectorizable_v &&is_simd_type_v -#undef DEF_BITWISE_OP + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(+, +=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(-, -=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(*, *=, __ESIMD_ARITH_OP_FILTER) + __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN(/, /=, __ESIMD_ARITH_OP_FILTER) -#define DEF_UNARY_OP(UNARY_OP) \ +#undef __ESIMD_ARITH_OP_FILTER +#undef __ESIMD_DEF_SIMD_VIEW_IMPL_OPASSIGN + +#define __ESIMD_DEF_UNARY_OP(UNARY_OP, COND) \ + template > \ auto operator UNARY_OP() { \ auto V = UNARY_OP(read().data()); \ - return simd(V); \ + return get_simd_t(V); \ + } + __ESIMD_DEF_UNARY_OP(~, std::is_integral_v &&is_simd_type_v) + __ESIMD_DEF_UNARY_OP(+, is_simd_type_v) + __ESIMD_DEF_UNARY_OP(-, is_simd_type_v) + +#undef __ESIMD_DEF_UNARY_OP + + /// Unary logical negeation operator. Applies only to integer element types. + template >> + auto operator!() { + using MaskVecT = typename simd_mask_type::vector_type; + auto V = read().data() == 0; + return simd_mask_type{__builtin_convertvector(V, MaskVecT) & + MaskVecT(1)}; + } + + /// @{ + /// Assignment operators. + simd_view_impl &operator=(const simd_view_impl &Other) { + return write(Other.read()); } - DEF_UNARY_OP(~) - DEF_UNARY_OP(+) - DEF_UNARY_OP(-) -#undef DEF_UNARY_OP + Derived &operator=(const Derived &Other) { return write(Other.read()); } - // negation operator - auto operator!() { return cast_this_to_derived() == 0; } + Derived &operator=(const value_type &Val) { return write(Val); } + + /// Move assignment operator. + Derived &operator=(Derived &&Other) { return write(Other.read()); } + simd_view_impl &operator=(simd_view_impl &&Other) { + return write(Other.read()); + } + + template == + is_simd_type_v)&&(length == + SimdT::length)>> + Derived &operator=(const simd_obj_impl &Other) { + return write(convert(reinterpret_cast(Other))); + } + + template >> + Derived &operator=(T1 RHS) { + return write(value_type((element_type)RHS)); + } + + /// @} // Operator ++, -- Derived &operator++() { *this += 1; return cast_this_to_derived(); } + value_type operator++(int) { value_type Ret(read()); operator++(); return Ret; } + Derived &operator--() { *this -= 1; return cast_this_to_derived(); } + value_type operator--(int) { value_type Ret(read()); operator--(); @@ -289,7 +346,7 @@ class simd_view_impl { template > auto row(int i) { - return select<1, 0, getSizeX(), 1>(i, 0) + return select<1, 1, getSizeX(), 1>(i, 0) .template bit_cast_view(); } @@ -298,7 +355,7 @@ class simd_view_impl { template > auto column(int i) { - return select(0, i); + return select(0, i); } /// Read a single element from a 1D region, by value only. 
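The row/column/operator[] hunks around here switch single-element and single-row accesses from select<1, 0> to select<1, 1>. The following standalone sketch models what a (Size, Stride, Offset) region selects, to make the index arithmetic behind those template arguments concrete; it is a software illustration, not the real rdregion intrinsic.

#include <array>
#include <cstdio>

// Software model of a 1D region read: Size elements starting at Offset,
// Stride elements apart -- what select<Size, Stride>(Offset) describes.
template <int Size, int Stride, typename T, std::size_t N>
std::array<T, Size> read_region(const std::array<T, N> &Base, int Offset) {
  std::array<T, Size> Out{};
  for (int i = 0; i < Size; ++i)
    Out[i] = Base[Offset + i * Stride];
  return Out;
}

int main() {
  std::array<int, 8> V{10, 11, 12, 13, 14, 15, 16, 17};
  auto Pair = read_region<2, 3>(V, 1); // elements 1 and 4 -> 11, 14
  auto One  = read_region<1, 1>(V, 5); // single element 5 -> 15
  std::printf("%d %d | %d\n", Pair[0], Pair[1], One[0]);
}

For a one-element (or one-row) region the stride never advances, so 0 and 1 pick the same data; stride 1 is presumably just the canonical form the reworked region machinery expects.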
@@ -322,7 +379,7 @@ class simd_view_impl { template > auto operator[](int i) { - return select<1, 0>(i); + return select<1, 1>(i); } /// Return a writeable view of a single element. @@ -330,16 +387,16 @@ class simd_view_impl { typename = sycl::detail::enable_if_t> __SYCL_DEPRECATED("use operator[] form.") auto operator()(int i) { - return select<1, 0>(i); + return select<1, 1>(i); } /// \name Replicate - /// Replicate simd instance given a simd_view + /// Replicate simd instance given a simd_view_impl /// @{ /// /// \tparam Rep is number of times region has to be replicated. - template simd replicate() { + template get_simd_t replicate() { return read().replicate(0); } @@ -348,7 +405,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return replicate(0, OffsetX); } @@ -358,7 +415,8 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { return replicate(OffsetY, OffsetX); } @@ -368,7 +426,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return replicate(0, OffsetX); } @@ -379,7 +437,8 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { return replicate(OffsetY, OffsetX); } @@ -390,7 +449,7 @@ class simd_view_impl { /// \param OffsetX is column offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetX) { return read().template replicate(OffsetX); } @@ -402,29 +461,28 @@ class simd_view_impl { /// \param OffsetY is row offset in number of elements in src region. /// \return replicated simd instance. template - simd replicate(uint16_t OffsetY, uint16_t OffsetX) { + get_simd_t replicate(uint16_t OffsetY, + uint16_t OffsetX) { constexpr int RowSize = is2D() ? getSizeX() : 0; return read().template replicate(OffsetY * RowSize + OffsetX); } /// @} - /// Any operation. + /// 'any' operation. /// /// \return 1 if any element is set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = BaseTy, - typename = sycl::detail::enable_if_t::value, T2>> + template ::value, T2>> uint16_t any() { return read().any(); } - /// All operation. + /// 'all' operation. /// /// \return 1 if all elements are set, 0 otherwise. 
- template < - typename T1 = element_type, typename T2 = BaseTy, - typename = sycl::detail::enable_if_t::value, T2>> + template ::value, T2>> uint16_t all() { return read().all(); } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp index e14d2e9c5306e..2746886c8253b 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/types.hpp @@ -16,6 +16,12 @@ #include #include +#if defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__) +#define __esimd_dbg_print(a) std::cout << ">>> " << #a << "\n" +#else +#define __esimd_dbg_print(a) +#endif // defined(__ESIMD_DBG_HOST) && !defined(__SYCL_DEVICE_ONLY__) + #include __SYCL_INLINE_NAMESPACE(cl) { @@ -25,14 +31,139 @@ namespace intel { namespace experimental { namespace esimd { -// simd and simd_view forward declarations +// simd and simd_view_impl forward declarations template class simd; template class simd_view; namespace detail { -namespace csd = cl::sycl::detail; +// forward declarations of major internal simd classes +template class simd_mask_impl; +template +class simd_obj_impl; + +// @{ +// Helpers for major simd classes, which don't require their definitions to +// compile. Error checking/SFINAE is not used as these are only used internally. + +using simd_mask_elem_type = unsigned short; +template using simd_mask_type = simd_mask_impl; + +// @{ +// Checks if given type T is a raw clang vector type, plus provides some info +// about it if it is. + +struct invalid_element_type; + +template struct is_clang_vector_type : std::false_type { + static inline constexpr int length = 0; + using element_type = invalid_element_type; +}; + +template +struct is_clang_vector_type + : std::true_type { + static inline constexpr int length = N; + using element_type = T; +}; +template +static inline constexpr bool is_clang_vector_type_v = + is_clang_vector_type::value; + +// @} + +// @{ +// Checks if given type T derives from simd_obj_impl or is equal to it. +template +struct is_simd_obj_impl_derivative : public std::false_type { + using element_type = invalid_element_type; +}; + +// Specialization for the simd_obj_impl type itself. +template +struct is_simd_obj_impl_derivative> + : public std::true_type { + using element_type = ElT; +}; + +// Specialization for all other types. +template class Derived> +struct is_simd_obj_impl_derivative> + : public std::conditional_t< + std::is_base_of_v>, + Derived>, + std::true_type, std::false_type> { + using element_type = std::conditional_t< + std::is_base_of_v>, + Derived>, + ElT, void>; +}; + +// Convenience shortcut. +template +inline constexpr bool is_simd_obj_impl_derivative_v = + is_simd_obj_impl_derivative::value; +// @} + +// @{ +// "Resizes" given simd type \c T to given number of elements \c N. +template struct resize_a_simd_type; + +// Specialization for the simd_obj_impl type. +template class SimdT> +struct resize_a_simd_type>, Ndst> { + using type = simd_obj_impl>; +}; + +// Specialization for the simd_obj_impl type derivatives. +template class SimdT> +struct resize_a_simd_type, Ndst> { + using type = SimdT; +}; + +// Convenience shortcut. +template +using resize_a_simd_type_t = typename resize_a_simd_type::type; +// @} + +// @{ +// Converts element type of given simd type \c SimdT to +// given scalar type \c DstElemT. +template struct convert_simd_elem_type; + +// Specialization for the simd_obj_impl type. 
+template class SimdT> +struct convert_simd_elem_type>, + DstElemT> { + using type = simd_obj_impl>; +}; + +// Specialization for the simd_obj_impl type derivatives. +template class SimdT> +struct convert_simd_elem_type, DstElemT> { + using type = SimdT; +}; + +// Convenience shortcut. +template +using convert_simd_elem_type_t = + typename convert_simd_elem_type::type; + +// @} + +// Constructs a simd type with the same template type as in \c SimdT, and +// given element type and number. +template +using construct_a_simd_type_t = + convert_simd_elem_type_t, ElT>; +// @} + +namespace csd = cl::sycl::detail; using half = cl::sycl::detail::half_impl::StorageT; template @@ -52,22 +183,25 @@ struct is_esimd_arithmetic_type< decltype(std::declval() - std::declval()), decltype(std::declval() * std::declval()), decltype(std::declval() / std::declval())>> - : std::true_type {}; + : std::conditional_t, std::true_type, + std::false_type> {}; -// is_vectorizable_type template -struct is_vectorizable : public is_esimd_arithmetic_type {}; +static inline constexpr bool is_esimd_arithmetic_type_v = + is_esimd_arithmetic_type::value; -template <> struct is_vectorizable : public std::false_type {}; +// is_vectorizable_type +template +struct is_vectorizable : std::conditional_t, + std::true_type, std::false_type> {}; template -struct is_vectorizable_v - : std::integral_constant::value> {}; +static inline constexpr bool is_vectorizable_v = is_vectorizable::value; // vector_type, using clang vector type extension. template struct vector_type { static_assert(!std::is_const::value, "const element type not supported"); - static_assert(is_vectorizable_v::value, "element type not supported"); + static_assert(is_vectorizable_v, "element type not supported"); static_assert(N > 0, "zero-element vector not supported"); static constexpr int length = N; @@ -77,16 +211,28 @@ template struct vector_type { template using vector_type_t = typename vector_type::type; +// must match simd_mask::element_type +template +using simd_mask_storage_t = vector_type_t; + // Compute the simd_view type of a 1D format operation. 
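simd_mask_storage_t pins the mask's raw storage to unsigned short lanes (simd_mask_elem_type above), and vector_type wraps the clang vector extension. The standalone sketch below shows the underlying convention the mask wrapper normalizes: vector comparisons produce 0/-1 per lane, which is then converted to 0/1 in 16-bit lanes. The attribute spelling here is illustrative; the real headers build vector_type their own way. Requires a compiler with __builtin_convertvector (clang, or GCC 9+).

#include <cstdio>

// Illustrative stand-ins for vector_type_t<float, 8> and simd_mask_storage_t<8>.
typedef float float8 __attribute__((vector_size(8 * sizeof(float))));
typedef int cmp8 __attribute__((vector_size(8 * sizeof(int))));
typedef unsigned short mask8 __attribute__((vector_size(8 * sizeof(unsigned short))));

int main() {
  float8 A = {0, 1, 2, 3, 4, 5, 6, 7};
  float8 B = {4, 4, 4, 4, 4, 4, 4, 4};
  cmp8 Cmp = A < B;                                     // per-lane 0 / -1
  mask8 One = {1, 1, 1, 1, 1, 1, 1, 1};
  mask8 M = __builtin_convertvector(Cmp, mask8) & One;  // normalize to 0 / 1
  for (int i = 0; i < 8; ++i)
    std::printf("%u ", (unsigned)M[i]);                 // 1 1 1 1 0 0 0 0
  std::printf("\n");
}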
template struct compute_format_type; -template -struct compute_format_type, EltTy> { +template struct compute_format_type_impl { static constexpr int Size = sizeof(Ty) * N / sizeof(EltTy); static constexpr int Stride = 1; using type = region1d_t; }; +template class SimdT> +struct compute_format_type, EltTy> + : compute_format_type_impl {}; + +template +struct compute_format_type, EltTy> + : compute_format_type_impl {}; + template struct compute_format_type, EltTy> { using ShapeTy = typename shape_type::type; @@ -103,7 +249,7 @@ template struct compute_format_type_2d; template -struct compute_format_type_2d, EltTy, Height, Width> { +struct compute_format_type_2d_impl { static constexpr int Prod = sizeof(Ty) * N / sizeof(EltTy); static_assert(Prod == Width * Height, "size mismatch"); @@ -114,6 +260,16 @@ struct compute_format_type_2d, EltTy, Height, Width> { using type = region2d_t; }; +template class SimdT> +struct compute_format_type_2d, EltTy, Height, Width> + : compute_format_type_2d_impl {}; + +template +struct compute_format_type_2d, EltTy, Height, Width> + : compute_format_type_2d_impl {}; + template struct compute_format_type_2d, EltTy, Height, @@ -133,49 +289,120 @@ template using compute_format_type_2d_t = typename compute_format_type_2d::type; -// Check if a type is simd_view type -template struct is_simd_view_type : std::false_type {}; +// @{ +// Checks if given type is a view of any simd type (simd or simd_mask). +template struct is_any_simd_view_type : std::false_type {}; template -struct is_simd_view_type> : std::true_type {}; +struct is_any_simd_view_type> : std::true_type {}; + +template +static inline constexpr bool is_any_simd_view_type_v = + is_any_simd_view_type::value; +// @} + +// @{ +// Check if a type is one of internal 'simd_xxx_impl' types exposing simd-like +// interfaces and behaving like a simd object type. template -struct is_simd_view_v - : std::integral_constant>::value> {}; +static inline constexpr bool is_simd_like_type_v = + is_any_simd_view_type_v || is_simd_obj_impl_derivative_v; +// @} -// Check if a type is simd or simd_view type +// @{ +// Checks if given type is a any of the user-visible simd types (simd or +// simd_mask). template struct is_simd_type : std::false_type {}; +template +struct is_simd_type> : std::true_type {}; +template +static inline constexpr bool is_simd_type_v = is_simd_type::value; -template -struct is_simd_type> : std::true_type {}; +template struct is_simd_mask_type : std::false_type {}; +template +struct is_simd_mask_type> + : std::true_type {}; +template +static inline constexpr bool is_simd_mask_type_v = is_simd_mask_type::value; +// @} -template -struct is_simd_type> : std::true_type {}; +// @{ +// Checks if given type is a view of the simd type. +template struct is_simd_view_type_impl : std::false_type {}; + +template +struct is_simd_view_type_impl> + : std::conditional_t, std::true_type, + std::false_type> {}; + +template +struct is_simd_view_type : is_simd_view_type_impl> {}; template -struct is_simd_v - : std::integral_constant>::value> {}; +static inline constexpr bool is_simd_view_type_v = is_simd_view_type::value; +// @} + +template +static inline constexpr bool is_simd_or_view_type_v = + is_simd_view_type_v || is_simd_type_v; + +// @{ +// Get the element type if it is a scalar, clang vector, simd or simd_view type. + +struct cant_deduce_element_type; -// Get the element type if it is a simd or simd_view type. 
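The is_simd_type / is_simd_mask_type / is_simd_view_type family above all follow the same shape: a primary template inheriting std::false_type, partial specializations for the class templates that should match, and _v shortcuts. A standalone sketch of that pattern with hypothetical stand-in templates (not the ESIMD ones), to show how the dispatch resolves.

#include <type_traits>

// Hypothetical stand-ins for the real ESIMD class templates.
template <typename T, int N> class vec_like {};
template <int N> class mask_like {};

// Primary template: false by default.
template <typename T> struct is_vec_like : std::false_type {};
// Partial specialization: only vec_like<T, N> matches.
template <typename T, int N>
struct is_vec_like<vec_like<T, N>> : std::true_type {};
template <typename T>
inline constexpr bool is_vec_like_v = is_vec_like<T>::value;

template <typename T> struct is_mask_like : std::false_type {};
template <int N> struct is_mask_like<mask_like<N>> : std::true_type {};
template <typename T>
inline constexpr bool is_mask_like_v = is_mask_like<T>::value;

// Combined shortcut, analogous to is_simd_or_view_type_v.
template <typename T>
inline constexpr bool is_vec_or_mask_v = is_vec_like_v<T> || is_mask_like_v<T>;

static_assert(is_vec_like_v<vec_like<float, 8>>);
static_assert(!is_vec_like_v<int>);
static_assert(is_vec_or_mask_v<mask_like<16>>);

int main() { return 0; }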
-template struct element_type { using type = remove_cvref_t; }; -template struct element_type> { - using type = Ty; +template struct element_type { + using type = cant_deduce_element_type; }; -template -struct element_type> { - using type = typename RegionTy::element_type; + +template +struct element_type>> { + using type = remove_cvref_t; +}; + +template +struct element_type>> { + using type = typename T::element_type; +}; + +template +struct element_type>> { + using type = typename is_clang_vector_type::element_type; }; -// Get the common type of a binary operator. -template ::value && is_simd_v::value>> -struct common_type { +// @} + +// @{ +// Get computation type of a binary operator given its operand types: +// - if both types are arithmetic - return CPP's "common real type" of the +// computation (matches C++) +// - if both types are simd types, they must be of the same length N, +// and the returned type is simd, where N is the "common real type" of +// the element type of the operands (diverges from clang) +// - otherwise, one type is simd and another is arithmetic - the simd type is +// returned (matches clang) + +struct invalid_computation_type; + +template struct computation_type { + using type = invalid_computation_type; +}; + +template +struct computation_type< + T1, T2, std::enable_if_t && is_vectorizable_v>> { + using type = decltype(std::declval() + std::declval()); +}; + +template +struct computation_type< + T1, T2, + std::enable_if_t && is_simd_like_type_v>> { private: using Ty1 = typename element_type::type; using Ty2 = typename element_type::type; - using EltTy = decltype(Ty1() + Ty2()); + using EltTy = typename computation_type::type; static constexpr int N1 = T1::length; static constexpr int N2 = T2::length; static_assert(N1 == N2, "size mismatch"); @@ -184,20 +411,18 @@ struct common_type { using type = simd; }; -template -using compute_type_t = - typename common_type, remove_cvref_t>::type; +template +using computation_type_t = + typename computation_type, remove_cvref_t>::type; + +// @} -template To convert(From Val) { +template +std::enable_if_t && is_clang_vector_type_v, To> +convert(From Val) { return __builtin_convertvector(Val, To); } -/// Get the computation type. -template struct computation_type { - // Currently only arithmetic operations are needed. - typedef decltype(T1() + T2()) type; -}; - /// Base case for checking if a type U is one of the types. template constexpr bool is_type() { return false; } @@ -248,18 +473,11 @@ inline std::istream &operator>>(std::istream &I, half &rhs) { rhs = ValFloat; return I; } + } // namespace detail -// TODO @rolandschulz on May 21 -// {quote} -// - The mask should also be a wrapper around the clang - vector type rather -// than the clang - vector type itself. -// - The internal storage should be implementation defined.uint16_t is a bad -// choice for some HW.Nor is it how clang - vector types works(using the same -// size int as the corresponding vector type used for comparison(e.g. long for -// double and int for float)). -template -using mask_type_t = typename detail::vector_type::type; +// Alias for backward compatibility. 
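computation_type is what decides the type a mixed binary operation computes in: the "common real type" of the scalar operands (via decltype of +), lifted back into a simd of the same length when the operands are simd-like. A standalone sketch of that deduction with a hypothetical vec stand-in, to make the two cases concrete.

#include <type_traits>
#include <utility>

// Scalar case: C++'s "common real type" of two arithmetic types.
template <typename T1, typename T2>
using common_elem_t = decltype(std::declval<T1>() + std::declval<T2>());

static_assert(std::is_same_v<common_elem_t<int, float>, float>);
static_assert(std::is_same_v<common_elem_t<char, short>, int>); // integer promotion
static_assert(std::is_same_v<common_elem_t<unsigned, int>, unsigned>);

// Vector case, with a hypothetical stand-in for simd: lengths must match,
// the element type is the common scalar type of the two element types.
template <typename T, int N> struct vec {
  using element_type = T;
  static constexpr int length = N;
};

template <typename V1, typename V2> struct vec_computation_type {
  static_assert(V1::length == V2::length, "size mismatch");
  using type = vec<common_elem_t<typename V1::element_type,
                                 typename V2::element_type>,
                   V1::length>;
};

static_assert(std::is_same_v<
    typename vec_computation_type<vec<int, 8>, vec<float, 8>>::type,
    vec<float, 8>>);

int main() { return 0; }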
+template using mask_type_t = detail::simd_mask_storage_t; } // namespace esimd } // namespace experimental diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp index c462f839e6ac5..021452042f097 100755 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/util.hpp @@ -15,6 +15,10 @@ #include +#define __SEIEED sycl::ext::intel::experimental::esimd::detail +#define __SEIEE sycl::ext::intel::experimental::esimd +#define __SEIEEED sycl::ext::intel::experimental::esimd::emu::detail + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace ext { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp index 5f87f1b4595c7..04d93061e02b5 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/math.hpp @@ -13,9 +13,11 @@ #include #include #include +#include #include #include #include +#include #include @@ -61,7 +63,7 @@ namespace detail { template ESIMD_NODEBUG ESIMD_INLINE simd __esimd_abs_common_internal(simd src0, int flag = saturation_off) { - simd Result = __esimd_abs(src0.data()); + simd Result = simd(__esimd_abs(src0.data())); if (flag != saturation_on) return Result; @@ -96,7 +98,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< typename sycl::detail::remove_const_t>::value, simd> esimd_abs(simd src0, int flag = saturation_off) { - return detail::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0.data(), flag); } /// Get absolute value (scalar version) @@ -129,7 +131,7 @@ esimd_abs(T1 src0, int flag = saturation_off) { template ESIMD_NODEBUG ESIMD_INLINE simd esimd_abs(simd src0, int flag = saturation_off) { - return detail::__esimd_abs_common_internal(src0, flag); + return detail::__esimd_abs_common_internal(src0.data(), flag); } /// Get absolute value (scalar version). 
This is a specialization of a version @@ -165,8 +167,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_shl(simd src0, U src1, int flag = saturation_off) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; @@ -213,7 +214,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_shl(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_shl(Src0, Src1, flag); @@ -237,8 +238,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_shr(simd src0, U src1, int flag = saturation_off) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; typename detail::simd_type::type Result = @@ -266,7 +266,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_shr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_shr(Src0, Src1, flag); @@ -285,7 +285,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, simd> esimd_rol(simd src0, simd src1) { - return __esimd_rol(src0, src1); + return __esimd_rol(src0.data(), src1.data()); } /// Rotate left operation with a vector and a scalar inputs @@ -303,8 +303,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_rol(simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; return __esimd_rol(Src0.data(), Src1.data()); @@ -324,7 +323,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_rol(T1 src0, T2 src1) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_rol(Src0, Src1); @@ -343,7 +342,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, simd> esimd_ror(simd src0, simd src1) { - return __esimd_ror(src0, src1); + return __esimd_ror(src0.data(), src1.data()); } /// Rotate right operation with a vector and a scalar inputs @@ -361,8 +360,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_ror(simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; return 
__esimd_ror(Src0.data(), Src1.data()); @@ -382,7 +380,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_ror(T1 src0, T2 src1) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_ror(Src0, Src1); @@ -406,7 +404,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_lsr(simd src0, U src1, int flag = saturation_off) { - typedef typename detail::computation_type::type IntermedTy; + using IntermedTy = detail::computation_type_t; typedef typename std::make_unsigned::type ComputationTy; simd Src0 = src0; simd Result = Src0.data() >> src1.data(); @@ -434,7 +432,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_lsr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_lsr(Src0, Src1, flag); @@ -458,7 +456,7 @@ ESIMD_NODEBUG ESIMD_INLINE std::is_integral::value, simd> esimd_asr(simd src0, U src1, int flag = saturation_off) { - typedef typename detail::computation_type::type IntermedTy; + using IntermedTy = detail::computation_type_t; typedef typename std::make_signed::type ComputationTy; simd Src0 = src0; simd Result = Src0 >> src1; @@ -486,7 +484,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< std::is_integral::value && std::is_integral::value, typename sycl::detail::remove_const_t> esimd_asr(T1 src0, T2 src1, int flag = saturation_off) { - typedef typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; simd Result = esimd_asr(Src0, Src1, flag); @@ -503,8 +501,7 @@ ESIMD_NODEBUG ESIMD_INLINE detail::is_dword_type::value, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef - typename detail::computation_type::type ComputationTy; + using ComputationTy = detail::computation_type_t; typename detail::simd_type::type Src0 = src0; typename detail::simd_type::type Src1 = src1; rmd = Src0 * Src1; @@ -524,8 +521,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< detail::is_dword_type::value && SZ == 1, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef typename detail::computation_type::type - ComputationTy; + using ComputationTy = detail::computation_type_t; ComputationTy Product = convert(src0); Product *= src1; rmd = Product.bit_cast_view().select<1, 1>[0]; @@ -538,8 +534,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t< detail::is_dword_type::value && SZ != 1, simd> esimd_imul(simd &rmd, simd src0, U src1) { - typedef typename detail::computation_type::type - ComputationTy; + using ComputationTy = detail::computation_type_t; ComputationTy Product = convert(src0); Product *= src1; rmd = Product.bit_cast_view().select(0); @@ -697,15 +692,18 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_max(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmax(src0.data(), src1.data()); - return (flag == 
saturation_off) ? Result : __esimd_satf(Result); + Result = (flag == saturation_off) ? Result : __esimd_satf(Result); + return simd(Result); } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umax(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_uutrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_uutrunc_sat(Result); + return simd(Result); } else { auto Result = __esimd_smax(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_sstrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_sstrunc_sat(Result); + return simd(Result); } } @@ -781,15 +779,18 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_min(simd src0, simd src1, int flag = saturation_off) { if constexpr (std::is_floating_point::value) { auto Result = __esimd_fmin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result : __esimd_satf(Result); + Result = (flag == saturation_off) ? Result : __esimd_satf(Result); + return simd(Result); } else if constexpr (std::is_unsigned::value) { auto Result = __esimd_umin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_uutrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_uutrunc_sat(Result); + return simd(Result); } else { auto Result = __esimd_smin(src0.data(), src1.data()); - return (flag == saturation_off) ? Result - : __esimd_sstrunc_sat(Result); + Result = (flag == saturation_off) ? Result + : __esimd_sstrunc_sat(Result); + return simd(Result); } } @@ -1207,7 +1208,7 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE simd esimd_frc(simd src0) { simd Src0 = src0; - return __esimd_frc(Src0); + return __esimd_frc(Src0.data()); } /// Performs truncate-to-minus-infinity fraction operation of \p src0. 
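The esimd_max/esimd_min hunks above keep one shape: compute the raw per-lane result, then, if saturation_on was requested, run it through the matching saturating conversion before wrapping it back into a simd. As a reminder of what saturating means for the integer cases, a scalar standalone sketch; the real truncate-with-saturation intrinsics apply the same clamping lane-wise.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>

// Scalar model of a saturating conversion: clamp to the destination range
// instead of wrapping.
template <typename To, typename From> To saturate_to(From v) {
  const From Lo = static_cast<From>(std::numeric_limits<To>::min());
  const From Hi = static_cast<From>(std::numeric_limits<To>::max());
  return static_cast<To>(std::clamp(v, Lo, Hi));
}

int main() {
  std::printf("%d\n", saturate_to<int8_t>(300));           // 127, not 44
  std::printf("%d\n", saturate_to<int8_t>(-300));          // -128
  std::printf("%u\n", (unsigned)saturate_to<uint8_t>(-5)); // 0, not 251
}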
@@ -1227,7 +1228,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd esimd_lzd(simd src0, int flag = saturation_off) { // Saturation parameter ignored simd Src0 = src0; - return __esimd_lzd(Src0); + return __esimd_lzd(Src0.data()); } template @@ -1251,7 +1252,8 @@ esimd_lrp(simd src0, U src1, V src2, int flag = saturation_off) { "vector size must be a multiple of 4"); simd Src1 = src1; simd Src2 = src2; - simd Result = __esimd_lrp(src0, Src1, Src2); + simd Result = + __esimd_lrp(src0.data(), Src1.data(), Src2.data()); if (flag != saturation_on) return Result; @@ -1309,7 +1311,7 @@ esimd_pln(simd src0, simd src1, simd src2, Src12.select<(SZ >> 3), 1, 8, 1>(0, 8) = src2.template bit_cast_view> 3), 8>(); - simd Result = __esimd_pln(src0, Src12.read()); + simd Result = __esimd_pln(src0.data(), Src12.read().data()); if (flag != saturation_on) return Result; @@ -1321,7 +1323,7 @@ esimd_pln(simd src0, simd src1, simd src2, template ESIMD_NODEBUG ESIMD_INLINE simd esimd_bf_reverse(simd src0) { simd Src0 = src0; - return __esimd_bfrev(Src0); + return __esimd_bfrev(Src0.data()); } template @@ -1348,7 +1350,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd Src2 = src2; simd Src3 = src3; - return __esimd_bfins(Src0, Src1, Src2, Src3); + return __esimd_bfins(Src0.data(), Src1.data(), Src2.data(), Src3.data()); } template @@ -1374,7 +1376,7 @@ ESIMD_NODEBUG ESIMD_INLINE simd Src1 = src1; simd Src2 = src2; - return __esimd_bfext(Src0, Src1, Src2); + return __esimd_bfext(Src0.data(), Src1.data(), Src2.data()); } template @@ -1505,8 +1507,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_atan(simd src0, int flag = saturation_off) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd Gt1 = Src0 > T(1.0); + simd_mask Neg = src0 < T(0.0); + simd_mask Gt1 = Src0 > T(1.0); Src0.merge(esimd_inv(Src0), Gt1); @@ -1548,8 +1550,8 @@ ESIMD_NODEBUG ESIMD_INLINE esimd_acos(simd src0, int flag = saturation_off) { simd Src0 = esimd_abs(src0); - simd Neg = src0 < T(0.0); - simd TooBig = Src0 >= T(0.999998); + simd_mask Neg = src0 < T(0.0); + simd_mask TooBig = Src0 >= T(0.999998); // Replace oversized values to ensure no possibility of sqrt of // a negative value later @@ -1591,7 +1593,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t::value, simd> esimd_asin(simd src0, int flag = saturation_off) { - simd Neg = src0 < T(0.0); + simd_mask Neg = src0 < T(0.0); simd Result = T(ESIMD_HDR_CONST_PI / 2.0) - esimd_acos(esimd_abs(src0)); @@ -1644,14 +1646,14 @@ ESIMD_INTRINSIC_DEF(rndz) template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N == 8 || N == 16 || N == 32), uint> - esimd_pack_mask(simd src0) { + esimd_pack_mask(simd_mask src0) { return __esimd_pack_mask(src0.data()); } template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N == 8 || N == 16 || N == 32), - simd> + simd_mask> esimd_unpack_mask(uint src0) { return __esimd_unpack_mask(src0); } @@ -1659,8 +1661,8 @@ ESIMD_NODEBUG ESIMD_INLINE template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t<(N != 8 && N != 16 && N < 32), uint> - esimd_pack_mask(simd src0) { - simd src_0 = 0; + esimd_pack_mask(simd_mask src0) { + simd_mask<(N < 8 ? 8 : N < 16 ? 
16 : 32)> src_0 = 0; src_0.template select() = src0.template bit_cast_view(); return esimd_pack_mask(src_0); } @@ -1973,7 +1975,7 @@ ESIMD_INLINE simd esimd_atan2_fast(simd y, simd x, simd a1; simd atan2; - simd mask = (y >= 0.0f); + simd_mask mask = (y >= 0.0f); a0.merge(ESIMD_CONST_PI * 0.5f, ESIMD_CONST_PI * 1.5f, mask); a1.merge(0, ESIMD_CONST_PI * 2.0f, mask); @@ -2009,7 +2011,7 @@ ESIMD_INLINE simd esimd_atan2(simd y, simd x, simd v_distance; simd v_y0; simd atan2; - simd mask; + simd_mask mask; mask = (x < 0); v_y0.merge(ESIMD_CONST_PI, 0, mask); @@ -2027,10 +2029,10 @@ template <> ESIMD_INLINE float esimd_atan2(float y, float x, const uint flags) { float v_distance; float v_y0; simd atan2; - unsigned short mask; + simd_mask<1> mask; mask = (x < 0); - v_y0 = mask ? ESIMD_CONST_PI : 0; + v_y0 = mask[0] ? ESIMD_CONST_PI : 0; v_distance = esimd_sqrt(x * x + y * y); mask = (esimd_abs(y) < 0.000001f); atan2.merge(v_y0, (2 * esimd_atan((v_distance - x) / y)), mask); @@ -2345,11 +2347,11 @@ template struct esimd_apply_reduced_max { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __esimd_reduced_fmax(v1, v2); + return __esimd_reduced_fmax(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { - return __esimd_reduced_umax(v1, v2); + return __esimd_reduced_umax(v1.data(), v2.data()); } else { - return __esimd_reduced_smax(v1, v2); + return __esimd_reduced_smax(v1.data(), v2.data()); } } }; @@ -2358,11 +2360,11 @@ template struct esimd_apply_reduced_min { template simd operator()(simd v1, simd v2) { if constexpr (std::is_floating_point::value) { - return __esimd_reduced_fmin(v1, v2); + return __esimd_reduced_fmin(v1.data(), v2.data()); } else if constexpr (std::is_unsigned::value) { - return __esimd_reduced_umin(v1, v2); + return __esimd_reduced_umin(v1.data(), v2.data()); } else { - return __esimd_reduced_smin(v1, v2); + return __esimd_reduced_smin(v1.data(), v2.data()); } } }; @@ -2426,7 +2428,7 @@ T0 esimd_reduce(simd v) { template ESIMD_INLINE ESIMD_NODEBUG T0 esimd_sum(simd v) { - using TT = compute_type_t>; + using TT = detail::computation_type_t>; using RT = typename TT::element_type; T0 retv = esimd_reduce(v); return retv; @@ -2434,7 +2436,7 @@ ESIMD_INLINE ESIMD_NODEBUG T0 esimd_sum(simd v) { template ESIMD_INLINE ESIMD_NODEBUG T0 esimd_prod(simd v) { - using TT = compute_type_t>; + using TT = detail::computation_type_t>; using RT = typename TT::element_type; T0 retv = esimd_reduce(v); return retv; @@ -2471,7 +2473,7 @@ ESIMD_INLINE ESIMD_NODEBUG T0 reduce(simd v, BinaryOperation op) { template simd esimd_dp4(simd v1, simd v2) { - auto retv = __esimd_dp4(v1, v2); + auto retv = __esimd_dp4(v1.data(), v2.data()); return retv; } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 02cfd986a0410..96b6e7bbfbcdb 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -76,7 +76,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< ((n == 8 || n == 16 || n == 32) && (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), simd> -gather(T *p, simd offsets, simd pred = 1) { +gather(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -126,7 +126,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (ElemsPerAddr == 1 || ElemsPerAddr == 2 || ElemsPerAddr == 4)), void> 
scatter(T *p, simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -271,21 +271,21 @@ ESIMD_INLINE ESIMD_NODEBUG const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); const simd promo_vals = __esimd_surf_read(scale, surf_ind, glob_offset, offsets); + L3H>(scale, surf_ind, glob_offset, offsets.data()); #else const simd promo_vals = __esimd_surf_read( - scale, acc, glob_offset, offsets); + scale, acc, glob_offset, offsets.data()); #endif return convert(promo_vals); } else { #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); return __esimd_surf_read( - scale, surf_ind, glob_offset, offsets); + scale, surf_ind, glob_offset, offsets.data()); #else return __esimd_surf_read( - scale, acc, glob_offset, offsets); + scale, acc, glob_offset, offsets.data()); #endif } } @@ -317,7 +317,7 @@ ESIMD_INLINE ESIMD_NODEBUG !std::is_pointer::value, void> scatter(AccessorTy acc, simd vals, simd offsets, - uint32_t glob_offset = 0, simd pred = 1) { + uint32_t glob_offset = 0, simd_mask pred = simd_mask(1)) { constexpr int TypeSizeLog2 = detail::ElemsPerAddrEncoding(); // TODO (performance) use hardware-supported scale once BE supports it @@ -338,19 +338,21 @@ ESIMD_INLINE ESIMD_NODEBUG #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, promo_vals); + pred.data(), scale, surf_ind, glob_offset, offsets.data(), + promo_vals.data()); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, promo_vals); + pred.data(), scale, acc, glob_offset, offsets.data(), + promo_vals.data()); #endif } else { #if defined(__SYCL_DEVICE_ONLY__) const auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); __esimd_surf_write( - pred, scale, surf_ind, glob_offset, offsets, vals); + pred.data(), scale, surf_ind, glob_offset, offsets.data(), vals.data()); #else __esimd_surf_write( - pred, scale, acc, glob_offset, offsets, vals); + pred.data(), scale, acc, glob_offset, offsets.data(), vals.data()); #endif } } @@ -360,7 +362,7 @@ ESIMD_INLINE ESIMD_NODEBUG template ESIMD_INLINE ESIMD_NODEBUG T scalar_load(AccessorTy acc, uint32_t offset) { - const simd Res = gather(acc, simd{offset}); + const simd Res = gather(acc, simd(offset)); return Res[0]; } @@ -370,7 +372,7 @@ template ESIMD_INLINE ESIMD_NODEBUG void scalar_store(AccessorTy acc, uint32_t offset, T val) { - scatter(acc, simd{val}, simd{offset}); + scatter(acc, simd(val), simd(offset)); } /// Gathering read for the given starting pointer \p p and \p offsets. 
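After this change the gather/scatter predicates are simd_mask values rather than raw simd<uint16_t, N> vectors, and comparisons produce exactly that type. A usage sketch under assumptions: it includes the memory.hpp/simd.hpp headers touched by this patch, is meant for ESIMD device code, and treats the offsets as byte offsets (check the header for the exact unit).

// Usage sketch only -- assumes this patch's headers and ESIMD device code.
#include <sycl/ext/intel/experimental/esimd/memory.hpp>
#include <sycl/ext/intel/experimental/esimd/simd.hpp>
#include <cstdint>

namespace esimd = sycl::ext::intel::experimental::esimd;

// Copy 16 floats from In to Out, but only in lanes holding non-negative values.
inline void masked_copy_sketch(float *In, float *Out) {
  esimd::simd<uint32_t, 16> ByteOffsets(0, sizeof(float)); // 0, 4, 8, ...
  esimd::simd<float, 16> Vals = esimd::gather(In, ByteOffsets);
  esimd::simd_mask<16> Keep = Vals >= 0.0f;                // per-lane predicate
  esimd::scatter(Out, Vals, ByteOffsets, Keep);            // masked-off lanes untouched
}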
@@ -388,7 +390,7 @@ template > -gather_rgba(T *p, simd offsets, simd pred = 1) { +gather_rgba(T *p, simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); @@ -407,8 +409,8 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< simd> gather4(T *p, simd offsets, - simd - pred = 1) { + simd_mask pred = + 1) { return gather_rgba(p, offsets, pred); } @@ -429,7 +431,7 @@ ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(N == 16 || N == 32) && (sizeof(T) == 4), void> scatter_rgba(T *p, simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { simd offsets_i = convert(offsets); simd addrs(reinterpret_cast(p)); addrs = addrs + offsets_i; @@ -445,7 +447,7 @@ __SYCL_DEPRECATED("use scatter_rgba.") ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 16 || n == 32) && (sizeof(T) == 4), void> scatter4(T *p, simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { scatter_rgba(p, vals, offsets, pred); } @@ -555,7 +557,7 @@ template (), simd> - flat_atomic(T *p, simd offset, simd pred) { + flat_atomic(T *p, simd offset, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -570,7 +572,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd pred) { + simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -586,7 +588,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> flat_atomic(T *p, simd offset, simd src0, - simd src1, simd pred) { + simd src1, simd_mask pred) { simd vAddr(reinterpret_cast(p)); simd offset_i1 = convert(offset); vAddr += offset_i1; @@ -655,8 +657,8 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void slm_init(uint32_t size); template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), simd> - slm_load(simd offsets, simd pred = 1) { - return __esimd_slm_read(offsets.data(), pred.data()); + slm_load(simd offsets, simd_mask Pred = 1) { + return __esimd_slm_read(offsets.data(), Pred.data()); } /// SLM scatter. 
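The SLM (shared local memory) entry points get the same predicate treatment. A usage sketch under the same assumptions as the previous one (this patch's headers, ESIMD device code); slm_init sizing and byte offsets follow the surrounding declarations.

// Usage sketch only -- assumes this patch's headers and ESIMD device code.
#include <sycl/ext/intel/experimental/esimd/memory.hpp>
#include <sycl/ext/intel/experimental/esimd/simd.hpp>
#include <cstdint>

namespace esimd = sycl::ext::intel::experimental::esimd;

inline void slm_roundtrip_sketch() {
  esimd::slm_init(16 * sizeof(int));                  // reserve SLM for 16 ints
  esimd::simd<uint32_t, 16> Offsets(0, sizeof(int));  // byte offsets 0, 4, 8, ...
  esimd::simd<int, 16> Vals(100, 1);                  // 100, 101, ...
  esimd::slm_store(Vals, Offsets);                    // predicate defaults to all-on
  esimd::simd_mask<16> Pred = 1;                      // explicit all-enabled mask
  esimd::simd<int, 16> Back = esimd::slm_load<int, 16>(Offsets, Pred);
  (void)Back;
}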
@@ -664,7 +666,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t<(n == 16 || n == 32), void> slm_store(simd vals, simd offsets, - simd pred = 1) { + simd_mask pred = 1) { __esimd_slm_write(offsets.data(), vals.data(), pred.data()); } @@ -675,7 +677,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), simd> -slm_load4(simd offsets, simd pred = 1) { +slm_load4(simd offsets, simd_mask pred = 1) { return __esimd_slm_read4(offsets.data(), pred.data()); } @@ -684,7 +686,7 @@ template ESIMD_INLINE ESIMD_NODEBUG typename sycl::detail::enable_if_t< (n == 8 || n == 16 || n == 32) && (sizeof(T) == 4), void> slm_store4(simd vals, - simd offsets, simd pred = 1) { + simd offsets, simd_mask pred = 1) { __esimd_slm_write4(offsets.data(), vals.data(), pred.data()); } @@ -727,7 +729,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd pred) { + slm_atomic(simd offsets, simd_mask pred) { return __esimd_slm_atomic0(offsets.data(), pred.data()); } @@ -736,8 +738,7 @@ template ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> - slm_atomic(simd offsets, simd src0, - simd pred) { + slm_atomic(simd offsets, simd src0, simd_mask pred) { return __esimd_slm_atomic1(offsets.data(), src0.data(), pred.data()); } @@ -748,7 +749,7 @@ ESIMD_NODEBUG ESIMD_INLINE typename sycl::detail::enable_if_t(), simd> slm_atomic(simd offsets, simd src0, simd src1, - simd pred) { + simd_mask pred) { return __esimd_slm_atomic2(offsets.data(), src0.data(), src1.data(), pred.data()); } @@ -830,14 +831,15 @@ media_block_store(AccessorTy acc, unsigned x, unsigned y, simd vals) { temp_ref.template select() = vals_ref; __esimd_media_block_store( 0, detail::AccessorPrivateProxy::getNativeImageObj(acc), plane, - sizeof(T) * n, x, y, temp); + sizeof(T) * n, x, y, temp.data()); } else { __esimd_media_block_store( 0, detail::AccessorPrivateProxy::getNativeImageObj(acc), plane, - sizeof(T) * n, x, y, vals); + sizeof(T) * n, x, y, vals.data()); } #else - __esimd_media_block_store(0, acc, plane, sizeof(T) * n, x, y, vals); + __esimd_media_block_store(0, acc, plane, sizeof(T) * n, x, y, + vals.data()); #endif // __SYCL_DEVICE_ONLY__ } @@ -902,7 +904,7 @@ esimd_raw_sends_load(simd msgDst, simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -942,7 +944,7 @@ ESIMD_INLINE ESIMD_NODEBUG simd esimd_raw_send_load(simd msgDst, simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numDst, uint8_t isEOT = 0, - uint8_t isSendc = 0, simd mask = 1) { + uint8_t isSendc = 0, simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send rspVar"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -980,7 +982,7 @@ esimd_raw_sends_store(simd msgSrc0, simd msgSrc1, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t numSrc1, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 
0, "Invalid size for raw send msgSrc0"); constexpr unsigned _Width2 = n2 * sizeof(T2); @@ -1014,7 +1016,7 @@ ESIMD_INLINE ESIMD_NODEBUG void esimd_raw_send_store(simd msgSrc0, uint32_t exDesc, uint32_t msgDesc, uint8_t execSize, uint8_t sfid, uint8_t numSrc0, uint8_t isEOT = 0, uint8_t isSendc = 0, - simd mask = 1) { + simd_mask mask = 1) { constexpr unsigned _Width1 = n1 * sizeof(T1); static_assert(_Width1 % 32 == 0, "Invalid size for raw send msgSrc0"); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp index 26abc25916ae0..bbab78bdd103f 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd.hpp @@ -10,12 +10,18 @@ #pragma once -#include +#include +#include #include #include #include #include +#include + +#ifndef __SYCL_DEVICE_ONLY__ +#include +#endif // __SYCL_DEVICE_ONLY__ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -32,654 +38,96 @@ namespace esimd { /// read-update-write semantics. /// /// \ingroup sycl_esimd -template class simd { - template friend class simd_view; +template +class simd + : public detail::simd_obj_impl< + Ty, N, simd, std::enable_if_t>> { + using base_type = detail::simd_obj_impl>; public: - /// The underlying builtin data type. - using vector_type = detail::vector_type_t; - - /// The element type of this simd object. - using element_type = Ty; - - /// The number of elements in this simd object. + using base_type::base_type; + using element_type = typename base_type::element_type; + using vector_type = typename base_type::vector_type; static constexpr int length = N; - /// @{ - /// Constructors. - simd() = default; - simd(const simd &other) { set(other.data()); } - template simd(const simd &other) { - set(__builtin_convertvector(other.data(), detail::vector_type_t)); - } - template simd(simd &&other) { - if constexpr (std::is_same::value) - set(other.data()); - else - set(__builtin_convertvector(other.data(), detail::vector_type_t)); - } - simd(const vector_type &Val) { set(Val); } - - // TODO @rolandschulz - // {quote} - // Providing both an overload of initializer-list and the same type itself - // causes really weird behavior. E.g. - // simd s1(1,2); //calls next constructor - // simd s2{1,2}; //calls this constructor - // This might not be confusing for all users but to everyone using - // uniform-initialization syntax. Therefore if you want to use this - // constructor the other one should have a special type (see - // https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines#es64-use-the-tenotation-for-construction) - // to avoid this issue. Also this seems like one of those areas where this - // simd-type needless differs from std::simd. Why should these constructors be - // different? Why reinvent the wheel and have all the work of fixing these - // problems if we could just use the existing solution. Especially if that is - // anyhow the long-term goal. Adding extra stuff like the select is totally - // fine. But differ on things which have no apparent advantage and aren't as - // thought through seems to have only downsides. - // {/quote} - - simd(std::initializer_list Ilist) noexcept { - int i = 0; - for (auto It = Ilist.begin(); It != Ilist.end() && i < N; ++It) { - M_data[i++] = *It; - } - } - - /// Initialize a simd with an initial value and step. 
- simd(Ty Val, Ty Step = Ty()) noexcept { - if (Step == Ty()) - M_data = Val; - else { -#pragma unroll - for (int i = 0; i < N; ++i) { - M_data[i] = Val; - Val += Step; - } - } - } - /// @} - - /// conversion operator - operator const vector_type &() const & { return M_data; } - operator vector_type &() & { return M_data; } - - /// Implicit conversion for simd into T. - template > - operator element_type() const { - return data()[0]; - } - - vector_type data() const { -#ifndef __SYCL_DEVICE_ONLY__ - return M_data; -#else - return __esimd_vload(&M_data); -#endif - } - - /// Whole region read. - simd read() const { return data(); } - - /// Whole region write. - simd &write(const simd &Val) { - set(Val.data()); - return *this; - } - - /// Whole region update with predicates. - void merge(const simd &Val, const mask_type_t &Mask) { - set(__esimd_wrregion( - data(), Val.data(), 0, Mask)); - } - void merge(const simd &Val1, simd Val2, const mask_type_t &Mask) { - Val2.merge(Val1, Mask); - set(Val2.data()); - } - - /// View this simd object in a different element type. - template auto bit_cast_view() &[[clang::lifetimebound]] { - using TopRegionTy = detail::compute_format_type_t; - using RetTy = simd_view; - TopRegionTy R(0); - return RetTy{*this, R}; + // Implicit conversion constructor from another simd object of the same + // length. + template && + (length == SimdT::length)>> + simd(const SimdT &RHS) + : base_type(__builtin_convertvector(RHS.data(), vector_type)) { + __esimd_dbg_print(simd(const SimdT &RHS)); } - template - __SYCL_DEPRECATED("use simd::bit_cast_view.") - auto format() & { - return bit_cast_view(); + // Broadcast constructor with conversion. + template >> + simd(T1 Val) : base_type((Ty)Val) { + __esimd_dbg_print(simd(T1 Val)); } - /// View as a 2-dimensional simd_view. - template - auto bit_cast_view() &[[clang::lifetimebound]] { - using TopRegionTy = - detail::compute_format_type_2d_t; - using RetTy = simd_view; - TopRegionTy R(0, 0); - return RetTy{*this, R}; + /// Explicit conversion for simd_obj_impl into T. + template >> + operator To() const { + __esimd_dbg_print(explicit operator To()); + return (To)base_type::data()[0]; } - template - __SYCL_DEPRECATED("use simd::bit_cast_view.") - auto format() & { - return bit_cast_view(); - } - - /// 1D region select, apply a region on top of this LValue object. - /// - /// \tparam Size is the number of elements to be selected. - /// \tparam Stride is the element distance between two consecutive elements. - /// \param Offset is the starting element offset. - /// \return the representing region object. - template - simd_view> select(uint16_t Offset = 0) &[ - [clang::lifetimebound]] { - region1d_t Reg(Offset); - return {*this, Reg}; - } - - /// 1D region select, apply a region on top of this RValue object. - /// - /// \tparam Size is the number of elements to be selected. - /// \tparam Stride is the element distance between two consecutive elements. - /// \param Offset is the starting element offset. - /// \return the value this region object refers to. - template - simd select(uint16_t Offset = 0) && { - simd &&Val = *this; - return __esimd_rdregion(Val.data(), - Offset); - } - - /// Read single element, return value only (not reference). - Ty operator[](int i) const { return data()[i]; } - - /// Read single element, return value only (not reference). - __SYCL_DEPRECATED("use operator[] form.") - Ty operator()(int i) const { return data()[i]; } - - /// Return writable view of a single element. 
- simd_view> operator[](int i) - [[clang::lifetimebound]] { - return select<1, 0>(i); - } - - /// Return writable view of a single element. - __SYCL_DEPRECATED("use operator[] form.") - simd_view> operator()(int i) { - return select<1, 0>(i); - } - - // TODO ESIMD_EXPERIMENTAL - /// Read multiple elements by their indices in vector - template - simd iselect(const simd &Indices) { - detail::vector_type_t Offsets = Indices.data() * sizeof(Ty); - return __esimd_rdindirect(data(), Offsets); - } - // TODO ESIMD_EXPERIMENTAL - /// update single element - void iupdate(ushort Index, Ty V) { - auto Val = data(); - Val[Index] = V; - set(Val); - } - // TODO ESIMD_EXPERIMENTAL - /// update multiple elements by their indices in vector - template - void iupdate(const simd &Indices, const simd &Val, - mask_type_t Mask) { - detail::vector_type_t Offsets = Indices.data() * sizeof(Ty); - set(__esimd_wrindirect(data(), Val.data(), Offsets, Mask)); - } - - // Use auto as a return type to honor C++ integer promotion rules, - // e.g. simd + simd -> simd -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const simd &X, const simd &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(X.data()); \ - auto V1 = detail::convert(Y.data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } \ - template >> \ - ESIMD_INLINE friend auto operator BINOP(const simd &X, T1 Y) { \ - return X BINOP simd((Ty)Y); \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const simd &RHS) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(LHS.data()); \ - auto V1 = detail::convert(RHS.data()); \ - auto V2 = V0 BINOP V1; \ - LHS.write(detail::convert(V2)); \ - return LHS; \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const Ty &RHS) { \ - LHS OPASSIGN simd(RHS); \ - return LHS; \ - } - - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) - -#undef DEF_BINOP - - // TODO @rolandschulz, @mattkretz - // Introduce simd_mask type and let user use this type instead of specific - // type representation (simd) to make it more portable - // TODO @iburyl should be mask_type_t, which might become more abstracted in - // the future revisions. 
- // -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend simd operator RELOP(const simd &X, \ - const simd &Y) { \ - auto R = X.data() RELOP Y.data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ - } \ - template >> \ - ESIMD_INLINE friend bool operator RELOP(const simd &X, T1 Y) { \ - return (Ty)X RELOP(Ty) Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP - -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend simd operator BITWISE_OP(const simd &X, const simd &Y) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = X.data() BITWISE_OP Y.data(); \ - return simd(V2); \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const simd &RHS) { \ - static_assert(std::is_integral(), "not integeral type"); \ - auto V2 = LHS.data() BITWISE_OP RHS.data(); \ - LHS.write(detail::convert(V2)); \ - return LHS; \ - } \ - ESIMD_INLINE friend simd &operator OPASSIGN(simd &LHS, const Ty &RHS) { \ - LHS OPASSIGN simd(RHS); \ - return LHS; \ - } - - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(<<, <<=) - DEF_BITWISE_OP(>>, >>=) - -#undef DEF_BITWISE_OP - - // Operator ++, -- + /// @{ + /// Infix and postfix operators ++, -- simd &operator++() { *this += 1; return *this; } + simd operator++(int) { - simd Ret(*this); + simd Ret(base_type::data()); operator++(); return Ret; } + simd &operator--() { *this -= 1; return *this; } + simd operator--(int) { - simd Ret(*this); + simd Ret(base_type::data()); operator--(); return Ret; } + /// @} -#define DEF_UNARY_OP(UNARY_OP) \ - simd operator UNARY_OP() { \ - auto V = UNARY_OP(data()); \ - return simd(V); \ - } - DEF_UNARY_OP(~) - DEF_UNARY_OP(+) - DEF_UNARY_OP(-) - -#undef DEF_UNARY_OP - - // negation operator - auto operator!() { return *this == 0; } - - /// \name Replicate - /// Replicate simd instance given a region. - /// @{ - /// - - /// \tparam Rep is number of times region has to be replicated. - /// \return replicated simd instance. - template simd replicate() { - return replicate(0); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_w") - simd replicate(uint16_t Offset) { - return replicate_w(Offset); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template simd replicate_w(uint16_t Offset) { - return replicate_vs_w_hs(Offset); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_vs_w") - simd replicate(uint16_t Offset) { - return replicate_vs_w(Offset); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W width of src region to replicate. - /// \param Offset offset in number of elements in src region. - /// \return replicated simd instance. 
- template - simd replicate_vs_w(uint16_t Offset) { - return replicate_vs_w_hs(Offset); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \tparam HS horizontal stride of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - __SYCL_DEPRECATED("use simd::replicate_vs_w_hs") - simd replicate(uint16_t Offset) { - return replicate_vs_w_hs(Offset); - } - - /// \tparam Rep is number of times region has to be replicated. - /// \tparam VS vertical stride of src region to replicate. - /// \tparam W is width of src region to replicate. - /// \tparam HS horizontal stride of src region to replicate. - /// \param Offset is offset in number of elements in src region. - /// \return replicated simd instance. - template - simd replicate_vs_w_hs(uint16_t Offset) { - return __esimd_rdregion( - data(), Offset * sizeof(Ty)); - } - ///@} - - /// Any operation. - /// - /// \return 1 if any element is set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = Ty, - typename = sycl::detail::enable_if_t::value, T2>> - uint16_t any() { - return __esimd_any(data()); - } - - /// All operation. - /// - /// \return 1 if all elements are set, 0 otherwise. - template < - typename T1 = element_type, typename T2 = Ty, - typename = sycl::detail::enable_if_t::value, T2>> - uint16_t all() { - return __esimd_all(data()); - } - - /// Write a simd-vector into a basic region of a simd object. - template - ESIMD_INLINE void - writeRegion(RTy Region, - const detail::vector_type_t &Val) { - using ElemTy = typename RTy::element_type; - if constexpr (N * sizeof(Ty) == RTy::length * sizeof(ElemTy)) - // update the entire vector - set(detail::bitcast(Val)); - else { - static_assert(!RTy::Is_2D); - // If element type differs, do bitcast conversion first. - auto Base = detail::bitcast(data()); - constexpr int BN = (N * sizeof(Ty)) / sizeof(ElemTy); - // Access the region information. - constexpr int M = RTy::Size_x; - constexpr int Stride = RTy::Stride_x; - uint16_t Offset = Region.M_offset_x * sizeof(ElemTy); - - // Merge and update. - auto Merged = __esimd_wrregion(Base, Val, Offset); - // Convert back to the original element type, if needed. - set(detail::bitcast(Merged)); - } - } - - /// Write a simd-vector into a nested region of a simd object. - template - ESIMD_INLINE void writeRegion( - std::pair Region, - const detail::vector_type_t &Val) { - // parent-region type - using PaTy = typename shape_type::type; - using ElemTy = typename TR::element_type; - using BT = typename PaTy::element_type; - constexpr int BN = PaTy::length; - - if constexpr (PaTy::Size_in_bytes == TR::Size_in_bytes) { - writeRegion(Region.second, detail::bitcast(Val)); - } else { - // Recursively read the base - auto Base = detail::readRegion(data(), Region.second); - // If element type differs, do bitcast conversion first. - auto Base1 = detail::bitcast(Base); - constexpr int BN1 = PaTy::Size_in_bytes / sizeof(ElemTy); - - if constexpr (!TR::Is_2D) { - // Access the region information. - constexpr int M = TR::Size_x; - constexpr int Stride = TR::Stride_x; - uint16_t Offset = Region.first.M_offset_x * sizeof(ElemTy); - - // Merge and update. - Base1 = __esimd_wrregion(Base1, Val, Offset); - } else { - static_assert(std::is_same::value); - // Read columns with non-trivial horizontal stride. 
- constexpr int M = TR::length; - constexpr int VS = PaTy::Size_x * TR::Stride_y; - constexpr int W = TR::Size_x; - constexpr int HS = TR::Stride_x; - constexpr int ParentWidth = PaTy::Size_x; - - // Compute the byte offset for the starting element. - uint16_t Offset = static_cast( - (Region.first.M_offset_y * PaTy::Size_x + Region.first.M_offset_x) * - sizeof(ElemTy)); - - // Merge and update. - Base1 = __esimd_wrregion( - Base1, Val, Offset); - } - // Convert back to the original element type, if needed. - auto Merged1 = detail::bitcast(Base1); - // recursively write it back to the base - writeRegion(Region.second, Merged1); - } +#define __ESIMD_DEF_SIMD_ARITH_UNARY_OP(ARITH_UNARY_OP) \ + template simd operator ARITH_UNARY_OP() { \ + static_assert(!std::is_unsigned_v, \ + #ARITH_UNARY_OP "doesn't apply to unsigned types"); \ + return simd(ARITH_UNARY_OP(base_type::data())); \ } - /// @name Memory operations - /// TODO NOTE: These APIs do not support cache hint specification yet, as this - /// is WIP. Later addition of hints is not expected to break code using these - /// APIs. - /// - /// @{ - - /// Copy a contiguous block of data from memory into this simd object. - /// The amount of memory copied equals the total size of vector elements in - /// this object. - /// @param addr the memory address to copy from. Must be a pointer to the - /// global address space, otherwise behavior is undefined. - ESIMD_INLINE void copy_from(const Ty *const addr) SYCL_ESIMD_FUNCTION; - - /// Copy a contiguous block of data from memory into this simd object. - /// The amount of memory copied equals the total size of vector elements in - /// this object. - /// Source memory location is represented via a global accessor and offset. - /// @param acc accessor to copy from. - /// @param offset offset to copy from. - template - ESIMD_INLINE - detail::EnableIfAccessor - copy_from(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; - - /// Copy all vector elements of this object into a contiguous block in memory. - /// @param addr the memory address to copy to. Must be a pointer to the - /// global address space, otherwise behavior is undefined. - ESIMD_INLINE void copy_to(Ty *addr) SYCL_ESIMD_FUNCTION; - - /// Copy all vector elements of this object into a contiguous block in memory. - /// Destination memory location is represented via a global accessor and - /// offset. - /// @param acc accessor to copy from. - /// @param offset offset to copy from. - template - ESIMD_INLINE - detail::EnableIfAccessor - copy_to(AccessorT acc, uint32_t offset) SYCL_ESIMD_FUNCTION; - - /// @} // Memory operations -private: - // The underlying data for this vector. - vector_type M_data; - - void set(const vector_type &Val) { -#ifndef __SYCL_DEVICE_ONLY__ - M_data = Val; -#else - __esimd_vstore(&M_data, Val); -#endif - } + __ESIMD_DEF_SIMD_ARITH_UNARY_OP(-) + __ESIMD_DEF_SIMD_ARITH_UNARY_OP(+) +#undef __ESIMD_DEF_SIMD_ARITH_UNARY_OP }; -template -ESIMD_INLINE simd convert(simd val) { - return __builtin_convertvector(val.data(), detail::vector_type_t); +/// Covert from a simd object with element type \c From to a simd object with +/// element type \c To. +template +ESIMD_INLINE simd convert(const simd &val) { + if constexpr (std::is_same_v) + return val; + else + return __builtin_convertvector(val.data(), detail::vector_type_t); } -// ----------- Outlined implementations of esimd class APIs. 
- -template void simd::copy_from(const T *const Addr) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); +#undef __ESIMD_DEF_RELOP +#undef __ESIMD_DEF_BITWISE_OP - uintptr_t AddrVal = reinterpret_cast(Addr); - *this = - __esimd_flat_block_read_unaligned( - AddrVal); -} - -template -template -ESIMD_INLINE - detail::EnableIfAccessor - simd::copy_from(AccessorT acc, uint32_t offset) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - *this = __esimd_block_read(surf_ind, offset); -#else - *this = __esimd_block_read(acc, offset); -#endif // __SYCL_DEVICE_ONLY__ -} - -template void simd::copy_to(T *addr) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - - uintptr_t AddrVal = reinterpret_cast(addr); - __esimd_flat_block_write(AddrVal, - data()); -} - -template -template -ESIMD_INLINE - detail::EnableIfAccessor - simd::copy_to(AccessorT acc, uint32_t offset) { - constexpr unsigned Sz = sizeof(T) * N; - static_assert(Sz >= detail::OperandSize::OWORD, - "block size must be at least 1 oword"); - static_assert(Sz % detail::OperandSize::OWORD == 0, - "block size must be whole number of owords"); - static_assert(detail::isPowerOf2(Sz / detail::OperandSize::OWORD), - "block must be 1, 2, 4 or 8 owords long"); - static_assert(Sz <= 8 * detail::OperandSize::OWORD, - "block size must be at most 8 owords"); - -#if defined(__SYCL_DEVICE_ONLY__) - auto surf_ind = detail::AccessorPrivateProxy::getNativeImageObj(acc); - __esimd_block_write(surf_ind, offset >> 4, data()); -#else - __esimd_block_write(acc, offset >> 4, data()); -#endif // __SYCL_DEVICE_ONLY__ -} +/// Represents a simd mask. 
+template using simd_mask = detail::simd_mask_type; } // namespace esimd } // namespace experimental @@ -688,11 +136,12 @@ ESIMD_INLINE } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) -#ifndef __SYCL_DEVICE_ONLY__ template -std::ostream & -operator<<(std::ostream &OS, - const sycl::ext::intel::experimental::esimd::simd &V) { +std::ostream &operator<<(std::ostream &OS, const __SEIEE::simd &V) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else +{ OS << "{"; for (int I = 0; I < N; I++) { OS << V[I]; @@ -702,5 +151,4 @@ operator<<(std::ostream &OS, OS << "}"; return OS; } - -#endif +#endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp index 51751e3cca65f..a6d18fcde31a4 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/simd_view.hpp @@ -11,6 +11,7 @@ #pragma once #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -25,143 +26,79 @@ namespace esimd { /// /// \ingroup sycl_esimd template -class simd_view : public detail::simd_view_impl> { +class simd_view : public detail::simd_view_impl { + template friend class detail::simd_obj_impl; + template friend class detail::simd_mask_impl; + template friend class simd_view; template friend class simd; - template friend class detail::simd_view_impl; + template friend class detail::simd_view_impl; public: - using BaseClass = - detail::simd_view_impl>; + static_assert(detail::is_simd_obj_impl_derivative_v); + using BaseClass = detail::simd_view_impl; + + // Deduce the corresponding value type from its region type. using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; + using base_type = BaseTy; + template + using get_simd_t = typename BaseClass::template get_simd_t; + + /// The region type of this class. + using region_type = RegionTy; + + /// The element type of this class, which could be different from the element + /// type of the base object type. using element_type = typename ShapeTy::element_type; - /// The simd type if reading this simd_view object. - using value_type = simd; + /// The simd type if reading the object. + using value_type = get_simd_t; -private: + /// The underlying builtin value type + using vector_type = detail::vector_type_t; + +protected: + /// @{ + /// Constructors. simd_view(BaseTy &Base, RegionTy Region) : BaseClass(Base, Region) {} simd_view(BaseTy &&Base, RegionTy Region) : BaseClass(Base, Region) {} + /// @} public: // Default copy and move constructors for simd_view. simd_view(const simd_view &Other) = default; simd_view(simd_view &&Other) = default; - /// @{ - /// Assignment operators. simd_view &operator=(const simd_view &Other) { - *this = Other.read(); - return *this; - } - simd_view &operator=(const value_type &Val) { - this->M_base.writeRegion(this->M_region, Val.data()); + BaseClass::operator=(Other); return *this; } - /// @} - /// Move assignment operator. 
- simd_view &operator=(simd_view &&Other) { - *this = Other.read(); - return *this; - } + using BaseClass::operator--; + using BaseClass::operator++; + using BaseClass::operator=; +}; -#define DEF_BINOP(BINOP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BINOP(const simd_view &X, \ - const value_type &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = \ - detail::convert(X.read().data()); \ - auto V1 = detail::convert(Y.data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } \ - ESIMD_INLINE friend auto operator BINOP(const simd_view &X, \ - const element_type &Y) { \ - return X BINOP(value_type) Y; \ - } \ - ESIMD_INLINE friend auto operator BINOP(const element_type &X, \ - const simd_view &Y) { \ - return (value_type)X BINOP Y; \ - } \ - ESIMD_INLINE friend auto operator BINOP(const value_type &X, \ +#define __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(RELOP) \ + /* simd_view RELOP simd_view */ \ + ESIMD_INLINE friend bool operator RELOP(const simd_view &X, \ const simd_view &Y) { \ - using ComputeTy = detail::compute_type_t; \ - auto V0 = detail::convert(X.data()); \ - auto V1 = \ - detail::convert(Y.read().data()); \ - auto V2 = V0 BINOP V1; \ - return ComputeTy(V2); \ - } - - DEF_BINOP(+, +=) - DEF_BINOP(-, -=) - DEF_BINOP(*, *=) - DEF_BINOP(/, /=) - DEF_BINOP(%, %=) - -#undef DEF_BINOP - -#define DEF_BITWISE_OP(BITWISE_OP, OPASSIGN) \ - ESIMD_INLINE friend auto operator BITWISE_OP(const simd_view &X, \ - const value_type &Y) { \ - static_assert(std::is_integral(), "not integral type"); \ - auto V2 = X.read().data() BITWISE_OP Y.data(); \ - return simd(V2); \ - } \ - ESIMD_INLINE friend auto operator BITWISE_OP(const simd_view &X, \ - const element_type &Y) { \ - return X BITWISE_OP(value_type) Y; \ - } \ - ESIMD_INLINE friend auto operator BITWISE_OP(const value_type &X, \ - const simd_view &Y) { \ - static_assert(std::is_integral(), "not integral type"); \ - auto V2 = X.data() BITWISE_OP Y.read().data(); \ - return simd(V2); \ - } - - DEF_BITWISE_OP(&, &=) - DEF_BITWISE_OP(|, |=) - DEF_BITWISE_OP(^, ^=) - DEF_BITWISE_OP(>>, >>=) - DEF_BITWISE_OP(<<, <<=) - -#undef DEF_BITWISE_OP - -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const value_type &Y) { \ - auto R = X.read().data() RELOP Y.data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ - } \ - ESIMD_INLINE friend simd operator RELOP( \ - const value_type &X, const simd_view &Y) { \ - auto R = X.data() RELOP Y.read().data(); \ - mask_type_t M(1); \ - return M & detail::convert>(R); \ + return (element_type)X RELOP(element_type) Y; \ } \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const element_type &Y) { \ - return X RELOP(value_type) Y; \ + \ + /* simd_view RELOP SCALAR */ \ + template >> \ + ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ + return (element_type)X RELOP Y; \ } \ - ESIMD_INLINE friend simd operator RELOP( \ - const simd_view &X, const simd_view &Y) { \ - return (X RELOP Y.read()); \ + \ + /* SCALAR RELOP simd_view */ \ + template >> \ + ESIMD_INLINE friend bool operator RELOP(T1 X, const simd_view &Y) { \ + return X RELOP(element_type) Y; \ } - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP -}; - /// This is a specialization of simd_view class with a single element. 
/// Objects of such a class are created in the following situation: /// simd v = 1; @@ -174,24 +111,26 @@ class simd_view : public detail::simd_view_impl v[1] && v[2] < 42; /// /// \ingroup sycl_esimd -template -class simd_view> - : public detail::simd_view_impl< - BaseTy, region1d_scalar_t, - simd_view>> { - template friend class simd; - template friend class detail::simd_view_impl; +template +class simd_view> + : public detail::simd_view_impl> { + template friend class detail::simd_obj_impl; + template friend class detail::simd_view_impl; public: - using RegionTy = region1d_scalar_t; - using BaseClass = - detail::simd_view_impl>; + using RegionTy = region1d_scalar_t; + using BaseClass = detail::simd_view_impl; using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; static_assert(1 == length, "length of this view is not equal to 1"); /// The element type of this class, which could be different from the element /// type of the base object type. - using element_type = T; + using element_type = typename ShapeTy::element_type; + using base_type = BaseTy; + template + using get_simd_t = typename BaseClass::template get_simd_t; + /// The simd type if reading the object. + using value_type = get_simd_t; private: simd_view(BaseTy &Base, RegionTy Region) : BaseClass(Base, Region) {} @@ -203,28 +142,16 @@ class simd_view> return v[0]; } + using BaseClass::operator--; + using BaseClass::operator++; using BaseClass::operator=; -#define DEF_RELOP(RELOP) \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, \ - const simd_view &Y) { \ - return (element_type)X RELOP(element_type) Y; \ - } \ - template ::value && \ - detail::is_vectorizable_v::value>> \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ - return (element_type)X RELOP Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(==) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(!=) }; // TODO: remove code duplication in two class specializations for a simd_view @@ -235,23 +162,16 @@ class simd_view> /// simd v = 1; /// auto v1 = v.select<2, 1>(0); /// auto v2 = v1[0]; // simd_view of a nested region for a single element -template -class simd_view, NestedRegion>> +template +class simd_view, NestedRegion>> : public detail::simd_view_impl< - BaseTy, - std::pair, NestedRegion>, - simd_view, - NestedRegion>>> { + BaseTy, std::pair, NestedRegion>> { template friend class simd; - template friend class detail::simd_view_impl; + template friend class detail::simd_view_impl; public: - using RegionTy = - std::pair, NestedRegion>; - using BaseClass = - detail::simd_view_impl>; + using RegionTy = std::pair, NestedRegion>; + using BaseClass = detail::simd_view_impl; using ShapeTy = typename shape_type::type; static constexpr int length = ShapeTy::Size_x * ShapeTy::Size_y; static_assert(1 == length, "length of this view is not equal to 1"); @@ -264,35 +184,23 @@ class simd_view::value && \ - detail::is_vectorizable_v::value>> \ - ESIMD_INLINE friend bool operator RELOP(const simd_view &X, T1 Y) { \ - return (element_type)X RELOP Y; \ - } - - DEF_RELOP(>) - DEF_RELOP(>=) - DEF_RELOP(<) - DEF_RELOP(<=) - DEF_RELOP(==) - DEF_RELOP(!=) - -#undef DEF_RELOP + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>) + 
__ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(>=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(<=) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(==) + __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP(!=) }; +#undef __ESIMD_DEF_SCALAR_SIMD_VIEW_RELOP + } // namespace esimd } // namespace experimental } // namespace intel diff --git a/sycl/test/esimd/esimd-util-compiler-eval.cpp b/sycl/test/esimd/esimd-util-compiler-eval.cpp index ed03ea3ce81d1..52cac182d32b3 100644 --- a/sycl/test/esimd/esimd-util-compiler-eval.cpp +++ b/sycl/test/esimd/esimd-util-compiler-eval.cpp @@ -19,11 +19,8 @@ static_assert(log2<1024 * 1024>() == 20, ""); using BaseTy = simd; using RegionTy = region1d_t; -using RegionTy1 = region1d_scalar_t; -static_assert( - !is_simd_view_v< - simd_view_impl>>::value, - ""); -static_assert(is_simd_view_v>::value, ""); -static_assert(is_simd_view_v>::value, ""); -static_assert(!is_simd_view_v::value, ""); +using RegionTy1 = region1d_scalar_t; +static_assert(!is_simd_view_type_v>, ""); +static_assert(is_simd_view_type_v>, ""); +static_assert(is_simd_view_type_v>, ""); +static_assert(!is_simd_view_type_v, ""); diff --git a/sycl/test/esimd/esimd_math.cpp b/sycl/test/esimd/esimd_math.cpp index 17420b678fd2c..e5b0b5e7d2d3e 100644 --- a/sycl/test/esimd/esimd_math.cpp +++ b/sycl/test/esimd/esimd_math.cpp @@ -8,12 +8,12 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_esimd_mask() __attribute__((sycl_device)) { - simd a(0); + simd_mask<16> a(0); a.select<4, 1>(4) = 1; a.select<4, 1>(12) = 1; unsigned int b = esimd_pack_mask(a); - simd c = esimd_unpack_mask<16>(b); + simd_mask<16> c = esimd_unpack_mask<16>(b); unsigned int d = esimd_pack_mask(c); diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index 23726115e189e..989ae37b6818b 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -42,7 +42,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { simd v1(0, x + z); simd offsets(0, y); simd v_addr(reinterpret_cast(ptr)); - simd pred; + simd_mask pred; v_addr += offsets; __esimd_flat_atomic0( @@ -50,10 +50,10 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.inc.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) __esimd_flat_atomic1( - v_addr.data(), v1, pred.data()); + v_addr.data(), v1.data(), pred.data()); // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.add.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) __esimd_flat_atomic2( - v_addr.data(), v1, v1, pred.data()); + v_addr.data(), v1.data(), v1.data(), pred.data()); // CHECK: %{{[0-9a-zA-Z_.]+}} = call <32 x i32> @llvm.genx.svm.atomic.cmpxchg.v32i32.v32i1.v32i64(<32 x i1> %{{[0-9a-zA-Z_.]+}}, <32 x i64> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> %{{[0-9a-zA-Z_.]+}}, <32 x i32> undef) uintptr_t addr = reinterpret_cast(ptr); diff --git a/sycl/test/esimd/lane_id.cpp b/sycl/test/esimd/lane_id.cpp index df6031ac1810f..0f25e7cc4c6e6 100644 --- a/sycl/test/esimd/lane_id.cpp +++ b/sycl/test/esimd/lane_id.cpp @@ -26,7 +26,7 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo(int x) { SIMT_BEGIN(16, lane) //CHECK: define internal spir_func void @_ZZ3fooiENKUlvE_clEv({{.*}}) {{.*}} #[[ATTR:[0-9]+]] //CHECK: %{{[0-9a-zA-Z_.]+}} = tail call spir_func i32 @_Z15__esimd_lane_idv() - v.select<1, 0>(lane) = x++; + v.select<1, 1>(lane) = 
x++; SIMT_END return v; } diff --git a/sycl/test/esimd/operators.cpp b/sycl/test/esimd/operators.cpp new file mode 100644 index 0000000000000..6e9334cba2175 --- /dev/null +++ b/sycl/test/esimd/operators.cpp @@ -0,0 +1,514 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +using namespace sycl::ext::intel::experimental::esimd; + +// dummy use of 'v' - storing it to memory +#define USE_v *(decltype(v) *)(++out) = v + +// --- bitwise +template +[[intel::sycl_explicit_simd]] auto +bitwise_op_test_impl(const simd &x, simd &x1, + const simd &y, simd &y1, + const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + int a = 1; + + // simd ^ simd + { + auto k = x1 ^= y; + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ y; + USE_v; + } + // simd ^ SCALAR + { + auto k = x1 ^= 5; + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ (T2)5; + USE_v; + } + // SCALAR ^ simd + { + auto v = (T1)5 ^ y; + USE_v; + } + + // mask ^ mask + { + auto k = m1 ^= m; + auto v = k ^ m; + USE_v; + } + { + auto v = m ^ m1; + USE_v; + } + // mask ^ SCALAR + { + auto k = m1 ^= a; + auto v = m ^ k; + USE_v; + } + { + auto v = m ^ 5; + USE_v; + } + // SCALAR ^ mask + { + auto v = 5 ^ m; + USE_v; + } + + // simd_view ^ simd_view + { + simd k = x1.template select<8, 1>() ^= y1.template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + simd k = x1.template select<8, 1>().template select<8, 1>() ^= + y1.template select<8, 1>().template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = x1.template select<8, 1>().template select<8, 1>() ^ + y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } + // simd ^ simd_view + { + auto k = x1 ^= y1.template select<8, 1>(); + auto v = k ^ y; + USE_v; + } + { + auto v = x ^ y1.template select<8, 1>(); + USE_v; + } + // simd_view ^ simd + { + simd k = x1.template select<8, 1>() ^= y; + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ y; + USE_v; + } + + // simd_view ^ simd_view + { + simd_mask<8> k = m1.select<8, 1>() ^= m1.select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + simd_mask<8> k = m1.select<8, 1>().select<8, 1>() ^= + m1.select<8, 1>().select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>() ^ m1.select<8, 1>(); + USE_v; + } + { + auto v = + m1.select<8, 1>().select<8, 1>() ^ m1.select<8, 1>().select<8, 1>(); + USE_v; + } + // simd_mask ^ simd_view + { + auto k = m1 ^= m1.select<8, 1>(); + auto v = k ^ m; + USE_v; + } + { + auto v = m ^ m1.select<8, 1>(); + USE_v; + } + { + auto v = m ^ m1.select<8, 1>().select<8, 1>(); + USE_v; + } + // simd_view ^ simd_mask + { + simd_mask<8> k = m1.select<8, 1>() ^= m; + auto v = k ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>() ^ m; + USE_v; + } + { + auto v = m1.select<8, 1>().select<8, 1>() ^ m; + USE_v; + } + + // simd_view ^ SCALAR + { + simd k = x1.template select<8, 1>() ^= (T2)5; + auto v = k ^ y; + USE_v; + } + { + auto v = x1.template select<8, 1>() ^ (T2)5; + USE_v; + } + { + auto v = x1.template select<8, 1>().template select<8, 1>() ^ (T2)5; + USE_v; + } + // SCALAR ^ simd_view + { + auto v = (T1)5 ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = (T1)5 ^ y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } + + // simd_view ^ SCALAR + { + simd_mask<8> k = m1.template select<8, 1>() ^= a; + auto v = k ^ m; + USE_v; + } + { + auto v = m1.template select<8, 1>() ^ a; + USE_v; + } + { + auto 
v = m1.template select<8, 1>().template select<8, 1>() ^ a; + USE_v; + } + // SCALAR ^ simd_view + { + auto v = a ^ y1.template select<8, 1>(); + USE_v; + } + { + auto v = a ^ y1.template select<8, 1>().template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void bitwise_op_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + bitwise_op_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void bitwise_op_tests(simd *out) { + bitwise_op_test(out); + bitwise_op_test(out); + bitwise_op_test(out); +} + +// --- arithmetic +template +[[intel::sycl_explicit_simd]] auto +arith_bin_op_test_impl(const simd &x, simd &x1, + const simd &y, simd &y1, + simd *out) { + // simd * simd + { + auto k = x1 *= y; + auto v = x * k; + USE_v; + } + { + auto v = x * y; + USE_v; + } + // simd * SCALAR + { + auto k = x1 *= (T2)5; + auto v = x * k; + USE_v; + } + { + auto v = x * (T2)5; + USE_v; + } + // SCALAR * simd + { + auto v = (T1)5 * y; + USE_v; + } + + // simd_view * simd_view + { + simd k = x1.template select<8, 1>() *= y1.template select<8, 1>(); + auto v = x1.template select<8, 1>() * k; + USE_v; + } + { + auto v = x1.template select<8, 1>() * y1.template select<8, 1>(); + USE_v; + } + // simd * simd_view + { + auto k = x1 *= y1.template select<8, 1>(); + auto v = x * k; + USE_v; + } + { + auto v = x * y1.template select<8, 1>(); + USE_v; + } + // simd_view * simd + { + simd k = x1.template select<8, 1>() *= y; + auto v = k * y; + USE_v; + } + { + auto v = x1.template select<8, 1>() * y; + USE_v; + } + + // simd_view * SCALAR + { + simd k = x1.template select<8, 1>() *= (T2)5; + auto v = k * (T2)5; + USE_v; + } + { + auto v = x1.template select<8, 1>() * (T2)5; + USE_v; + } + // SCALAR * simd_view + { + auto v = (T1)5 * y1.template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void arith_bin_op_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + + arith_bin_op_test_impl(x, x1, y, y1, out); +} + +[[intel::sycl_explicit_simd]] void +arith_bin_op_tests(simd *out) { + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); + arith_bin_op_test(out); +} + +// --- equality comparison + +template +[[intel::sycl_explicit_simd]] auto +equ_cmp_test_impl(const simd &x, simd &x1, const simd &y, + simd &y1, const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + // simd == simd + { + auto v = x == y; + USE_v; + } + // simd == SCALAR + { + auto v = x == (T2)5; + USE_v; + } + // SCALAR == simd + { + auto v = (T1)5 == y; + USE_v; + } + + // mask == mask + { + auto v = m == m1; + USE_v; + } + // mask == SCALAR + { + auto v = m == 5; + USE_v; + } + // SCALAR == mask + { + auto v = 5 == m; + USE_v; + } + + // simd_view == simd_view + { + auto v = x1.template select<8, 1>() == y1.template select<8, 1>(); + USE_v; + } + // simd == simd_view + { + auto v = x == y1.template select<8, 1>(); + USE_v; + } + // simd_view == simd + { + auto v = x1.template select<8, 1>() == y; + USE_v; + } + + // simd_view == simd_view + { + auto v = m1.select<8, 1>() == m1.select<8, 1>(); + USE_v; + } + // simd_mask == simd_view + { + auto v = m == m1.select<8, 1>(); + USE_v; + } + // simd_view == simd_mask + { + auto v = m1.select<8, 1>() == m; + USE_v; + } + + // simd_view == SCALAR + { + auto v = x1.template select<8, 1>() == (T2)5; + USE_v; + } + // SCALAR == simd_view + { + auto v 
= (T1)5 == y1.template select<8, 1>(); + USE_v; + } + + // simd_view == SCALAR + int a = 1; + { + auto v = m1.select<8, 1>() == a; + USE_v; + } + // SCALAR == simd_view + { + auto v = a == m1.select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void equ_cmp_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + equ_cmp_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void equ_cmp_tests(simd *out) { + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); + equ_cmp_test(out); +} + +// --- comparison + +template +[[intel::sycl_explicit_simd]] auto +lt_cmp_test_impl(const simd &x, simd &x1, const simd &y, + simd &y1, const simd_mask<8> &m, simd_mask<8> &m1, + simd *out) { + // simd < simd + { + auto v = x < y; + USE_v; + } + // simd < SCALAR + { + auto v = x < (T2)5; + USE_v; + } + // SCALAR < simd + { + auto v = (T1)5 == y; + USE_v; + } + + // simd_view < simd_view + { + auto v = x1.template select<8, 1>() < y1.template select<8, 1>(); + USE_v; + } + // simd < simd_view + { + auto v = x < y1.template select<8, 1>(); + USE_v; + } + // simd_view < simd + { + auto v = x1.template select<8, 1>() < y; + USE_v; + } + + // simd_view < SCALAR + { + auto v = x1.template select<8, 1>() < (T2)5; + USE_v; + } + // SCALAR == simd_view + { + auto v = (T1)5 < y1.template select<8, 1>(); + USE_v; + } +} + +template +[[intel::sycl_explicit_simd]] void lt_cmp_test(simd *out) { + simd x((T1)10); + simd x1((T1)11); + const simd y((T2)17); + simd y1((T2)19); + const simd_mask<8> m(1); + simd_mask<8> m1(0); + + lt_cmp_test_impl(x, x1, y, y1, m, m1, out); +} + +[[intel::sycl_explicit_simd]] void lt_cmp_tests(simd *out) { + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); + lt_cmp_test(out); +} diff --git a/sycl/test/esimd/regression/simd_wrapper.cpp b/sycl/test/esimd/regression/simd_wrapper.cpp index 0655e220a6e73..dee22b23fa5b3 100644 --- a/sycl/test/esimd/regression/simd_wrapper.cpp +++ b/sycl/test/esimd/regression/simd_wrapper.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// RUN: %clangxx -fsycl -fsyntax-only -fsycl-device-only -Xclang -verify %s #include #include diff --git a/sycl/test/esimd/simd.cpp b/sycl/test/esimd/simd.cpp index 5b19035211d15..a38b732751312 100644 --- a/sycl/test/esimd/simd.cpp +++ b/sycl/test/esimd/simd.cpp @@ -43,6 +43,7 @@ void test_conversion() SYCL_ESIMD_FUNCTION { simd f = v; simd c = f; simd c1 = f.select<16, 1>(0); + c.select<32, 1>(0) = f; f = v + static_cast>(c); } @@ -64,16 +65,24 @@ bool test_simd_format() SYCL_ESIMD_FUNCTION { (decltype(ref3)::getSizeX() == 4) && (decltype(ref3)::getSizeY() == 8); } -bool test_simd_select() SYCL_ESIMD_FUNCTION { - simd v(0, 1); - auto ref0 = v.select<4, 2>(1); // r{1, 3, 5, 7} - auto ref1 = v.bit_cast_view(); // 0,1,2,3; - // 4,5,6,7; - // 8,9,10,11; - // 12,13,14,15 - auto ref2 = ref1.select<2, 1, 2, 2>(0, 1); - return ref0[0] == 1 && decltype(ref2)::getSizeX() == 2 && - decltype(ref2)::getStrideY() == 1; +bool test_simd_select(int a) SYCL_ESIMD_FUNCTION { + { + simd f = a; + simd c1 = 2; + c1.select<16, 1>(0) = f.select<16, 1>(0); + c1.select<16, 1>(0).select<16, 1>(0) = f.select<16, 1>(0).select<16, 1>(0); + } + { + simd v(0, 1); + auto ref0 = v.select<4, 2>(1); // r{1, 3, 5, 7} + auto ref1 = v.bit_cast_view(); // 0,1,2,3; + // 4,5,6,7; + // 8,9,10,11; + // 12,13,14,15 + auto ref2 = ref1.select<2, 1, 2, 2>(0, 1); 
+ return ref0[0] == 1 && decltype(ref2)::getSizeX() == 2 && + decltype(ref2)::getStrideY() == 1; + } } bool test_2d_offset() SYCL_ESIMD_FUNCTION { @@ -111,7 +120,6 @@ bool test_simd_unary_ops() SYCL_ESIMD_FUNCTION { v0 <<= v1; v1 = -v0; v0 = ~v1; - v1 = !v0; return v1[0] == 1; } @@ -259,7 +267,7 @@ bool test_simd_iselect() SYCL_ESIMD_FUNCTION { simd a(0, 2); auto data = v.iselect(a); data += 16; - v.iupdate(a, data, 1); + v.iupdate(a, data, simd_mask<8>(1)); auto ref = v.select<8, 2>(0); return ref[0] == 16 && ref[14] == 32; } diff --git a/sycl/test/esimd/simd_mask.cpp b/sycl/test/esimd/simd_mask.cpp new file mode 100644 index 0000000000000..a83c1f53545f9 --- /dev/null +++ b/sycl/test/esimd/simd_mask.cpp @@ -0,0 +1,78 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note %s +// RUN: %clangxx -fsycl -fsyntax-only %s + +// This test checks that both host and device compilers can +// successfully compile simd_mask APIs. + +#include +#include +#include +#include + +using namespace sycl::ext::intel::experimental::esimd; +using namespace cl::sycl; + +#define DEFINE_BIN_OP_TEST(op, name) \ + template \ + SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask test_impl_##name( \ + simd_mask &m1, simd_mask &m2) { \ + return m1 op m2; \ + } \ + \ + simd_mask<1> test_impl_1_##name(simd_mask<1> &m1, simd_mask<1> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<17> test_impl_17_##name(simd_mask<17> &m1, simd_mask<17> &m2) { \ + return test_impl_##name(m1, m2); \ + } \ + \ + simd_mask<32> test_impl_32_##name(simd_mask<32> &m1, simd_mask<32> &m2) { \ + return test_impl_##name(m1, m2); \ + } + +DEFINE_BIN_OP_TEST(&&, and) +DEFINE_BIN_OP_TEST(||, or) +DEFINE_BIN_OP_TEST(&, bit_and) +DEFINE_BIN_OP_TEST(|, bit_or) +DEFINE_BIN_OP_TEST(^, xor) +DEFINE_BIN_OP_TEST(==, eq) +DEFINE_BIN_OP_TEST(!=, ne) +DEFINE_BIN_OP_TEST(&=, bit_and_eq) +DEFINE_BIN_OP_TEST(|=, bit_or_eq) +DEFINE_BIN_OP_TEST(^=, xor_eq) + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask<8> misc_tests(bool val) { + simd_mask<8> m1(val); // broadcast constructor + simd_mask<8> m2; // default constructor + simd_mask<8> m3(m1[4]); // operator[] + simd_mask<8> m4 = !m3; // operator! 
+ static_assert(m4.length == 8, "size() failed"); + simd ch1(1); + simd ch2(2); + simd_mask<8> m5 = ch1 > ch2; + m1[3] ^= 1; // binop on writable single-element view + ch1.merge(ch2, m1.select<8, 1>(0)); // simd_view used as mask + + return m5; +} + +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void compat_test(float *ptr) { + simd pred(1); + simd offsets; + simd pred1(1); + auto pred2 = pred1.bit_cast_view(); + + // expected-warning@+1 {{deprecated}} + auto x1 = gather(ptr, offsets, pred); + // expected-warning@+1 {{deprecated}} + auto x11 = gather(ptr, offsets, pred2); + // expected-warning@+1 {{deprecated}} + auto x2 = gather(ptr, offsets, simd{}); + simd_mask<16> m1(0); + // expected-warning@+1 {{deprecated}} + m1 = pred; + simd_mask<16> m2(0); + // expected-warning@+1 {{deprecated}} + m2 = std::move(pred); +} diff --git a/sycl/test/esimd/simd_merge.cpp b/sycl/test/esimd/simd_merge.cpp index 5fed799cf90c8..bd52b914df74c 100644 --- a/sycl/test/esimd/simd_merge.cpp +++ b/sycl/test/esimd/simd_merge.cpp @@ -10,7 +10,7 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_simd_merge1() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; v0.merge(v1, mask); return v0[0] == 2 && v0[4] == 2 && v0[8] == 2 && v0[12] == 2; @@ -20,7 +20,7 @@ bool test_simd_merge2() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; simd v2 = 3; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; v0.merge(v1, v2, (v1 < v2) & mask); return v0[0] == 2 && v0[4] == 2 && v0[8] == 2 && v0[12] == 2 && v0[3] == 3 && @@ -30,7 +30,7 @@ bool test_simd_merge2() __attribute__((sycl_device)) { bool test_simd_merge2d1() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; auto v0_2d = v0.bit_cast_view(); v0_2d.merge(v1, mask); @@ -41,7 +41,7 @@ bool test_simd_merge2d2() __attribute__((sycl_device)) { simd v0 = 1; simd v1 = 2; simd v2 = 3; - simd mask = 0; + simd_mask<16> mask = 0; mask.select<4, 4>(0) = 1; auto v0_2d = v0.bit_cast_view(); v0_2d.merge(v1, v2, mask); diff --git a/sycl/test/esimd/simd_replicate_deprecated.cpp b/sycl/test/esimd/simd_replicate_deprecated.cpp index 52adc19670b16..07d6d05b54b30 100644 --- a/sycl/test/esimd/simd_replicate_deprecated.cpp +++ b/sycl/test/esimd/simd_replicate_deprecated.cpp @@ -9,8 +9,8 @@ using namespace sycl::ext::intel::experimental::esimd; bool test_replicate1() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = v0.replicate<4, 2>(2); return v0[2] == v0_rep[2] && v0[3] == v0_rep[5]; @@ -19,8 +19,8 @@ bool test_replicate1() { bool test_replicate2() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = 
v0.replicate<2, 4, 2>(1); return v0_rep[0] == v0[1] && v0_rep[1] == v0[2] && v0_rep[2] == v0[5]; @@ -29,8 +29,8 @@ bool test_replicate2() { bool test_replicate3() { simd v0(0, 1); // expected-warning@+3 2 {{deprecated}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{}} - // expected-note@sycl/ext/intel/experimental/esimd/simd.hpp:* {{has been explicitly marked deprecated here}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{}} + // expected-note@sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:* {{has been explicitly marked deprecated here}} auto v0_rep = v0.replicate<2, 4, 2, 2>(1); return v0_rep[0] == v0[1] && v0_rep[1] == v0[3] && v0_rep[2] == v0[5]; diff --git a/sycl/test/esimd/simd_subscript.cpp b/sycl/test/esimd/simd_subscript.cpp index 5e1ae1b1f56fa..bf41f2d8b895b 100644 --- a/sycl/test/esimd/simd_subscript.cpp +++ b/sycl/test/esimd/simd_subscript.cpp @@ -65,7 +65,7 @@ void test_simd_writable_subscript() SYCL_ESIMD_FUNCTION { v[1] = 0; // returns simd_view // CHECK: simd_subscript.cpp:69{{.*}}warning: {{.*}} deprecated - // CHECK: sycl/ext/intel/experimental/esimd/simd.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here + // CHECK: sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here v(1) = 0; } @@ -76,7 +76,7 @@ void test_simd_const_subscript() SYCL_ESIMD_FUNCTION { cv[1] = 0; // CHECK: simd_subscript.cpp:80{{.*}}warning: {{.*}} deprecated - // CHECK: sycl/ext/intel/experimental/esimd/simd.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here + // CHECK: sycl/ext/intel/experimental/esimd/detail/simd_obj_impl.hpp:{{.*}} note: {{.*}} has been explicitly marked deprecated here int val3 = cv(0); } diff --git a/sycl/test/esimd/simd_view.cpp b/sycl/test/esimd/simd_view.cpp index ffdb3b6f4f791..d7cbb2ed2c35a 100644 --- a/sycl/test/esimd/simd_view.cpp +++ b/sycl/test/esimd/simd_view.cpp @@ -6,7 +6,7 @@ using namespace sycl::ext::intel::experimental::esimd; -SYCL_ESIMD_FUNCTION bool test_simd_view_bin_ops() { +SYCL_ESIMD_FUNCTION auto test_simd_view_bin_ops() { simd v0 = 1; simd v1 = 2; auto ref0 = v0.select<8, 2>(0); @@ -20,7 +20,44 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_bin_ops() { ref0 *= 2; ref0 /= ref1; ref0 /= 2; - return v0[0] == 1; + if (v0[0] == 1) + return ref0 + (short)3; + else + return ref0 + ref1; +} + +SYCL_ESIMD_FUNCTION auto test_simd_view_bitwise_ops() { + simd v0 = 1; + simd v1 = 2; + auto ref0 = v0.select<8, 2>(0); + auto ref1 = v1.select<8, 2>(0); + simd v2 = (ref0 | ref1) & (ref0 | 3); + ref0 |= 3; + ref0 |= ref1; + simd v3 = (ref0 ^ ref1) & (ref0 ^ 3); + ref0 ^= 3; + ref0 ^= ref1; + simd v4 = (ref0 & ref1) | (ref0 & 3); + ref0 &= 3; + ref0 &= ref1; + return ref0; +} + +SYCL_ESIMD_FUNCTION auto test_simd_mask_view_bitwise_ops() { + simd_mask<16> v0 = 1; + simd_mask<16> v1 = 2; + auto ref0 = v0.select<8, 2>(0); + auto ref1 = v1.select<8, 2>(0); + simd_mask<8> v2 = (ref0 | ref1) & (ref0 | 3); + ref0 |= 3; + ref0 |= ref1; + simd_mask<8> v3 = (ref0 ^ ref1) & (ref0 ^ 3); + ref0 ^= 3; + ref0 ^= ref1; + simd_mask<8> v4 = (ref0 & ref1) | (ref0 & 3); + ref0 &= 3; + ref0 &= ref1; + return ref0; } SYCL_ESIMD_FUNCTION bool test_simd_view_unary_ops() { @@ -31,7 +68,7 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_unary_ops() { ref0 <<= ref1; ref1 = -ref0; ref0 = ~ref1; - auto mask = !ref0; + auto mask = !(ref0 < ref1); return v1[0] == 1; } @@ -54,10 +91,10 @@ SYCL_ESIMD_FUNCTION bool test_simd_view_assign3() { simd v1 = 1; 
auto mask = (v0.select<16, 1>(0) > v1.select<16, 1>(0)); auto mask2 = (v0 > v1); - simd s = 0; + simd_mask<64> s = 0; auto g4 = s.bit_cast_view(); - simd val = (g4.row(2) & mask); - simd val1 = + simd_mask<16> val = (g4.row(2) & mask); + simd_mask<16> val1 = (g4.row(2) & mask2.bit_cast_view().row(0)); return val[0] == 0 && val1[0] == 0; } @@ -123,21 +160,21 @@ void test_simd_view_impl_api_ret_types() SYCL_ESIMD_FUNCTION { simd x = 0; auto v1 = x.select<2, 1>(0); // simd_view, region1d_t> - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2 = v1.select<1, 1>( 0); // simd_view, std::pair, region_base>> - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2_int = v2.bit_cast_view(); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v2_int_2D = v2.bit_cast_view(); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); auto v3 = x.select<2, 1>(2); auto &v4 = (v1 += v3); - static_assert(detail::is_simd_view_v::value, ""); - static_assert(detail::is_simd_view_v::value, ""); + static_assert(detail::is_simd_view_type_v, ""); + static_assert(detail::is_simd_view_type_v, ""); } void test_simd_view_subscript() SYCL_ESIMD_FUNCTION { @@ -169,9 +206,12 @@ void test_simd_view_writeable_subscript() SYCL_ESIMD_FUNCTION { void test_simd_view_binop_with_conv_to_scalar() SYCL_ESIMD_FUNCTION { simd s = 0; auto g = s.bit_cast_view(); - auto x = g.row(1) - (g.row(1))[0]; // binary op - auto y = g.row(1) & (g.row(1))[0]; // bitwise op - auto z = g.row(1) < (g.row(1))[0]; // relational op + auto x1 = g.row(1) - (g.row(1))[0]; // binary op + auto x2 = (g.row(1))[0] - g.row(1); // binary op + auto y1 = g.row(1) & (g.row(1))[0]; // bitwise op + auto y2 = (g.row(1))[0] & g.row(1); // bitwise op + auto z1 = g.row(1) < (g.row(1))[0]; // relational op + auto z2 = (g.row(1))[0] < g.row(1); // relational op } // This code is OK. 
The result of bit_cast_view should be mapped @@ -201,3 +241,65 @@ void test_simd_view_len1_binop() SYCL_ESIMD_FUNCTION { auto v2 = s.select<2, 1>(0); auto x = v1 * v2; } + +void test_simd_view_assign_op() SYCL_ESIMD_FUNCTION { + // multiple elements + { +#define N 4 + // simd - assign views of different element type + simd v1 = 0; + simd v2 = 0; + // - region is a region type (top-level region) + v1.select(0) = v2.select(0); + v2.select(0) = v1.select(0); + // - region is a std::pair (nested region) + v1.select<8, 2>(0).select(1) = v2.select<8, 2>(0).select(1); + v2.select<8, 2>(0).select(1) = v1.select<8, 2>(0).select(1); + // - first region is top-level, second - nested + v1.select<4, 2>(0) = v2.select<8, 2>(0).select<4, 1>(1); + // - first region is nested, second - top-level + v2.select<8, 2>(0).select<4, 1>(1) = v1.select<4, 2>(0); + + // simd_mask + simd_mask<32> m1 = 0; + simd_mask<16> m2 = 0; + // - region is a region type (top-level region) + m1.select<4, 2>(0) = m2.select<4, 2>(0); + m2.select<4, 2>(0) = m1.select<4, 2>(0); + // - region is a std::pair (nested region) + m1.select<8, 2>(0).select(1) = m2.select<8, 2>(0).select(1); + m2.select<8, 2>(0).select(1) = m1.select<8, 2>(0).select(1); + // - first region is top-level, second - nested + m1.select<4, 2>(0) = m2.select<8, 2>(0).select<4, 1>(1); + // - first region is nested, second - top-level + m2.select<8, 2>(0).select<4, 1>(1) = m1.select<4, 2>(0); +#undef N + } + // single element + { +#define N 1 + // simd - assign views of different element type + simd v1 = 0; + simd v2 = 0; + // - region is a region type (top-level region) + v1.select(0) = v2.select(0); + v2[0] = v1[0]; + v2[1] = v1.select(1); + // - region is a std::pair (nested region) + v1.select<4, 2>(0).select(1) = v2.select<4, 2>(0).select(1); + v2.select<4, 2>(0).select(1) = v1.select<4, 2>(0).select(1); + + // simd_mask + simd_mask<16> m1 = 0; + simd_mask<8> m2 = 0; + // - region is a region type (top-level region) + m1.select(0) = m2.select(0); + m2[0] = m1[0]; + m2[1] = m1.select(1); + // - region is a std::pair (nested region) + m1.select<4, 2>(0).select(1) = m2.select<4, 2>(0).select(1); + m2.select<4, 2>(0)[1] = m1.select<4, 2>(0)[1]; + m2.select<4, 2>(0)[2] = m1.select<4, 2>(0).select(2); +#undef N + } +} diff --git a/sycl/test/esimd/simd_view_ret_warn.cpp b/sycl/test/esimd/simd_view_ret_warn.cpp index 072c6299331ba..97324bc7bbecf 100644 --- a/sycl/test/esimd/simd_view_ret_warn.cpp +++ b/sycl/test/esimd/simd_view_ret_warn.cpp @@ -7,7 +7,7 @@ using namespace sycl::ext::intel::experimental::esimd; // and it should be programmers fault, similar to string_view. // However, sometimes we could return simd_view from a function // implicitly. This test checks that users will see a warning in such situation. -simd_view, region1d_t> f1(simd x) { +simd_view, region1d_t> f1(simd x) { // expected-warning@+1 {{address of stack memory associated with parameter 'x' returned}} return x[0]; }
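Usage sketch (illustrative only, not part of the patch): the snippet below shows how the simd_mask<N> alias and the mask-producing relational operators introduced above might be used, following the patterns already exercised by simd_merge.cpp, simd_mask.cpp and operators.cpp in this diff. The umbrella header path and the function names lane_max / every_fourth_and are assumptions made for illustration.

// Minimal, self-contained sketch; assumes the ESIMD umbrella header from this
// patch is available under this path (assumption, adjust to your tree).
#include <sycl/ext/intel/experimental/esimd.hpp>

using namespace sycl::ext::intel::experimental::esimd;

// Per-lane maximum of two vectors: a relational operator now yields a
// simd_mask, which merge() consumes directly.
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd<int, 16> lane_max(simd<int, 16> a,
                                                         simd<int, 16> b) {
  simd_mask<16> m = a < b; // 1 where a[i] < b[i], 0 elsewhere
  a.merge(b, m);           // take b[i] where m[i] != 0, keep a[i] otherwise
  return a;
}

// Masks are regular vector objects: lanes can be set through select() and
// masks can be combined with bitwise operators, as in the tests above.
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION simd_mask<16>
every_fourth_and(simd_mask<16> other) {
  simd_mask<16> m = 0;
  m.select<4, 4>(0) = 1; // set lanes 0, 4, 8, 12
  return m & other;
}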