 
 #pragma once
 
+#include <sycl/ext/intel/esimd/detail/defines_elementary.hpp>
+#include <sycl/ext/intel/esimd/native/common.hpp>
+#include <sycl/ext/intel/experimental/esimd/common.hpp>
+
 #include <sycl/detail/defines.hpp>
 
 #include <cstdint> // for uint* types
 /// @cond ESIMD_DETAIL
 
 #ifdef __SYCL_DEVICE_ONLY__
-#define SYCL_ESIMD_KERNEL __attribute__((sycl_explicit_simd))
-#define SYCL_ESIMD_FUNCTION __attribute__((sycl_explicit_simd))
-
-// Mark a function as nodebug.
-#define ESIMD_NODEBUG __attribute__((nodebug))
-// Mark an "ESIMD global": accessible from all functions in the current
-// translation unit, separate copy per subgroup (work-item), mapped to the
-// SPIR-V private storage class.
-#define ESIMD_PRIVATE                                                         \
-  __attribute__((opencl_private)) __attribute__((sycl_explicit_simd))
-// Bind an ESIMD global variable to a specific register.
-#define ESIMD_REGISTER(n) __attribute__((register_num(n)))
-
-#define __ESIMD_API ESIMD_NODEBUG ESIMD_INLINE
-
 #define __ESIMD_UNSUPPORTED_ON_HOST
-
 #else // __SYCL_DEVICE_ONLY__
-#define SYCL_ESIMD_KERNEL
-#define SYCL_ESIMD_FUNCTION
-
-// TODO ESIMD: define what this means on a Windows host.
-#define ESIMD_NODEBUG
-// On the host device an ESIMD global is a thread-local static variable. This
-// assumes that each work-item is mapped to a separate OS thread on the host
-// device.
-#define ESIMD_PRIVATE thread_local
-#define ESIMD_REGISTER(n)
-
-#define __ESIMD_API ESIMD_INLINE
-
 #define __ESIMD_UNSUPPORTED_ON_HOST                                           \
   throw sycl::exception(sycl::errc::feature_not_supported,                    \
                         "This ESIMD feature is not supported on HOST")
-
 #endif // __SYCL_DEVICE_ONLY__
 
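// A hedged usage sketch for the macro above (illustrative, not code from this
// header): host-side stubs of device-only features expand to a throw on host
// and to an empty statement in device code.
//
//   inline void unsupported_on_host_stub() { __ESIMD_UNSUPPORTED_ON_HOST; }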
-// Mark a function as noinline.
-#define ESIMD_NOINLINE __attribute__((noinline))
-// Force a function to be inlined. 'inline' is used to preserve ODR for
-// functions defined in a header.
-#define ESIMD_INLINE inline __attribute__((always_inline))
-
-// Macros for internal use
-#define __ESIMD_NS sycl::ext::intel::esimd
-#define __ESIMD_DNS sycl::ext::intel::esimd::detail
-#define __ESIMD_EMU_DNS sycl::ext::intel::esimd::emu::detail
-
-#define __ESIMD_QUOTE1(m) #m
-#define __ESIMD_QUOTE(m) __ESIMD_QUOTE1(m)
-#define __ESIMD_NS_QUOTED __ESIMD_QUOTE(__ESIMD_NS)
-#define __ESIMD_DEPRECATED(new_api)                                           \
-  __SYCL_DEPRECATED("use " __ESIMD_NS_QUOTED "::" __ESIMD_QUOTE(new_api))
-
 /// @endcond ESIMD_DETAIL
 
 namespace sycl {
@@ -106,6 +64,19 @@ enum class rgba_channel : uint8_t { R, G, B, A };
 using SurfaceIndex = unsigned int;
 
 namespace detail {
+
+/// Check at compile time if a given 32-bit positive integer is a power of 2.
+ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n) {
+  return (n & (n - 1)) == 0;
+}
+
+/// Check at compile time if a given 32-bit positive integer is both:
+/// - a power of 2, and
+/// - less than or equal to the given limit.
+ESIMD_INLINE constexpr bool isPowerOf2(unsigned int n, unsigned int limit) {
+  return (n & (n - 1)) == 0 && n <= limit;
+}
+
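// A minimal compile-time usage sketch of the helpers above (illustrative
// only; these checks are not part of the header):
//
//   static_assert(isPowerOf2(16u), "16 is 2^4");
//   static_assert(!isPowerOf2(24u), "24 is not a power of 2");
//   static_assert(isPowerOf2(16u, 32u) && !isPowerOf2(64u, 32u),
//                 "the two-argument form also enforces the upper limit");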
 template <rgba_channel Ch>
 static inline constexpr uint8_t ch = 1 << static_cast<int>(Ch);
 static inline constexpr uint8_t chR = ch<rgba_channel::R>;
@@ -151,6 +122,10 @@ constexpr int get_num_channels_enabled(rgba_channel_mask M) {
          is_channel_enabled(M, rgba_channel::A);
 }
 
+#define __ESIMD_USM_DWORD_ATOMIC_TO_LSC                                       \
+  " is supported only on ACM, PVC. USM-based atomic will be auto-converted "  \
+  "to LSC version."
+
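// How the macro above composes (a worked example, not code from this header):
// adjacent string literals concatenate, so a use such as
// __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) yields the
// message:
//
//   "fadd is supported only on ACM, PVC. USM-based atomic will be
//    auto-converted to LSC version."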
 /// Represents an atomic operation. Operations always return the old value(s) of
 /// the target memory location(s) as it was before the operation was applied.
 /// Each operation is annotated with a pseudocode illustrating its semantics,
@@ -167,9 +142,11 @@ enum class atomic_op : uint8_t {
   /// Decrement: <code>*addr = *addr - 1</code>.
   dec = 0x3,
   /// Minimum: <code>*addr = min(*addr, src0)</code>.
-  min = 0x4,
+  umin = 0x4,
+  min __SYCL_DEPRECATED("use umin") = umin,
   /// Maximum: <code>*addr = max(*addr, src0)</code>.
-  max = 0x5,
+  umax = 0x5,
+  max __SYCL_DEPRECATED("use umax") = umax,
   /// Exchange: <code>*addr = src0;</code>
   xchg = 0x6,
   /// Compare and exchange: <code>if (*addr == src0) *addr = src1;</code>
@@ -181,27 +158,177 @@ enum class atomic_op : uint8_t {
   /// Bit \c xor: <code>*addr = *addr ^ src0</code>.
   bit_xor = 0xa,
   /// Minimum (signed integer): <code>*addr = min(*addr, src0)</code>.
-  minsint = 0xb,
+  smin = 0xb,
+  minsint __SYCL_DEPRECATED("use smin") = smin,
   /// Maximum (signed integer): <code>*addr = max(*addr, src0)</code>.
-  maxsint = 0xc,
+  smax = 0xc,
+  maxsint __SYCL_DEPRECATED("use smax") = smax,
   /// Maximum (floating point): <code>*addr = max(*addr, src0)</code>.
-  fmax = 0x10,
+  fmax __SYCL_DEPRECATED("fmax" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x10,
   /// Minimum (floating point): <code>*addr = min(*addr, src0)</code>.
-  fmin = 0x11,
+  fmin __SYCL_DEPRECATED("fmin" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x11,
   /// Compare and exchange (floating point).
   /// <code>if (*addr == src0) *addr = src1;</code>
-  fcmpwr = 0x12,
-  fadd = 0x13,
-  fsub = 0x14,
+  fcmpxchg = 0x12,
+  fcmpwr __SYCL_DEPRECATED("fcmpwr" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = fcmpxchg,
+  fadd __SYCL_DEPRECATED("fadd" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x13,
+  fsub __SYCL_DEPRECATED("fsub" __ESIMD_USM_DWORD_ATOMIC_TO_LSC) = 0x14,
   load = 0x15,
   store = 0x16,
   /// Decrement: <code>*addr = *addr - 1</code>. The only operation which
   /// returns the new value of the destination rather than the old one.
   predec = 0xff,
 };
 
+#undef __ESIMD_USM_DWORD_ATOMIC_TO_LSC
+
 /// @} sycl_esimd_core
 
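// A hedged illustration (not part of this header): the deprecated enumerators
// are aliases of the new ones, so pre-existing code still compiles, merely
// with a deprecation warning.
//
//   static_assert(atomic_op::min == atomic_op::umin);
//   static_assert(atomic_op::maxsint == atomic_op::smax);
//   static_assert(atomic_op::fcmpwr == atomic_op::fcmpxchg);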
+namespace detail {
+template <__ESIMD_NS::native::lsc::atomic_op Op> constexpr int get_num_args() {
+  if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc ||
+                Op == __ESIMD_NS::native::lsc::atomic_op::dec ||
+                Op == __ESIMD_NS::native::lsc::atomic_op::load) {
+    return 0;
+  } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::add ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::sub ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::smin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::smax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::umin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::umax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fadd ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fsub ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fmin ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fmax ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_and ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_or ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) {
+    return 1;
+  } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg ||
+                       Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) {
+    return 2;
+  } else {
+    return -1; // error
+  }
+}
+
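// Illustrative compile-time checks for the helper above (assumed usage, not
// present in the header):
//
//   static_assert(get_num_args<__ESIMD_NS::native::lsc::atomic_op::inc>() == 0);
//   static_assert(get_num_args<__ESIMD_NS::native::lsc::atomic_op::fadd>() == 1);
//   static_assert(get_num_args<__ESIMD_NS::native::lsc::atomic_op::cmpxchg>() == 2);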
+template <__ESIMD_NS::atomic_op Op> constexpr bool has_lsc_equivalent() {
+  switch (Op) {
+  case __ESIMD_NS::atomic_op::xchg:
+  case __ESIMD_NS::atomic_op::predec:
+    return false;
+  default:
+    return true;
+  }
+}
+
+template <__ESIMD_NS::atomic_op Op>
+constexpr __ESIMD_NS::native::lsc::atomic_op to_lsc_atomic_op() {
+  switch (Op) {
+  case __ESIMD_NS::atomic_op::add:
+    return __ESIMD_NS::native::lsc::atomic_op::add;
+  case __ESIMD_NS::atomic_op::sub:
+    return __ESIMD_NS::native::lsc::atomic_op::sub;
+  case __ESIMD_NS::atomic_op::inc:
+    return __ESIMD_NS::native::lsc::atomic_op::inc;
+  case __ESIMD_NS::atomic_op::dec:
+    return __ESIMD_NS::native::lsc::atomic_op::dec;
+  case __ESIMD_NS::atomic_op::min:
+    return __ESIMD_NS::native::lsc::atomic_op::umin;
+  case __ESIMD_NS::atomic_op::max:
+    return __ESIMD_NS::native::lsc::atomic_op::umax;
+  case __ESIMD_NS::atomic_op::cmpxchg:
+    return __ESIMD_NS::native::lsc::atomic_op::cmpxchg;
+  case __ESIMD_NS::atomic_op::bit_and:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_and;
+  case __ESIMD_NS::atomic_op::bit_or:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_or;
+  case __ESIMD_NS::atomic_op::bit_xor:
+    return __ESIMD_NS::native::lsc::atomic_op::bit_xor;
+  case __ESIMD_NS::atomic_op::minsint:
+    return __ESIMD_NS::native::lsc::atomic_op::smin;
+  case __ESIMD_NS::atomic_op::maxsint:
+    return __ESIMD_NS::native::lsc::atomic_op::smax;
+  case __ESIMD_NS::atomic_op::fmax:
+    return __ESIMD_NS::native::lsc::atomic_op::fmax;
+  case __ESIMD_NS::atomic_op::fmin:
+    return __ESIMD_NS::native::lsc::atomic_op::fmin;
+  case __ESIMD_NS::atomic_op::fcmpwr:
+    return __ESIMD_NS::native::lsc::atomic_op::fcmpxchg;
+  case __ESIMD_NS::atomic_op::fadd:
+    return __ESIMD_NS::native::lsc::atomic_op::fadd;
+  case __ESIMD_NS::atomic_op::fsub:
+    return __ESIMD_NS::native::lsc::atomic_op::fsub;
+  case __ESIMD_NS::atomic_op::load:
+    return __ESIMD_NS::native::lsc::atomic_op::load;
+  case __ESIMD_NS::atomic_op::store:
+    return __ESIMD_NS::native::lsc::atomic_op::store;
+  default:
+    static_assert(has_lsc_equivalent<Op>() && "Unsupported LSC atomic op");
+  }
+}
+
+template <__ESIMD_NS::native::lsc::atomic_op Op>
+constexpr __ESIMD_NS::atomic_op to_atomic_op() {
+  switch (Op) {
+  case __ESIMD_NS::native::lsc::atomic_op::add:
+    return __ESIMD_NS::atomic_op::add;
+  case __ESIMD_NS::native::lsc::atomic_op::sub:
+    return __ESIMD_NS::atomic_op::sub;
+  case __ESIMD_NS::native::lsc::atomic_op::inc:
+    return __ESIMD_NS::atomic_op::inc;
+  case __ESIMD_NS::native::lsc::atomic_op::dec:
+    return __ESIMD_NS::atomic_op::dec;
+  case __ESIMD_NS::native::lsc::atomic_op::umin:
+    return __ESIMD_NS::atomic_op::min;
+  case __ESIMD_NS::native::lsc::atomic_op::umax:
+    return __ESIMD_NS::atomic_op::max;
+  case __ESIMD_NS::native::lsc::atomic_op::cmpxchg:
+    return __ESIMD_NS::atomic_op::cmpxchg;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_and:
+    return __ESIMD_NS::atomic_op::bit_and;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_or:
+    return __ESIMD_NS::atomic_op::bit_or;
+  case __ESIMD_NS::native::lsc::atomic_op::bit_xor:
+    return __ESIMD_NS::atomic_op::bit_xor;
+  case __ESIMD_NS::native::lsc::atomic_op::smin:
+    return __ESIMD_NS::atomic_op::minsint;
+  case __ESIMD_NS::native::lsc::atomic_op::smax:
+    return __ESIMD_NS::atomic_op::maxsint;
+  case __ESIMD_NS::native::lsc::atomic_op::fmax:
+    return __ESIMD_NS::atomic_op::fmax;
+  case __ESIMD_NS::native::lsc::atomic_op::fmin:
+    return __ESIMD_NS::atomic_op::fmin;
+  case __ESIMD_NS::native::lsc::atomic_op::fcmpxchg:
+    return __ESIMD_NS::atomic_op::fcmpwr;
+  case __ESIMD_NS::native::lsc::atomic_op::fadd:
+    return __ESIMD_NS::atomic_op::fadd;
+  case __ESIMD_NS::native::lsc::atomic_op::fsub:
+    return __ESIMD_NS::atomic_op::fsub;
+  case __ESIMD_NS::native::lsc::atomic_op::load:
+    return __ESIMD_NS::atomic_op::load;
+  case __ESIMD_NS::native::lsc::atomic_op::store:
+    return __ESIMD_NS::atomic_op::store;
+  }
+}
+
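// A hedged round-trip sketch (illustrative; not shipped in the header): for
// every op with an LSC equivalent, converting to the LSC opcode and back
// yields the original op, e.g.
//
//   static_assert(to_atomic_op<to_lsc_atomic_op<__ESIMD_NS::atomic_op::fadd>()>() ==
//                 __ESIMD_NS::atomic_op::fadd);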
+template <__ESIMD_NS::atomic_op Op> constexpr int get_num_args() {
+  if constexpr (has_lsc_equivalent<Op>()) {
+    return get_num_args<to_lsc_atomic_op<Op>()>();
+  } else {
+    switch (Op) {
+    case __ESIMD_NS::atomic_op::xchg:
+    case __ESIMD_NS::atomic_op::predec:
+      return 1;
+    default:
+      return -1; // error
+    }
+  }
+}
+
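// Usage sketch for the overload above (assumed, for illustration only): ops
// with an LSC equivalent defer to the LSC table; xchg and predec are handled
// explicitly.
//
//   static_assert(get_num_args<__ESIMD_NS::atomic_op::inc>() == 0);
//   static_assert(get_num_args<__ESIMD_NS::atomic_op::xchg>() == 1);
//   static_assert(get_num_args<__ESIMD_NS::atomic_op::cmpxchg>() == 2);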
+} // namespace detail
+
 } // namespace ext::intel::esimd
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)
 } // namespace sycl