Commit bfdd4de

Merge commit '9e41f823ddc4bc57c5e47b6b854862384cff5c68' into llvmspirv_pulldown
2 parents: 3197c99 + 9e41f82
File tree

17 files changed (+979, -805 lines)

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions

@@ -3501,6 +3501,9 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts,
   if (Opts.getSignReturnAddressKey() ==
       LangOptions::SignReturnAddressKeyKind::BKey)
     GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA);
+
+  if (Opts.DeclareSPIRVBuiltins)
+    GenerateArg(Args, OPT_fdeclare_spirv_builtins, SA);
 }
 
 bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
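
For context (not part of the diff): GenerateLangArgs re-emits command-line arguments from a populated LangOptions, so every option that ParseLangArgs can set must also be generated here; before this change, -fdeclare-spirv-builtins was silently dropped whenever an invocation was regenerated from its options. A toy, self-contained model of that round-trip invariant (hypothetical names, not clang's real API) is sketched below.

// Toy sketch of the parse -> generate -> parse round trip the added lines
// preserve for -fdeclare-spirv-builtins. ToyLangOpts, parseArgs and
// generateArgs are illustrative stand-ins, not clang APIs.
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

struct ToyLangOpts {
  bool DeclareSPIRVBuiltins = false;
};

// Parse side: presence of the flag sets the option.
ToyLangOpts parseArgs(const std::vector<std::string> &Args) {
  ToyLangOpts Opts;
  Opts.DeclareSPIRVBuiltins =
      std::find(Args.begin(), Args.end(), "-fdeclare-spirv-builtins") !=
      Args.end();
  return Opts;
}

// Generate side: without this branch (the analogue of the patch), the option
// would not survive regeneration of the argument list.
std::vector<std::string> generateArgs(const ToyLangOpts &Opts) {
  std::vector<std::string> Args;
  if (Opts.DeclareSPIRVBuiltins)
    Args.push_back("-fdeclare-spirv-builtins");
  return Args;
}

int main() {
  ToyLangOpts Opts = parseArgs({"-fdeclare-spirv-builtins"});
  ToyLangOpts RoundTripped = parseArgs(generateArgs(Opts));
  assert(RoundTripped.DeclareSPIRVBuiltins == Opts.DeclareSPIRVBuiltins);
  return 0;
}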

llvm/lib/SYCLLowerIR/LowerESIMD.cpp

Lines changed: 13 additions & 0 deletions

@@ -813,6 +813,12 @@ static Instruction *generateVectorGenXForSpirv(ExtractElementInst *EEI,
   Instruction *ExtrI = ExtractElementInst::Create(
       IntrI, ConstantInt::get(I32Ty, ExtractIndex), ExtractName, EEI);
   Instruction *CastI = addCastInstIfNeeded(EEI, ExtrI);
+  if (EEI->getDebugLoc()) {
+    IntrI->setDebugLoc(EEI->getDebugLoc());
+    ExtrI->setDebugLoc(EEI->getDebugLoc());
+    // It's OK if ExtrI and CastI are the same instruction
+    CastI->setDebugLoc(EEI->getDebugLoc());
+  }
   return CastI;
 }
 
@@ -839,6 +845,11 @@ static Instruction *generateGenXForSpirv(ExtractElementInst *EEI,
   Instruction *IntrI =
       IntrinsicInst::Create(NewFDecl, {}, IntrinName + Suff.str(), EEI);
   Instruction *CastI = addCastInstIfNeeded(EEI, IntrI);
+  if (EEI->getDebugLoc()) {
+    IntrI->setDebugLoc(EEI->getDebugLoc());
+    // It's OK if IntrI and CastI are the same instruction
+    CastI->setDebugLoc(EEI->getDebugLoc());
+  }
   return CastI;
 }
 
@@ -1093,6 +1104,8 @@ static void translateESIMDIntrinsicCall(CallInst &CI) {
       NewFDecl, GenXArgs,
       NewFDecl->getReturnType()->isVoidTy() ? "" : CI.getName() + ".esimd",
       &CI);
+  if (CI.getDebugLoc())
+    NewCI->setDebugLoc(CI.getDebugLoc());
   NewCI = addCastInstIfNeeded(&CI, NewCI);
   CI.replaceAllUsesWith(NewCI);
   CI.eraseFromParent();
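
For context (not part of the diff): LowerESIMD rewrites ESIMD calls into GenX intrinsic calls plus extract/cast instructions, and the added lines copy the original instruction's DebugLoc onto each replacement so source-line information survives the pass; that is what the new -debugify test below verifies. A minimal sketch of the same pattern as a reusable helper (the helper name is hypothetical; only the LLVM calls shown are real):

// Sketch: copy the location of a replaced instruction onto its replacements.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"

static void propagateDebugLoc(const llvm::Instruction &Old,
                              llvm::ArrayRef<llvm::Instruction *> News) {
  const llvm::DebugLoc &DL = Old.getDebugLoc();
  if (!DL) // nothing to propagate if the original carried no location
    return;
  for (llvm::Instruction *NewI : News)
    if (NewI) // passing the same instruction twice is harmless
      NewI->setDebugLoc(DL);
}
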
Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -debugify -LowerESIMD -S < %s | FileCheck %s
+
+; This test checks that debug info is preserved during lowering of
+; ESIMD-specific constructs.
+
+@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
+
+declare spir_func <16 x float> @_Z18__esimd_block_readIfLi16EPU3AS1fEN2cl4sycl5INTEL3gpu11vector_typeIT_XT0_EE4typeET1_j(float addrspace(1)*, i32)
+
+define spir_func void @func1(float addrspace(1)* %arg1, i32 %arg2) {
+; CHECK-LABEL: @func1(
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint float addrspace(1)* [[ARG1:%.*]] to i32, !dbg [[DBG11:![0-9]+]]
+; CHECK-NEXT:    [[CALL1_I_I_ESIMD:%.*]] = call <16 x float> @llvm.genx.oword.ld.unaligned.v16f32(i32 0, i32 [[TMP1]], i32 [[ARG2:%.*]]), !dbg [[DBG11]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata <16 x float> [[CALL1_I_I_ESIMD]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG11]]
+; CHECK-NEXT:    ret void, !dbg [[DBG12:![0-9]+]]
+;
+  %call1.i.i = tail call spir_func <16 x float> @_Z18__esimd_block_readIfLi16EPU3AS1fEN2cl4sycl5INTEL3gpu11vector_typeIT_XT0_EE4typeET1_j(float addrspace(1)* %arg1, i32 %arg2)
+  ret void
+}
+
+define spir_func void @func2(i64 addrspace(1)* %arg1) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata <3 x i64> undef, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]]
+; CHECK-NEXT:    [[DOTESIMD:%.*]] = call <3 x i32> @llvm.genx.local.id.v3i32(), !dbg [[DBG22:![0-9]+]]
+; CHECK-NEXT:    [[LOCAL_ID_X:%.*]] = extractelement <3 x i32> [[DOTESIMD]], i32 0, !dbg [[DBG22]]
+; CHECK-NEXT:    [[LOCAL_ID_X_CAST_TY:%.*]] = zext i32 [[LOCAL_ID_X]] to i64, !dbg [[DBG22]]
+; CHECK-NEXT:    [[DOTESIMD1:%.*]] = call <3 x i32> @llvm.genx.local.size.v3i32(), !dbg [[DBG22]]
+; CHECK-NEXT:    [[WGSIZE_X:%.*]] = extractelement <3 x i32> [[DOTESIMD1]], i32 0, !dbg [[DBG22]]
+; CHECK-NEXT:    [[WGSIZE_X_CAST_TY:%.*]] = zext i32 [[WGSIZE_X]] to i64, !dbg [[DBG22]]
+; CHECK-NEXT:    [[GROUP_ID_X:%.*]] = call i32 @llvm.genx.group.id.x(), !dbg [[DBG22]]
+; CHECK-NEXT:    [[GROUP_ID_X_CAST_TY:%.*]] = zext i32 [[GROUP_ID_X]] to i64, !dbg [[DBG22]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[WGSIZE_X_CAST_TY]], [[GROUP_ID_X_CAST_TY]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[LOCAL_ID_X_CAST_TY]], [[MUL]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[ADD]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]]
+; CHECK-NEXT:    [[PTRIDX_I_I:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[ARG1:%.*]], i64 2, !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 addrspace(1)* [[PTRIDX_I_I]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]]
+; CHECK-NEXT:    [[PTRIDX_ASCAST_I_I:%.*]] = addrspacecast i64 addrspace(1)* [[PTRIDX_I_I]] to i64 addrspace(4)*, !dbg [[DBG24:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 addrspace(4)* [[PTRIDX_ASCAST_I_I]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]]
+; CHECK-NEXT:    store i64 [[ADD]], i64 addrspace(4)* [[PTRIDX_ASCAST_I_I]], align 4, !dbg [[DBG25:![0-9]+]]
+; CHECK-NEXT:    ret void, !dbg [[DBG26:![0-9]+]]
+;
+  %1 = load <3 x i64>, <3 x i64> addrspace(4)* addrspacecast (<3 x i64> addrspace(1)* @__spirv_BuiltInGlobalInvocationId to <3 x i64> addrspace(4)*)
+  %2 = extractelement <3 x i64> %1, i64 0
+  %ptridx.i.i = getelementptr inbounds i64, i64 addrspace(1)* %arg1, i64 2
+  %ptridx.ascast.i.i = addrspacecast i64 addrspace(1)* %ptridx.i.i to i64 addrspace(4)*
+  store i64 %2, i64 addrspace(4)* %ptridx.ascast.i.i
+  ret void
+}

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_host_util.hpp

Lines changed: 17 additions & 22 deletions

@@ -14,19 +14,19 @@
 
 #define SIMDCF_ELEMENT_SKIP(i)
 
-namespace cl {
+__SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
+
 namespace detail {
 namespace half_impl {
 class half;
 } // namespace half_impl
 } // namespace detail
-} // namespace sycl
-} // namespace cl
 
-using half = cl::sycl::detail::half_impl::half;
-
-namespace EsimdEmulSys {
+namespace INTEL {
+namespace gpu {
+namespace emu {
+namespace detail {
 
 constexpr int sat_is_on = 1;
 
@@ -44,14 +44,10 @@ template <typename RT> struct satur {
       return (RT)val;
     }
 
-#ifdef max
-#undef max
-#endif
-#ifdef min
-#undef min
-#endif
-    const RT t_max = std::numeric_limits<RT>::max();
-    const RT t_min = std::numeric_limits<RT>::min();
+    // min/max can be macros on Windows, so wrap them into parens to avoid their
+    // expansion
+    const RT t_max = (std::numeric_limits<RT>::max)();
+    const RT t_min = (std::numeric_limits<RT>::min)();
 
     if (val > t_max) {
       return t_max;
@@ -112,8 +108,6 @@ template <> struct SetSatur<double, true> {
   static unsigned int set() { return sat_is_on; }
 };
 
-} // namespace EsimdEmulSys
-
 // used for intermediate type in dp4a emulation
 template <typename T1, typename T2> struct restype_ex {
 private:
@@ -470,10 +464,11 @@ template <typename T> struct dwordtype;
 template <> struct dwordtype<int> { static const bool value = true; };
 template <> struct dwordtype<unsigned int> { static const bool value = true; };
 
-template <unsigned int N1, unsigned int N2> struct ressize {
-  static const unsigned int size = (N1 > N2) ? N1 : N2;
-  static const bool conformable =
-      check_true < N1 % size == 0 && N2 % size == 0 > ::value;
-};
+} // namespace detail
+} // namespace emu
+} // namespace gpu
+} // namespace INTEL
+} // namespace sycl
+} // __SYCL_INLINE_NAMESPACE(cl)
 
-#endif
+#endif // #ifndef __SYCL_DEVICE_ONLY__
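
Aside on the min/max change (not part of the diff): on Windows, <windows.h> defines function-like min/max macros unless NOMINMAX is set, and a macro named max is only expanded when the next token is '('. Writing (std::numeric_limits<RT>::max)() therefore calls the member function without tripping the macro, which is why the #ifdef/#undef dance above could be deleted. A standalone illustration, simulating the Windows macro explicitly:

// Self-contained demo of the parenthesized call; the #define stands in for
// the macro that <windows.h> would provide.
#include <iostream>
#include <limits>

#define max(a, b) (((a) > (b)) ? (a) : (b)) // stand-in for the Windows macro

int main() {
  // std::numeric_limits<int>::max()   -> 'max' is followed by '(', so the
  //                                      two-argument macro is invoked with
  //                                      no arguments and compilation fails.
  // (std::numeric_limits<int>::max)() -> 'max' is followed by ')', so no
  //                                      macro expansion happens and the
  //                                      member function is called normally.
  const int t_max = (std::numeric_limits<int>::max)();
  const int t_min = (std::numeric_limits<int>::min)();
  std::cout << t_min << " .. " << t_max << '\n';
  return 0;
}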

sycl/include/CL/sycl/INTEL/esimd/detail/esimd_intrin.hpp

Lines changed: 44 additions & 58 deletions

@@ -14,11 +14,12 @@
 #include <CL/sycl/INTEL/esimd/detail/esimd_types.hpp>
 #include <CL/sycl/INTEL/esimd/detail/esimd_util.hpp>
 #include <CL/sycl/INTEL/esimd/esimd_enum.hpp>
-#include <CL/sycl/detail/accessor_impl.hpp>
 
 #include <assert.h>
 #include <cstdint>
 
+#define __SIGD sycl::INTEL::gpu::detail
+
 // \brief __esimd_rdregion: region access intrinsic.
 //
 // @param T the element data type, one of i8, i16, i32, i64, half, float,
@@ -63,13 +64,13 @@
 //
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth = 0>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
-__esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset);
+SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
+__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset);
 
 template <typename T, int N, int M, int ParentWidth = 0>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
-__esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
-                   sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset);
+SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
+__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
+                   __SIGD::vector_type_t<uint16_t, M> Offset);
 
 // __esimd_wrregion returns the updated vector with the region updated.
 //
@@ -120,46 +121,28 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
 //
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth = 0>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
-__esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
-                 sycl::INTEL::gpu::vector_type_t<T, M> NewVal, uint16_t Offset,
+SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
+__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
+                 __SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
                  sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
 
 template <typename T, int N, int M, int ParentWidth = 0>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
-__esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
-                   sycl::INTEL::gpu::vector_type_t<T, M> NewVal,
-                   sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset,
+SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
+__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
+                   __SIGD::vector_type_t<T, M> NewVal,
+                   __SIGD::vector_type_t<uint16_t, M> Offset,
                    sycl::INTEL::gpu::mask_type_t<M> Mask = 1);
 
 __SYCL_INLINE_NAMESPACE(cl) {
 namespace sycl {
 namespace INTEL {
 namespace gpu {
-// TODO dependencies on the std SYCL concepts like images
-// should be refactored in a separate header
-class AccessorPrivateProxy {
-public:
-#ifdef __SYCL_DEVICE_ONLY__
-  template <typename AccessorTy>
-  static auto getNativeImageObj(const AccessorTy &Acc) {
-    return Acc.getNativeImageObj();
-  }
-#else
-  template <typename AccessorTy>
-  static auto getImageRange(const AccessorTy &Acc) {
-    return Acc.getAccessRange();
-  }
-  static auto getElemSize(const sycl::detail::AccessorBaseHost &Acc) {
-    return Acc.getElemSize();
-  }
-#endif
-};
+namespace detail {
 
 /// read from a basic region of a vector, return a vector
 template <typename BT, int BN, typename RTy>
-vector_type_t<typename RTy::element_type, RTy::length>
-ESIMD_INLINE readRegion(const vector_type_t<BT, BN> &Base, RTy Region) {
+__SIGD::vector_type_t<typename RTy::element_type, RTy::length> ESIMD_INLINE
+readRegion(const __SIGD::vector_type_t<BT, BN> &Base, RTy Region) {
   using ElemTy = typename RTy::element_type;
   auto Base1 = bitcast<ElemTy, BT, BN>(Base);
   constexpr int Bytes = BN * sizeof(BT);
@@ -180,8 +163,8 @@ vector_type_t<typename RTy::element_type, RTy::length>
 
 /// read from a nested region of a vector, return a vector
 template <typename BT, int BN, typename T, typename U>
-ESIMD_INLINE vector_type_t<typename T::element_type, T::length>
-readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
+ESIMD_INLINE __SIGD::vector_type_t<typename T::element_type, T::length>
+readRegion(const __SIGD::vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
   // parent-region type
   using PaTy = typename shape_type<U>::type;
   constexpr int BN1 = PaTy::length;
@@ -222,6 +205,7 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
   }
 }
 
+} // namespace detail
 } // namespace gpu
 } // namespace INTEL
 } // namespace sycl
@@ -233,37 +217,37 @@ readRegion(const vector_type_t<BT, BN> &Base, std::pair<T, U> Region) {
 // optimization on simd object
 //
 template <typename T, int N>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
-__esimd_vload(const sycl::INTEL::gpu::vector_type_t<T, N> *ptr);
+SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
+__esimd_vload(const __SIGD::vector_type_t<T, N> *ptr);
 
 // vstore
 //
 // map to the backend vstore intrinsic, used by compiler to control
 // optimization on simd object
 template <typename T, int N>
-SYCL_EXTERNAL void __esimd_vstore(sycl::INTEL::gpu::vector_type_t<T, N> *ptr,
-                                  sycl::INTEL::gpu::vector_type_t<T, N> vals);
+SYCL_EXTERNAL void __esimd_vstore(__SIGD::vector_type_t<T, N> *ptr,
+                                  __SIGD::vector_type_t<T, N> vals);
 
 template <typename T, int N>
-SYCL_EXTERNAL uint16_t __esimd_any(sycl::INTEL::gpu::vector_type_t<T, N> src);
+SYCL_EXTERNAL uint16_t __esimd_any(__SIGD::vector_type_t<T, N> src);
 
 template <typename T, int N>
-SYCL_EXTERNAL uint16_t __esimd_all(sycl::INTEL::gpu::vector_type_t<T, N> src);
+SYCL_EXTERNAL uint16_t __esimd_all(__SIGD::vector_type_t<T, N> src);
 
 #ifndef __SYCL_DEVICE_ONLY__
 
 // Implementations of ESIMD intrinsics for the SYCL host device
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
-__esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset) {
+SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
+__esimd_rdregion(__SIGD::vector_type_t<T, N> Input, uint16_t Offset) {
   uint16_t EltOffset = Offset / sizeof(T);
   assert(Offset % sizeof(T) == 0);
 
   int NumRows = M / Width;
   assert(M % Width == 0);
 
-  sycl::INTEL::gpu::vector_type_t<T, M> Result;
+  __SIGD::vector_type_t<T, M> Result;
   int Index = 0;
   for (int i = 0; i < NumRows; ++i) {
     for (int j = 0; j < Width; ++j) {
@@ -274,10 +258,10 @@ __esimd_rdregion(sycl::INTEL::gpu::vector_type_t<T, N> Input, uint16_t Offset) {
 }
 
 template <typename T, int N, int M, int ParentWidth>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, M>
-__esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
-                   sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset) {
-  sycl::INTEL::gpu::vector_type_t<T, M> Result;
+SYCL_EXTERNAL __SIGD::vector_type_t<T, M>
+__esimd_rdindirect(__SIGD::vector_type_t<T, N> Input,
+                   __SIGD::vector_type_t<uint16_t, M> Offset) {
+  __SIGD::vector_type_t<T, M> Result;
   for (int i = 0; i < M; ++i) {
     uint16_t EltOffset = Offset[i] / sizeof(T);
     assert(Offset[i] % sizeof(T) == 0);
@@ -289,17 +273,17 @@ __esimd_rdindirect(sycl::INTEL::gpu::vector_type_t<T, N> Input,
 
 template <typename T, int N, int M, int VStride, int Width, int Stride,
           int ParentWidth>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
-__esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
-                 sycl::INTEL::gpu::vector_type_t<T, M> NewVal, uint16_t Offset,
+SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
+__esimd_wrregion(__SIGD::vector_type_t<T, N> OldVal,
+                 __SIGD::vector_type_t<T, M> NewVal, uint16_t Offset,
                  sycl::INTEL::gpu::mask_type_t<M> Mask) {
   uint16_t EltOffset = Offset / sizeof(T);
   assert(Offset % sizeof(T) == 0);
 
   int NumRows = M / Width;
   assert(M % Width == 0);
 
-  sycl::INTEL::gpu::vector_type_t<T, N> Result = OldVal;
+  __SIGD::vector_type_t<T, N> Result = OldVal;
   int Index = 0;
   for (int i = 0; i < NumRows; ++i) {
     for (int j = 0; j < Width; ++j) {
@@ -312,12 +296,12 @@ __esimd_wrregion(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
 }
 
 template <typename T, int N, int M, int ParentWidth>
-SYCL_EXTERNAL sycl::INTEL::gpu::vector_type_t<T, N>
-__esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
-                   sycl::INTEL::gpu::vector_type_t<T, M> NewVal,
-                   sycl::INTEL::gpu::vector_type_t<uint16_t, M> Offset,
+SYCL_EXTERNAL __SIGD::vector_type_t<T, N>
+__esimd_wrindirect(__SIGD::vector_type_t<T, N> OldVal,
+                   __SIGD::vector_type_t<T, M> NewVal,
+                   __SIGD::vector_type_t<uint16_t, M> Offset,
                    sycl::INTEL::gpu::mask_type_t<M> Mask) {
-  sycl::INTEL::gpu::vector_type_t<T, N> Result = OldVal;
+  __SIGD::vector_type_t<T, N> Result = OldVal;
   for (int i = 0; i < M; ++i) {
     if (Mask[i]) {
       uint16_t EltOffset = Offset[i] / sizeof(T);
@@ -330,3 +314,5 @@ __esimd_wrindirect(sycl::INTEL::gpu::vector_type_t<T, N> OldVal,
 }
 
 #endif // __SYCL_DEVICE_ONLY__
+
+#undef __SIGD
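
Aside (not part of the diff): __SIGD is a header-local abbreviation for sycl::INTEL::gpu::detail; defining it at the top of the header and #undef-ing it at the bottom keeps the declarations readable without leaking the name to includers, unlike a namespace alias, which cannot be removed again at the end of the header. A standalone sketch of the pattern, with illustrative names only:

// Header-local macro alias: define, use for the declarations, then #undef.
namespace example { namespace very { namespace deeply { namespace nested {
template <typename T, int N> struct vec { T data[N]; };
}}}} // namespace example::very::deeply::nested

#define EX_DETAIL_NS example::very::deeply::nested // short, header-local alias

template <typename T, int N>
EX_DETAIL_NS::vec<T, N> load_vec(const EX_DETAIL_NS::vec<T, N> *ptr) {
  return *ptr;
}

#undef EX_DETAIL_NS // the alias does not escape this "header"

int main() {
  example::very::deeply::nested::vec<int, 4> v{{1, 2, 3, 4}};
  return load_vec(&v).data[0] == 1 ? 0 : 1;
}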
