Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 22 additions & 20 deletions SYCL/ESIMD/ext_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

using namespace cl::sycl;
using namespace sycl::ext::intel;
using namespace sycl::ext::intel::esimd;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we can use something like this (not sure about the proper spelling)

  template <class T>
  using simd = esimd::simd<T>;

to eliminate the changes below.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using esimd::simd<T, N> in the code below is clearer than introducing a simd alias.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @kbobrovs; I think esimd::simd<T, N> is clearer.


// --- Data initialization functions

Expand Down Expand Up @@ -150,12 +149,13 @@ DEFINE_HOST_BIN_OP(pow, std::pow(X, Y));

#define DEFINE_ESIMD_DEVICE_OP(Op) \
template <class T, int N> struct ESIMDf<T, N, MathOp::Op, AllVec> { \
simd<T, N> operator()(simd<T, N> X) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> \
operator()(esimd::simd<T, N> X) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X); \
} \
}; \
template <class T, int N> struct ESIMDf<T, N, MathOp::Op, AllSca> { \
simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X); \
} \
};
Expand All @@ -176,25 +176,26 @@ DEFINE_ESIMD_DEVICE_OP(log2);

#define DEFINE_ESIMD_DEVICE_BIN_OP(Op) \
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, AllSca> { \
simd<T, N> operator()(T X, \
T Y) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> operator()(T X, T Y) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X, Y); \
} \
}; \
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, AllVec> { \
simd<T, N> operator()(simd<T, N>X, \
simd<T, N>Y) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> \
operator()(esimd::simd<T, N> X, \
esimd::simd<T, N> Y) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X, Y); \
} \
}; \
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, Sca1Vec2> { \
simd<T, N> operator()(T X, simd<T, N> Y) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> \
operator()(T X, esimd::simd<T, N> Y) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X, Y); \
} \
}; \
template <class T, int N> struct BinESIMDf<T, N, MathOp::Op, Sca2Vec1> { \
simd<T, N> operator()(simd<T, N>X, \
T Y) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> operator()(esimd::simd<T, N> X, \
T Y) const SYCL_ESIMD_FUNCTION { \
return esimd::Op<T, N>(X, Y); \
} \
};
Expand All @@ -204,13 +205,14 @@ DEFINE_ESIMD_DEVICE_BIN_OP(pow);

#define DEFINE_SYCL_DEVICE_OP(Op) \
template <class T, int N> struct SYCLf<T, N, MathOp::Op, AllVec> { \
simd<T, N> operator()(simd<T, N>X) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> \
operator()(esimd::simd<T, N> X) const SYCL_ESIMD_FUNCTION { \
/* T must be float for SYCL, so not a template parameter for sycl::Op*/ \
return sycl::Op<N>(X); \
} \
}; \
template <class T, int N> struct SYCLf<T, N, MathOp::Op, AllSca> { \
simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
esimd::simd<T, N> operator()(T X) const SYCL_ESIMD_FUNCTION { \
return sycl::Op<N>(X); \
} \
};
Expand All @@ -233,14 +235,14 @@ struct UnaryDeviceFunc {

void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
unsigned int Offset = I * N * sizeof(T);
simd<T, N> Vx;
esimd::simd<T, N> Vx;
Vx.copy_from(In, Offset);

if (I.get(0) % 2 == 0) {
for (int J = 0; J < N; J++) {
Kernel<T, N, Op, AllSca> DevF{};
T Val = Vx[J];
simd<T, N> V = DevF(Val); // scalar arg
esimd::simd<T, N> V = DevF(Val); // scalar arg
Vx[J] = V[J];
}
} else {
Expand All @@ -264,31 +266,31 @@ struct BinaryDeviceFunc {

void operator()(id<1> I) const SYCL_ESIMD_KERNEL {
unsigned int Offset = I * N * sizeof(T);
simd<T, N> V1(In1, Offset);
simd<T, N> V2(In2, Offset);
simd<T, N> V;
esimd::simd<T, N> V1(In1, Offset);
esimd::simd<T, N> V2(In2, Offset);
esimd::simd<T, N> V;

if (I.get(0) % 2 == 0) {
int Ind = 0;
{
Kernel<T, N, Op, AllSca> DevF{};
T Val2 = V2[Ind];
simd<T, N> Vv = DevF(V1[Ind], Val2); // both arguments are scalar
esimd::simd<T, N> Vv = DevF(V1[Ind], Val2); // both arguments are scalar
V[Ind] = Vv[Ind];
}
Ind++;
{
Kernel<T, N, Op, Sca1Vec2> DevF{};
T Val1 = V1[Ind];
simd<T, N> Vv = DevF(Val1, V2); // scalar, vector
esimd::simd<T, N> Vv = DevF(Val1, V2); // scalar, vector
V[Ind] = Vv[Ind];
}
Ind++;
{
for (int J = Ind; J < N; ++J) {
Kernel<T, N, Op, Sca2Vec1> DevF{};
T Val2 = V2[J];
simd<T, N> Vv = DevF(V1, Val2); // scalar 2nd arg
esimd::simd<T, N> Vv = DevF(V1, Val2); // scalar 2nd arg
V[J] = Vv[J];
}
}
Expand Down
2 changes: 1 addition & 1 deletion SYCL/SubGroup/helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ template <typename T> void exit_if_not_equal(T *val, T *ref, const char *name) {
template <> void exit_if_not_equal(half val, half ref, const char *name) {
int16_t cmp_val = reinterpret_cast<int16_t &>(val);
int16_t cmp_ref = reinterpret_cast<int16_t &>(ref);
if (std::abs(cmp_val - cmp_ref) > 1) {
if (std::abs(cmp_val - cmp_ref) > 2) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know why we need this at all? Other fp types seem to be checked for exact equality (line 112).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This compares the results on the host vs. the device. The host does not have support for _Float16, so it is implemented with the uint16_t data type, converting to 32-bit float for arithmetic and back to uint16_t.
The device has support for _Float16, so it performs the same operations at 16-bit precision. This difference in precision/computation between host and device means there is a larger difference in the output compared to other data types.

std::cout << "Unexpected result for " << name << ": " << (float)val
<< " expected value: " << (float)ref << std::endl;
exit(1);
Expand Down