Skip to content

[WIP][SYCL][ESIMD] Replace mask_type_t with simd_mask to represent Gen predicates. #3787

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <cstdint>

#define __SEIEED sycl::ext::intel::experimental::esimd::detail
#define __SEIEE sycl::ext::intel::experimental::esimd

// \brief __esimd_rdregion: region access intrinsic.
//
Expand Down Expand Up @@ -125,14 +124,14 @@ template <typename T, int N, int M, int VStride, int Width, int Stride,
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
__SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
__SEIEE::mask_type_t<M> Mask = 1);
__SEIEED::simd_mask_impl_t<M> Mask = 1);

template <typename T, int N, int M, int ParentWidth = 0>
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
__SEIEED::vector_type_t<T, M> NewVal,
__SEIEED::vector_type_t<uint16_t, M> Offset,
__SEIEE::mask_type_t<M> Mask = 1);
__SEIEED::simd_mask_impl_t<M> Mask = 1);

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
Expand Down Expand Up @@ -286,7 +285,7 @@ template <typename T, int N, int M, int VStride, int Width, int Stride,
SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
__esimd_wrregion(__SEIEED::vector_type_t<T, N> OldVal,
__SEIEED::vector_type_t<T, M> NewVal, uint16_t Offset,
__SEIEE::mask_type_t<M> Mask) {
__SEIEED::simd_mask_impl_t<M> Mask) {
uint16_t EltOffset = Offset / sizeof(T);
assert(Offset % sizeof(T) == 0);

Expand All @@ -310,7 +309,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t<T, N>
__esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,
__SEIEED::vector_type_t<T, M> NewVal,
__SEIEED::vector_type_t<uint16_t, M> Offset,
__SEIEE::mask_type_t<M> Mask) {
__SEIEED::simd_mask_impl_t<M> Mask) {
__SEIEED::vector_type_t<T, N> Result = OldVal;
for (int i = 0; i < M; ++i) {
if (Mask[i]) {
Expand All @@ -325,5 +324,4 @@ __esimd_wrindirect(__SEIEED::vector_type_t<T, N> OldVal,

#endif // __SYCL_DEVICE_ONLY__

#undef __SEIEE
#undef __SEIEED

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -248,19 +248,23 @@ inline std::istream &operator>>(std::istream &I, half &rhs) {
rhs = ValFloat;
return I;
}
} // namespace detail

// TODO @rolandschulz on May 21
// {quote}
// - The mask should also be a wrapper around the clang - vector type rather
// than the clang - vector type itself.
// - The internal storage should be implementation defined.uint16_t is a bad
// choice for some HW.Nor is it how clang - vector types works(using the same
// size int as the corresponding vector type used for comparison(e.g. long for
// double and int for float)).
template <int N>
using mask_type_t = typename detail::vector_type<uint16_t, N>::type;
// internal implementation for the mask type
template <typename T, int N> struct mask_impl {
static_assert(N > 0, "mask must have at least one element");

static constexpr int length = N;
using type = T __attribute__((ext_vector_type(N)));
using value_type = T;
};

template <typename T, int N> using mask_impl_t = typename mask_impl<T, N>::type;

template <int N> using simd_mask_impl = mask_impl<unsigned short, N>;

template <int N> using simd_mask_impl_t = typename simd_mask_impl<N>::type;

} // namespace detail
} // namespace esimd
} // namespace experimental
} // namespace intel
Expand Down
43 changes: 32 additions & 11 deletions sycl/include/sycl/ext/intel/experimental/esimd/esimd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <sycl/ext/intel/experimental/esimd/detail/esimd_memory_intrin.hpp>
#include <sycl/ext/intel/experimental/esimd/detail/esimd_sycl_util.hpp>
#include <sycl/ext/intel/experimental/esimd/detail/esimd_types.hpp>
#include <sycl/ext/intel/experimental/esimd/simd_mask.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
Expand All @@ -32,6 +33,7 @@ namespace esimd {
/// \ingroup sycl_esimd
template <typename Ty, int N> class simd {
template <typename, typename> friend class simd_view;
template <int> friend class simd_mask;

public:
/// The underlying builtin data type.
Expand Down Expand Up @@ -127,7 +129,7 @@ template <typename Ty, int N> class simd {
/// Whole region update with predicates.
void merge(const simd &Val, const mask_type_t<N> &Mask) {
set(__esimd_wrregion<element_type, N, N, 0 /*VS*/, N, 1, N>(
data(), Val.data(), 0, Mask));
data(), Val.data(), 0, Mask.data()));
}
void merge(const simd &Val1, simd Val2, const mask_type_t<N> &Mask) {
Val2.merge(Val1, Mask);
Expand Down Expand Up @@ -247,18 +249,12 @@ template <typename Ty, int N> class simd {

#undef DEF_BINOP

// TODO @rolandschulz, @mattkretz
// Introduce simd_mask type and let user use this type instead of specific
// type representation (simd<uint16_t, N>) to make it more portable
// TODO @iburyl should be mask_type_t, which might become more abstracted in
// the future revisions.
//
#define DEF_RELOP(RELOP) \
ESIMD_INLINE friend simd<uint16_t, N> operator RELOP(const simd &X, \
const simd &Y) { \
ESIMD_INLINE friend simd_mask<N> operator RELOP(const simd &X, \
const simd &Y) { \
auto R = X.data() RELOP Y.data(); \
mask_type_t<N> M(1); \
return M & detail::convert<mask_type_t<N>>(R); \
using mask_elem_t = std::remove_reference_t<decltype(R[0])>; \
return simd_mask<N>::template create<mask_elem_t>(R); \
}

DEF_RELOP(>)
Expand Down Expand Up @@ -627,6 +623,31 @@ ESIMD_INLINE
#endif // __SYCL_DEVICE_ONLY__
}

// Some simd_mask member definitions. Put here because their implementation
// needs full definition of the simd class.

template <int N>
simd_mask<N>::simd_mask(const simd<unsigned short, N> &v) noexcept {
set(__builtin_convertvector(v.data(), simd_mask_impl_t));
}

template <int N> simd_mask<N>::simd_mask(simd<unsigned short, N> &&v) noexcept {
set(__builtin_convertvector(v.data(), simd_mask_impl_t));
}

template <int N>
simd_mask<N> &
simd_mask<N>::operator=(const simd<unsigned short, N> &v) noexcept {
set(__builtin_convertvector(v.data(), simd_mask_impl_t));
return *this;
}

template <int N>
simd_mask<N> &simd_mask<N>::operator=(simd<unsigned short, N> &&v) noexcept {
set(__builtin_convertvector(v.data(), simd_mask_impl_t));
return *this;
}

} // namespace esimd
} // namespace experimental
} // namespace intel
Expand Down
18 changes: 9 additions & 9 deletions sycl/include/sycl/ext/intel/experimental/esimd/esimd_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1108,8 +1108,8 @@ ESIMD_NODEBUG ESIMD_INLINE
esimd_atan(simd<T, SZ> src0, int flag = GENX_NOSAT) {
simd<T, SZ> Src0 = esimd_abs(src0);

simd<ushort, SZ> Neg = src0 < T(0.0);
simd<ushort, SZ> Gt1 = Src0 > T(1.0);
simd_mask<SZ> Neg = src0 < T(0.0);
simd_mask<SZ> Gt1 = Src0 > T(1.0);

Src0.merge(esimd_inv(Src0), Gt1);

Expand Down Expand Up @@ -1151,8 +1151,8 @@ ESIMD_NODEBUG ESIMD_INLINE
esimd_acos(simd<T, SZ> src0, int flag = GENX_NOSAT) {
simd<T, SZ> Src0 = esimd_abs(src0);

simd<ushort, SZ> Neg = src0 < T(0.0);
simd<ushort, SZ> TooBig = Src0 >= T(0.999998);
simd_mask<SZ> Neg = src0 < T(0.0);
simd_mask<SZ> TooBig = Src0 >= T(0.999998);

// Replace oversized values to ensure no possibility of sqrt of
// a negative value later
Expand Down Expand Up @@ -1194,7 +1194,7 @@ ESIMD_NODEBUG ESIMD_INLINE
typename sycl::detail::enable_if_t<std::is_floating_point<T>::value,
simd<T, SZ>>
esimd_asin(simd<T, SZ> src0, int flag = GENX_NOSAT) {
simd<ushort, SZ> Neg = src0 < T(0.0);
simd_mask<SZ> Neg = src0 < T(0.0);

simd<T, SZ> Result =
T(ESIMD_HDR_CONST_PI / 2.0) - esimd_acos(esimd_abs(src0));
Expand Down Expand Up @@ -1487,7 +1487,7 @@ ESIMD_INLINE simd<float, N> esimd_atan2_fast(simd<float, N> y, simd<float, N> x,
simd<float, N> a1;
simd<float, N> atan2;

simd<unsigned short, N> mask = (y >= 0.0f);
simd_mask<N> mask = (y >= 0.0f);
a0.merge(ESIMD_CONST_PI * 0.5f, ESIMD_CONST_PI * 1.5f, mask);
a1.merge(0, ESIMD_CONST_PI * 2.0f, mask);

Expand Down Expand Up @@ -1523,7 +1523,7 @@ ESIMD_INLINE simd<float, N> esimd_atan2(simd<float, N> y, simd<float, N> x,
simd<float, N> v_distance;
simd<float, N> v_y0;
simd<float, N> atan2;
simd<unsigned short, N> mask;
simd_mask<N> mask;

mask = (x < 0);
v_y0.merge(ESIMD_CONST_PI, 0, mask);
Expand All @@ -1541,10 +1541,10 @@ template <> ESIMD_INLINE float esimd_atan2(float y, float x, const uint flags) {
float v_distance;
float v_y0;
simd<float, 1> atan2;
unsigned short mask;
simd_mask<1> mask;

mask = (x < 0);
v_y0 = mask ? ESIMD_CONST_PI : 0;
v_y0 = mask[0] ? ESIMD_CONST_PI : 0;
v_distance = esimd_sqrt<float>(x * x + y * y);
mask = (esimd_abs<float>(y) < 0.000001f);
atan2.merge(v_y0, (2 * esimd_atan((v_distance - x) / y)), mask);
Expand Down
Loading