Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions sycl/include/sycl/ext/intel/esimd/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,12 +529,23 @@ gather_rgba(const Tx *p, simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
return __esimd_svm_gather4_scaled<T, N, Mask>(addrs.data(), mask.data());
}

namespace detail {
/// Compile-time validation of the channel mask used by RGBA write (scatter)
/// operations. Instantiating this function with a mask other than \c ABGR,
/// \c BGR, \c GR or \c R triggers a \c static_assert failure; for the accepted
/// masks the function is a no-op.
///
/// @tparam M The channel mask to validate.
template <rgba_channel_mask M> static void validate_rgba_write_channel_mask() {
  using CM = rgba_channel_mask;
  // Pass the text as the static_assert message (second argument) rather than
  // folding it into the condition with '&&', so the compiler reports it as the
  // diagnostic message instead of as part of the failed expression.
  static_assert(
      M == CM::ABGR || M == CM::BGR || M == CM::GR || M == CM::R,
      "Only ABGR, BGR, GR, R channel masks are valid in write operations");
}
} // namespace detail

/// @anchor usm_scatter_rgba
/// Transpose and scatter pixels to given memory locations defined by the base
/// pointer \c p and \c offsets. Up to 4 32-bit data elements may be accessed at
/// each address depending on the channel mask \c Mask template parameter. Each
/// pixel's address must be 4 byte aligned. This is basically an inverse
/// operation for gather_rgba. Unlike \c gather_rgba, this function imposes
/// restrictions on possible \c Mask template argument values. It can only be
/// one of the following: \c ABGR, \c BGR, \c GR, \c R.
///
/// @tparam Tx Element type of the returned vector. Must be 4 bytes in size.
/// @tparam N Number of pixels to access (matches the size of the \c offsets
Expand All @@ -553,6 +564,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
scatter_rgba(Tx *p, simd<uint32_t, N> offsets,
simd<Tx, N * get_num_channels_enabled(Mask)> vals,
simd_mask<N> mask = 1) {
detail::validate_rgba_write_channel_mask<Mask>();
simd<uint64_t, N> offsets_i = convert<uint64_t>(offsets);
simd<uint64_t, N> addrs(reinterpret_cast<uint64_t>(p));
addrs = addrs + offsets_i;
Expand Down Expand Up @@ -875,7 +887,7 @@ slm_gather_rgba(simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
}

/// Gather data from the Shared Local Memory at specified \c offsets and return
/// it as simd vector. See @ref usm_scatter_rgba for information about the
/// operation semantics and parameter restrictions/interdependencies.
/// @tparam T The element type of the returned vector.
/// @tparam N The number of elements to access.
Expand All @@ -889,6 +901,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
slm_scatter_rgba(simd<uint32_t, N> offsets,
simd<T, N * get_num_channels_enabled(Mask)> vals,
simd_mask<N> mask = 1) {
detail::validate_rgba_write_channel_mask<Mask>();
const auto si = __ESIMD_GET_SURF_HANDLE(detail::LocalAccessorMarker());
constexpr int16_t Scale = 0;
constexpr int global_offset = 0;
Expand Down
19 changes: 17 additions & 2 deletions sycl/test/esimd/gather_scatter_rgba.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// RUN: %clangxx -fsycl -fsyntax-only -Wno-unused-command-line-argument %s
// RUN: %clangxx -fsycl -fsycl-device-only -fsyntax-only -Xclang -verify %s

// This test checks that the device compiler can:
// - successfully compile the ESIMD gather_rgba/scatter_rgba APIs
// - emit an error if some of the restrictions on template parameters are
// violated

#include <CL/sycl.hpp>
#include <limits>
Expand All @@ -20,3 +23,15 @@ void kernel(int *ptr) SYCL_ESIMD_FUNCTION {

scatter_rgba<int, 32, rgba_channel_mask::ABGR>(ptr, offsets, v0);
}

// Number of channels enabled by the AGR mask (A, G and R). Used below to size
// the value vector, which scatter_rgba declares as N * <enabled channels>
// elements.
constexpr int AGR_N_CHANNELS = 3;

// Negative test: calls scatter_rgba with the AGR channel mask, which is not one
// of the masks accepted for write operations (ABGR, BGR, GR, R). The
// static_assert in the write-mask validation must therefore fire; the verify
// directives inside the body describe the diagnostics the compiler has to emit.
void kernel1(int *ptr, simd<int, 32 * AGR_N_CHANNELS> v) SYCL_ESIMD_FUNCTION {
// NOTE(review): presumably the (base, step) simd constructor, giving each of
// the 32 lanes an offset one 4-element pixel apart - confirm against simd.
simd<uint32_t, 32> offsets(0, sizeof(int) * 4);
// only 1, 2, 3, 4-element masks covering consecutive channels starting from
// R are supported
// expected-error-re@* {{static_assert failed{{.*}}Only ABGR, BGR, GR, R channel masks are valid in write operations}}
// expected-note@* {{in instantiation }}
// expected-note@+1 {{in instantiation }}
scatter_rgba<int, 32, rgba_channel_mask::AGR>(ptr, offsets, v);
}