@@ -529,12 +529,23 @@ gather_rgba(const Tx *p, simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
529529 return __esimd_svm_gather4_scaled<T, N, Mask>(addrs.data (), mask.data ());
530530}
531531
532+ namespace detail {
533+ template <rgba_channel_mask M> static void validate_rgba_write_channel_mask () {
534+ using CM = rgba_channel_mask;
535+ static_assert (
536+ (M == CM::ABGR || M == CM::BGR || M == CM::GR || M == CM::R) &&
537+ " Only ABGR, BGR, GR, R channel masks are valid in write operations" );
538+ }
539+ } // namespace detail
540+
532541// / @anchor usm_scatter_rgba
533542// / Transpose and scatter pixels to given memory locations defined by the base
534543// / pointer \c p and \c offsets. Up to 4 32-bit data elements may be accessed at
535544// / each address depending on the channel mask \c Mask template parameter. Each
536545// / pixel's address must be 4 byte aligned. This is basically an inverse
537- // / operation for gather_rgba.
546+ // / operation for gather_rgba. Unlike \c gather_rgba, this function imposes
547+ // / restrictions on possible \c Mask template argument values. It can only be
548+ // / one of the following: \c ABGR, \c BGR, \c GR, \c R.
538549// /
539550// / @tparam Tx Element type of the \c vals vector to be written. Must be 4 bytes in size.
540551// / @tparam N Number of pixels to access (matches the size of the \c offsets
@@ -553,6 +564,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
553564scatter_rgba (Tx *p, simd<uint32_t , N> offsets,
554565 simd<Tx, N * get_num_channels_enabled (Mask)> vals,
555566 simd_mask<N> mask = 1) {
567+ detail::validate_rgba_write_channel_mask<Mask>();
556568 simd<uint64_t , N> offsets_i = convert<uint64_t >(offsets);
557569 simd<uint64_t , N> addrs (reinterpret_cast <uint64_t >(p));
558570 addrs = addrs + offsets_i;
@@ -875,7 +887,7 @@ slm_gather_rgba(simd<uint32_t, N> offsets, simd_mask<N> mask = 1) {
875887}
876888
877889// / Scatter data to the Shared Local Memory at specified \c offsets, accepting
878- // / it as simd vector. See @ref usm_gather_rgba for information about the
890+ // / it as simd vector. See @ref usm_scatter_rgba for information about the
879891// / operation semantics and parameter restrictions/interdependencies.
880892// / @tparam T The element type of the \c vals vector to be written.
881893// / @tparam N The number of elements to access.
@@ -889,6 +901,7 @@ __ESIMD_API std::enable_if_t<(N == 8 || N == 16 || N == 32) && (sizeof(T) == 4)>
889901slm_scatter_rgba (simd<uint32_t , N> offsets,
890902 simd<T, N * get_num_channels_enabled (Mask)> vals,
891903 simd_mask<N> mask = 1) {
904+ detail::validate_rgba_write_channel_mask<Mask>();
892905 const auto si = __ESIMD_GET_SURF_HANDLE (detail::LocalAccessorMarker ());
893906 constexpr int16_t Scale = 0 ;
894907 constexpr int global_offset = 0 ;
0 commit comments