1616#include " hdr/stdint_proxy.h"
1717#include " src/__support/CPP/algorithm.h"
1818#include " src/__support/CPP/limits.h"
19+ #include " src/__support/CPP/tuple.h"
1920#include " src/__support/CPP/type_traits.h"
21+ #include " src/__support/CPP/utility/integer_sequence.h"
2022#include " src/__support/macros/attributes.h"
2123#include " src/__support/macros/config.h"
2224
@@ -32,9 +34,6 @@ namespace cpp {
3234
3335namespace internal {
3436
35- template <typename T>
36- using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
37-
3837#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
3938template <typename T>
4039LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof (T);
@@ -48,9 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
4847template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1 ;
4948#endif
5049
51- template <typename T> LIBC_INLINE constexpr T poison () {
52- return __builtin_nondeterministic_value (T ());
53- }
5450} // namespace internal
5551
5652// Type aliases.
@@ -61,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]];
6157template <typename T>
6258using simd_mask = simd<bool , internal::native_vector_size<T>>;
6359
60+ namespace internal {
61+
62+ template <typename T>
63+ using get_as_integer_type_t = unsigned _BitInt (sizeof (T) * CHAR_BIT);
64+
65+ template <typename T> LIBC_INLINE constexpr T poison () {
66+ return __builtin_nondeterministic_value (T ());
67+ }
68+
69+ template <typename T, size_t N, size_t OriginalSize, size_t ... Indices>
70+ LIBC_INLINE constexpr static cpp::simd<T, sizeof ...(Indices)>
71+ extend (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
72+ return __builtin_shufflevector (
73+ x, x, (Indices < OriginalSize ? static_cast <int >(Indices) : -1 )...);
74+ }
75+
76+ template <typename T, size_t N, size_t TargetSize, size_t OriginalSize>
77+ LIBC_INLINE constexpr static auto extend (cpp::simd<T, N> x) {
78+ // Recursively resize an input vector to the target size, increasing its size
79+ // by at most double the input size each step due to shufflevector limitation.
80+ if constexpr (N == TargetSize)
81+ return x;
82+ else if constexpr (TargetSize <= 2 * N)
83+ return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{});
84+ else
85+ return extend<T, 2 * N, TargetSize, OriginalSize>(
86+ extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
87+ }
88+
89+ template <typename T, size_t N, size_t M, size_t ... Indices>
90+ LIBC_INLINE constexpr static cpp::simd<T, N + M>
91+ concat (cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) {
92+ constexpr size_t Size = cpp::max (N, M);
93+ auto remap = [](size_t idx) -> int {
94+ if (idx < N)
95+ return static_cast <int >(idx);
96+ if (idx < N + M)
97+ return static_cast <int >((idx - N) + Size);
98+ return -1 ;
99+ };
100+
101+ // Extend the input vectors until they are the same size, then use the indices
102+ // to shuffle in only the indices that correspond to the original values.
103+ auto x_ext = extend<T, N, Size, N>(x);
104+ auto y_ext = extend<T, M, Size, M>(y);
105+ return __builtin_shufflevector (x_ext, y_ext, remap (Indices)...);
106+ }
107+
108+ template <typename T, size_t N, size_t Count, size_t Offset, size_t ... Indices>
109+ LIBC_INLINE constexpr static cpp::simd<T, Count>
110+ slice (cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
111+ return __builtin_shufflevector (x, x, (Offset + Indices)...);
112+ }
113+
114+ template <typename T, size_t N, size_t Offset, size_t Head, size_t ... Tail>
115+ LIBC_INLINE constexpr static auto split (cpp::simd<T, N> x) {
116+ // Recursively splits the input vector by walking the variadic template list,
117+ // increasing our current head each call.
118+ auto result = cpp::make_tuple (
119+ slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
120+ if constexpr (sizeof ...(Tail) > 0 )
121+ return cpp::tuple_cat (result, split<T, N, Offset + Head, Tail...>(x));
122+ else
123+ return result;
124+ }
125+
126+ } // namespace internal
127+
64128// Type trait helpers.
65129template <typename T>
66130struct simd_size : cpp::integral_constant<size_t , __builtin_vectorelements(T)> {
@@ -273,6 +337,25 @@ LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
273337 return m ? x : y;
274338}
275339
340+ // Shuffling helpers.
341+ template <typename T, size_t N, size_t M>
342+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y) {
343+ return internal::concat (x, y, make_index_sequence<N + M>{});
344+ }
345+ template <typename T, size_t N, size_t M, typename ... Rest>
346+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y,
347+ Rest... rest) {
348+ auto xy = concat (x, y);
349+ if constexpr (sizeof ...(Rest))
350+ return concat (xy, rest...);
351+ else
352+ return xy;
353+ }
354+ template <size_t ... Sizes, typename T, size_t N> auto split (cpp::simd<T, N> x) {
355+ static_assert ((... + Sizes) == N, " split sizes must sum to vector size" );
356+ return internal::split<T, N, 0 , Sizes...>(x);
357+ }
358+
276359// TODO: where expressions, scalar overloads, ABI types.
277360
278361} // namespace cpp
0 commit comments