-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[Headers][X86] Allow AVX512VLBW integer reduction intrinsics to be used in constexpr #155199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-x86 Author: Chaitanya Koparkar (ckoparkar) ChangesFixes #154284 Add constexpr support for the following: _mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16 _mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16 _mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16 _mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16 Patch is 48.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155199.diff 2 Files Affected:
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 2e2052ad1b682..eaf292a0949f5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2197,7 +2197,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2205,7 +2205,7 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
(__v8hi) __W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2213,7 +2213,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
(__v8hi) _mm_setzero_si128 ());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2221,7 +2221,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
(__v16hi) __W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2229,7 +2229,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
(__v16hi) _mm256_setzero_si256 ());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2237,7 +2237,7 @@ _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
(__v16qi) __W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2245,7 +2245,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
(__v16qi) _mm_setzero_si128 ());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2253,7 +2253,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
(__v32qi) __W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2812,353 +2812,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
(__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
(__v16hi)_mm256_setzero_si256()))
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_add_epi16(__m128i __W) {
return __builtin_reduce_add((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_mul_epi16(__m128i __W) {
return __builtin_reduce_mul((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_and_epi16(__m128i __W) {
return __builtin_reduce_and((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_or_epi16(__m128i __W) {
return __builtin_reduce_or((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_maskz_mov_epi16(__M, __W);
return __builtin_reduce_add((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
return __builtin_reduce_mul((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
return __builtin_reduce_and((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
__W = _mm_maskz_mov_epi16(__M, __W);
return __builtin_reduce_or((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epi16(__m128i __V) {
return __builtin_reduce_max((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epu16(__m128i __V) {
return __builtin_reduce_max((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epi16(__m128i __V) {
return __builtin_reduce_min((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epu16(__m128i __V) {
return __builtin_reduce_min((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
return __builtin_reduce_max((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
__V = _mm_maskz_mov_epi16(__M, __V);
return __builtin_reduce_max((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
return __builtin_reduce_min((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
return __builtin_reduce_min((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_add_epi16(__m256i __W) {
return __builtin_reduce_add((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_mul_epi16(__m256i __W) {
return __builtin_reduce_mul((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_and_epi16(__m256i __W) {
return __builtin_reduce_and((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_or_epi16(__m256i __W) {
return __builtin_reduce_or((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi16(__M, __W);
return __builtin_reduce_add((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
return __builtin_reduce_mul((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
return __builtin_reduce_and((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi16(__M, __W);
return __builtin_reduce_or((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epi16(__m256i __V) {
return __builtin_reduce_max((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epu16(__m256i __V) {
return __builtin_reduce_max((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epi16(__m256i __V) {
return __builtin_reduce_min((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epu16(__m256i __V) {
return __builtin_reduce_min((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
return __builtin_reduce_max((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
__V = _mm256_maskz_mov_epi16(__M, __V);
return __builtin_reduce_max((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
return __builtin_reduce_min((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
return __builtin_reduce_min((__v16hu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_add_epi8(__m128i __W) {
return __builtin_reduce_add((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_mul_epi8(__m128i __W) {
return __builtin_reduce_mul((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_and_epi8(__m128i __W) {
return __builtin_reduce_and((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_or_epi8(__m128i __W) {
return __builtin_reduce_or((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_maskz_mov_epi8(__M, __W);
return __builtin_reduce_add((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
return __builtin_reduce_mul((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
return __builtin_reduce_and((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_maskz_mov_epi8(__M, __W);
return __builtin_reduce_or((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epi8(__m128i __V) {
return __builtin_reduce_max((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epu8(__m128i __V) {
return __builtin_reduce_max((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epi8(__m128i __V) {
return __builtin_reduce_min((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epu8(__m128i __V) {
return __builtin_reduce_min((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
return __builtin_reduce_max((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
__V = _mm_maskz_mov_epi8(__M, __V);
return __builtin_reduce_max((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
return __builtin_reduce_min((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
return __builtin_reduce_min((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_add_epi8(__m256i __W) {
return __builtin_reduce_add((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_mul_epi8(__m256i __W) {
return __builtin_reduce_mul((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_and_epi8(__m256i __W) {
return __builtin_reduce_and((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_or_epi8(__m256i __W) {
return __builtin_reduce_or((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi8(__M, __W);
return __builtin_reduce_add((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
return __builtin_reduce_mul((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
return __builtin_reduce_and((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi8(__M, __W);
return __builtin_reduce_or((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epi8(__m256i __V) {
return __builtin_reduce_max((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epu8(__m256i __V) {
return __builtin_reduce_max((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epi8(__m256i __V) {
return __builtin_reduce_min((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epu8(__m256i __V) {
return __builtin_reduce_min((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
return __builtin_reduce_max((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
__V = _mm256_maskz_mov_epi8(__M, __V);
return __builtin_reduce_max((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
return __builtin_reduce_min((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
return __builtin_reduce_min((__v32qu)__V);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
index faa3b54624a77..8e53db3f45300 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
@@ -8,30 +8,35 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
short test_mm_reduce_add_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_add_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_add_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36);
short test_mm_reduce_mul_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_mul_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_mul_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72);
short test_mm_reduce_or_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_or_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_or_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15);
short test_mm_reduce_and_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_and_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_and_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1);
short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
// CHECK-LABEL: test_mm_mask_reduce_add_epi16
@@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
/...
[truncated]
|
|
@llvm/pr-subscribers-clang Author: Chaitanya Koparkar (ckoparkar) ChangesFixes #154284 Add constexpr support for the following: _mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16 _mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16 _mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16 _mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16 Patch is 48.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155199.diff 2 Files Affected:
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 2e2052ad1b682..eaf292a0949f5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2197,7 +2197,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2205,7 +2205,7 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
(__v8hi) __W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2213,7 +2213,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
(__v8hi) _mm_setzero_si128 ());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2221,7 +2221,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
(__v16hi) __W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2229,7 +2229,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
(__v16hi) _mm256_setzero_si256 ());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2237,7 +2237,7 @@ _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
(__v16qi) __W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
{
return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2245,7 +2245,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
(__v16qi) _mm_setzero_si128 ());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2253,7 +2253,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
(__v32qi) __W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
{
return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2812,353 +2812,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
(__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
(__v16hi)_mm256_setzero_si256()))
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_add_epi16(__m128i __W) {
return __builtin_reduce_add((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_mul_epi16(__m128i __W) {
return __builtin_reduce_mul((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_and_epi16(__m128i __W) {
return __builtin_reduce_and((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_or_epi16(__m128i __W) {
return __builtin_reduce_or((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_maskz_mov_epi16(__M, __W);
return __builtin_reduce_add((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
return __builtin_reduce_mul((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
__W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
return __builtin_reduce_and((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
__W = _mm_maskz_mov_epi16(__M, __W);
return __builtin_reduce_or((__v8hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epi16(__m128i __V) {
return __builtin_reduce_max((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epu16(__m128i __V) {
return __builtin_reduce_max((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epi16(__m128i __V) {
return __builtin_reduce_min((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epu16(__m128i __V) {
return __builtin_reduce_min((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
return __builtin_reduce_max((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
__V = _mm_maskz_mov_epi16(__M, __V);
return __builtin_reduce_max((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
return __builtin_reduce_min((__v8hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
return __builtin_reduce_min((__v8hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_add_epi16(__m256i __W) {
return __builtin_reduce_add((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_mul_epi16(__m256i __W) {
return __builtin_reduce_mul((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_and_epi16(__m256i __W) {
return __builtin_reduce_and((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_or_epi16(__m256i __W) {
return __builtin_reduce_or((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi16(__M, __W);
return __builtin_reduce_add((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
return __builtin_reduce_mul((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
__W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
return __builtin_reduce_and((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi16(__M, __W);
return __builtin_reduce_or((__v16hi)__W);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epi16(__m256i __V) {
return __builtin_reduce_max((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epu16(__m256i __V) {
return __builtin_reduce_max((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epi16(__m256i __V) {
return __builtin_reduce_min((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epu16(__m256i __V) {
return __builtin_reduce_min((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
return __builtin_reduce_max((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
__V = _mm256_maskz_mov_epi16(__M, __V);
return __builtin_reduce_max((__v16hu)__V);
}
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
return __builtin_reduce_min((__v16hi)__V);
}
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
__V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
return __builtin_reduce_min((__v16hu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_add_epi8(__m128i __W) {
return __builtin_reduce_add((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_mul_epi8(__m128i __W) {
return __builtin_reduce_mul((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_and_epi8(__m128i __W) {
return __builtin_reduce_and((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_or_epi8(__m128i __W) {
return __builtin_reduce_or((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_maskz_mov_epi8(__M, __W);
return __builtin_reduce_add((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
return __builtin_reduce_mul((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
return __builtin_reduce_and((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
__W = _mm_maskz_mov_epi8(__M, __W);
return __builtin_reduce_or((__v16qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epi8(__m128i __V) {
return __builtin_reduce_max((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_max_epu8(__m128i __V) {
return __builtin_reduce_max((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epi8(__m128i __V) {
return __builtin_reduce_min((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_reduce_min_epu8(__m128i __V) {
return __builtin_reduce_min((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
return __builtin_reduce_max((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
__V = _mm_maskz_mov_epi8(__M, __V);
return __builtin_reduce_max((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
return __builtin_reduce_min((__v16qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
__V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
return __builtin_reduce_min((__v16qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_add_epi8(__m256i __W) {
return __builtin_reduce_add((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_mul_epi8(__m256i __W) {
return __builtin_reduce_mul((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_and_epi8(__m256i __W) {
return __builtin_reduce_and((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_or_epi8(__m256i __W) {
return __builtin_reduce_or((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi8(__M, __W);
return __builtin_reduce_add((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
return __builtin_reduce_mul((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
return __builtin_reduce_and((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
__W = _mm256_maskz_mov_epi8(__M, __W);
return __builtin_reduce_or((__v32qs)__W);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epi8(__m256i __V) {
return __builtin_reduce_max((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_max_epu8(__m256i __V) {
return __builtin_reduce_max((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epi8(__m256i __V) {
return __builtin_reduce_min((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_reduce_min_epu8(__m256i __V) {
return __builtin_reduce_min((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
return __builtin_reduce_max((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
__V = _mm256_maskz_mov_epi8(__M, __V);
return __builtin_reduce_max((__v32qu)__V);
}
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
return __builtin_reduce_min((__v32qs)__V);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
__V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
return __builtin_reduce_min((__v32qu)__V);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
index faa3b54624a77..8e53db3f45300 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
@@ -8,30 +8,35 @@
// RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
#include <immintrin.h>
+#include "builtin_test_helpers.h"
short test_mm_reduce_add_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_add_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_add_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36);
short test_mm_reduce_mul_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_mul_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_mul_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72);
short test_mm_reduce_or_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_or_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_or_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15);
short test_mm_reduce_and_epi16(__m128i __W){
// CHECK-LABEL: test_mm_reduce_and_epi16
// CHECK: call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}})
return _mm_reduce_and_epi16(__W);
}
+TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1);
short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
// CHECK-LABEL: test_mm_mask_reduce_add_epi16
@@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
/...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
981fa7d to
c5cf1b2
Compare
|
@ckoparkar please can you update to trunk latest to see if we can clear that CI failure? |
…ed in constexpr Add constexpr support for the following: _mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16 _mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16 _mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16 _mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16
c5cf1b2 to
799837c
Compare
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
|
D'oh, yes. I saw the Linux tests fail at the 7min mark on a lot of other PRs yesterday. Was waiting for that to be resolved, looks like it's OK now. |
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
|
Thanks everyone for reviews :-) |
Fixes #154284
Add constexpr support for the following:
_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16
_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16
_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16
_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16